In [2]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import kpss

In [4]:
def kpss_test(series, **kw):    
    statistic, p_value, n_lags, critical_values = kpss(series, **kw)
    # Format Output
    print(f'KPSS Statistic: {statistic}')
    print(f'p-value: {p_value}')
    print(f'num lags: {n_lags}')
    print('Critial Values:')
    for key, value in critical_values.items():
        print(f'   {key} : {value}')
    print(f'Result: The series is {"not " if p_value < 0.05 else ""}stationary')

In [8]:
data = pd.read_csv('./aapl_indicators.csv')
data = data.drop(['Unnamed: 0', 'date'], axis=1)
data.head()

Unnamed: 0,open,close,low,high,volume,bb_bbm,bb_bbh,bb_bbl,atr,macd,cci,ema,roc,sma12,sma5,stochastic_oscillator,mtm6,mtm12
0,4.0714,4.1027,3.9755,4.1451,0.0,3.742555,4.258224,3.226886,0.258809,0.028209,108.782044,3.824942,3.199598,3.819608,3.8505,75.940406,10.722189,3.199598
1,4.0804,4.0224,4.0179,4.1786,0.0,3.77805,4.267487,3.288613,0.251801,0.041007,99.551288,3.85127,6.000474,3.838583,3.94916,80.342311,12.344989,6.000474
2,4.0224,4.0536,3.913,4.067,0.0,3.82504,4.222099,3.427981,0.244815,0.053055,72.677158,3.878247,1.115019,3.842308,4.02192,84.268814,14.862146,1.115019
3,4.0581,3.884,3.8706,4.0759,0.0,3.84647,4.205897,3.487043,0.241993,0.048361,40.605138,3.879014,-1.303585,3.838033,4.02728,62.924742,5.263158,-1.303585
4,3.9018,4.1362,3.884,4.1384,0.0,3.873925,4.233795,3.514055,0.242879,0.06425,80.64975,3.913306,5.284325,3.855333,4.03978,94.663982,7.233226,5.284325


In [11]:
open_data = data['open']
close_data = data['close']
low_data = data['low']
high_data = data['high']

In [12]:
print("Open")
kpss_test(open_data)
print("Close")
kpss_test(close_data)
print("Low")
kpss_test(low_data)
print("High")
kpss_test(high_data)

Open
KPSS Statistic: 12.423021146201567
p-value: 0.01
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary
Close
KPSS Statistic: 12.456341137320372
p-value: 0.01
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary
Low
KPSS Statistic: 12.366125917310145
p-value: 0.01
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary
High
KPSS Statistic: 12.45945490873185
p-value: 0.01
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary




In [23]:
def diff(series):
    l_i = list(series)
    l_o = []
    for i in range(len(l_i)-1):
        l_o.append(l_i[i+1] - l_i[i])
    return(np.array(l_o))

In [24]:
diffopen = diff(open_data)
diffclose = diff(close_data)
difflow = diff(low_data)
diffhigh = diff(high_data)

In [29]:
print("Open")
kpss_test(diffopen)
print("Close")
kpss_test(diffclose)
print("Low")
kpss_test(difflow)
print("High")
kpss_test(diffhigh)

Open
KPSS Statistic: 0.35255534257179066
p-value: 0.09760545578802127
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is stationary
Close
KPSS Statistic: 0.42326442956838806
p-value: 0.0671274010481086
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is stationary
Low
KPSS Statistic: 0.25685457635879194
p-value: 0.1
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is stationary
High
KPSS Statistic: 0.6818762397948399
p-value: 0.015193069109560007
num lags: 33
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
Result: The series is not stationary




In [30]:
def add_zero(series):
    return(np.array([0] + list(series)))

In [31]:
f_open = add_zero(diffopen)
f_close = add_zero(diffclose)
f_high = add_zero(diffhigh)
f_low = add_zero(difflow)

In [34]:
fl = [f_open, f_close, f_high, f_low]
l2 = ['open', 'close', 'low', 'high']
for i in range(len(fl)):
    data[l2[i]] = fl[i]

In [36]:
diffdata = data.iloc[1:]

Unnamed: 0,open,close,low,high,volume,bb_bbm,bb_bbh,bb_bbl,atr,macd,cci,ema,roc,sma12,sma5,stochastic_oscillator,mtm6,mtm12
1,0.009,-0.0803,0.0335,0.0424,0.0,3.77805,4.267487,3.288613,0.251801,0.041007,99.551288,3.85127,6.000474,3.838583,3.94916,80.342311,12.344989,6.000474
2,-0.058,0.0312,-0.1116,-0.1049,0.0,3.82504,4.222099,3.427981,0.244815,0.053055,72.677158,3.878247,1.115019,3.842308,4.02192,84.268814,14.862146,1.115019
3,0.0357,-0.1696,0.0089,-0.0424,0.0,3.84647,4.205897,3.487043,0.241993,0.048361,40.605138,3.879014,-1.303585,3.838033,4.02728,62.924742,5.263158,-1.303585
4,-0.1563,0.2522,0.0625,0.0134,0.0,3.873925,4.233795,3.514055,0.242879,0.06425,80.64975,3.913306,5.284325,3.855333,4.03978,94.663982,7.233226,5.284325
5,0.2266,0.1138,0.144,0.2321,0.0,3.90082,4.287682,3.513958,0.237409,0.085045,138.698707,3.958198,17.096019,3.907042,4.06924,96.393589,4.327761,17.096019


In [37]:
diffdata.to_csv('./diffaapl.csv')

In [38]:
diffdata['close']

1       -0.0803
2        0.0312
3       -0.1696
4        0.2522
5        0.1138
6       -0.1808
7        0.0335
8       -0.1294
9        0.0915
10       0.0870
11      -0.0379
12      -0.1719
13       0.1027
14       0.0491
15       0.5603
16      -0.2969
17       0.2132
18      -0.0815
19      -0.1005
20      -0.0312
21       0.0089
22       0.1250
23      -0.1585
24      -0.2522
25       0.0714
26       0.1898
27       0.1227
28      -0.0714
29       0.4263
30       0.3303
         ...   
5080     4.1000
5081     3.2600
5082     5.2300
5083     1.1100
5084     5.3800
5085     5.1700
5086     4.6600
5087    -6.2100
5088    -0.5000
5089     1.2000
5090    -2.0090
5091     7.0890
5092    -1.7700
5093     6.1500
5094    -2.0700
5095     1.8000
5096    -2.0500
5097     1.2400
5098     0.1200
5099    -0.4800
5100     4.0400
5101     1.6600
5102     1.6700
5103    -2.7700
5104     9.1800
5105     2.0500
5106    10.2700
5107     8.9300
5108   -16.7690
5109     2.5490
Name: close, Length: 510