In [1]:
import eikon as ek
import numpy as np
import pandas as pd
import cufflinks as cf
from sklearn.svm import SVC
import warnings; warnings.simplefilter('ignore')
from statsmodels.tsa.stattools import adfuller
import configparser as cp

In [2]:
import sys
print(sys.version)

3.7.5 (default, Nov  1 2019, 02:16:23) 
[Clang 11.0.0 (clang-1100.0.33.8)]


In [24]:
with open('../key.txt', 'r') as file:
    key = file.read()
print(key)

db3b24135a174fb98684d4e21dbd7c7f46671263


In [25]:
ek.set_app_key(key)

In [26]:
rics = [
    'SPY',
    'AAPL.O',
    'AMZN.O'
]

In [27]:
data = pd.DataFrame()
for ric in rics:
    data[ric] = ek.get_timeseries(ric, fields = 'CLOSE', start_date = '2019-03-22 10:30:00', end_date = '2019-03-22 16:00:00', interval='minute')['CLOSE']

In [28]:
data.tail()

Unnamed: 0_level_0,SPY,AAPL.O,AMZN.O
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-22 15:56:00,280.22,193.9442,1781.693
2019-03-22 15:57:00,280.05,193.61,1777.54
2019-03-22 15:58:00,280.1,193.525,1778.29
2019-03-22 15:59:00,279.84,193.33,1778.22
2019-03-22 16:00:00,279.64,192.95,1777.1669


In [7]:
data.dropna(inplace=True)

In [8]:
rets = np.log(data / data.shift(1)).dropna()

In [9]:
rets.head()

Unnamed: 0_level_0,SPY,AAPL.O,AMZN.O
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-22 10:39:00,0.000423,-0.000102,5.5e-05
2019-03-22 10:41:00,3.5e-05,0.000563,-6e-06
2019-03-22 10:46:00,3.5e-05,-0.000819,-0.000121
2019-03-22 10:47:00,0.000141,5.1e-05,-7.2e-05
2019-03-22 10:48:00,7e-05,-0.000461,6e-06


In [10]:
rets.tail()

Unnamed: 0_level_0,SPY,AAPL.O,AMZN.O
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-22 15:56:00,0.000714,0.001284,0.000316
2019-03-22 15:57:00,-0.000607,-0.001725,-0.002334
2019-03-22 15:58:00,0.000179,-0.000439,0.000422
2019-03-22 15:59:00,-0.000929,-0.001008,-3.9e-05
2019-03-22 16:00:00,-0.000715,-0.001967,-0.000592


In [11]:
adfuller(data['AAPL.O'])

(-0.7688681962600513,
 0.8280962209540412,
 16,
 275,
 {'1%': -3.454355055831705,
  '5%': -2.8721080938842976,
  '10%': -2.572401325619835},
 -102.44787871661259)

In [12]:
adfuller(rets['AAPL.O'])

(-5.438363829857754,
 2.812262985748501e-06,
 15,
 275,
 {'1%': -3.454355055831705,
  '5%': -2.8721080938842976,
  '10%': -2.572401325619835},
 -2993.8286058329886)

In [13]:
data.normalize().iplot(kind='lines')

In [14]:
rets.iplot(kind='histogram', subplots = True)

In [15]:
n = 15
df = pd.DataFrame(np.arange(n), index=pd.date_range('2018-1-1', periods=n, freq='B'),
                 columns=['data'])
df

Unnamed: 0,data
2018-01-01,0
2018-01-02,1
2018-01-03,2
2018-01-04,3
2018-01-05,4
2018-01-08,5
2018-01-09,6
2018-01-10,7
2018-01-11,8
2018-01-12,9


In [16]:
lags = 15
for lag in range(1, lags + 1):
    df['lags{}'.format(lag)] = df['data'].shift(lag)

In [17]:
df

Unnamed: 0,data,lags1,lags2,lags3,lags4,lags5,lags6,lags7,lags8,lags9,lags10,lags11,lags12,lags13,lags14,lags15
2018-01-01,0,,,,,,,,,,,,,,,
2018-01-02,1,0.0,,,,,,,,,,,,,,
2018-01-03,2,1.0,0.0,,,,,,,,,,,,,
2018-01-04,3,2.0,1.0,0.0,,,,,,,,,,,,
2018-01-05,4,3.0,2.0,1.0,0.0,,,,,,,,,,,
2018-01-08,5,4.0,3.0,2.0,1.0,0.0,,,,,,,,,,
2018-01-09,6,5.0,4.0,3.0,2.0,1.0,0.0,,,,,,,,,
2018-01-10,7,6.0,5.0,4.0,3.0,2.0,1.0,0.0,,,,,,,,
2018-01-11,8,7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0,,,,,,,
2018-01-12,9,8.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0,,,,,,


In [18]:
df.dropna().astype(int)

Unnamed: 0,data,lags1,lags2,lags3,lags4,lags5,lags6,lags7,lags8,lags9,lags10,lags11,lags12,lags13,lags14,lags15


In [19]:
def add_lags(data, ric, lags):
    cols = []
    df = pd.DataFrame(rets[ric])
    for lag in range(1, lags + 1):
        col = 'lag_{}'.format(lag)  # defines the column name
        # creates the lagged data column with directional values
        df[col] = df[ric].shift(lag)
        cols.append(col)  # stores the column name
    df.dropna(inplace=True)  # gets rid of incomplete data rows
    return df, cols

In [20]:
dfs = {}
for ric in rics:
    df, cols = add_lags(data, ric, lags)
    dfs[ric] = df

In [21]:
cols

['lag_1',
 'lag_2',
 'lag_3',
 'lag_4',
 'lag_5',
 'lag_6',
 'lag_7',
 'lag_8',
 'lag_9',
 'lag_10',
 'lag_11',
 'lag_12',
 'lag_13',
 'lag_14',
 'lag_15']

In [22]:
dfs.keys()

dict_keys(['SPY', 'AAPL.O', 'AMZN.O'])

In [23]:
dfs['AAPL.O'].head(7)

Unnamed: 0_level_0,AAPL.O,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,lag_11,lag_12,lag_13,lag_14,lag_15
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-03-22 11:10:00,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564,0.000512,-0.000461,5.1e-05,-0.000819,0.000563,-0.000102
2019-03-22 11:12:00,-0.000665,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564,0.000512,-0.000461,5.1e-05,-0.000819,0.000563
2019-03-22 11:13:00,-0.000256,-0.000665,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564,0.000512,-0.000461,5.1e-05,-0.000819
2019-03-22 11:14:00,0.000102,-0.000256,-0.000665,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564,0.000512,-0.000461,5.1e-05
2019-03-22 11:15:00,0.000461,0.000102,-0.000256,-0.000665,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564,0.000512,-0.000461
2019-03-22 11:16:00,0.000409,0.000461,0.000102,-0.000256,-0.000665,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564,0.000512
2019-03-22 11:17:00,-5.1e-05,0.000409,0.000461,0.000102,-0.000256,-0.000665,0.0,-0.000205,5.1e-05,-0.000307,0.00046,0.0,0.000819,0.000205,0.001076,-0.000564


In [24]:
np.sign(dfs['AAPL.O'].head(7))

Unnamed: 0_level_0,AAPL.O,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,lag_11,lag_12,lag_13,lag_14,lag_15
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-03-22 11:10:00,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0
2019-03-22 11:12:00,-1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0
2019-03-22 11:13:00,-1.0,-1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0
2019-03-22 11:14:00,1.0,-1.0,-1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0
2019-03-22 11:15:00,1.0,1.0,-1.0,-1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0,1.0,-1.0
2019-03-22 11:16:00,1.0,1.0,1.0,-1.0,-1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0,1.0
2019-03-22 11:17:00,-1.0,1.0,1.0,1.0,-1.0,-1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,-1.0


In [25]:
2 ** lags

32768

In [26]:
for ric in rics:
    model = SVC(C=100)
    df = dfs[ric].copy()
    model.fit(np.sign(df[cols]), np.sign(df[ric]))
    dfs[ric]['position'] = model.predict(np.sign(df[cols]))

In [27]:
for ric in rics:
    print('{:10} | {}'.format(ric, dfs[ric]['position'].values[:12]))

SPY        | [-1. -1. -1.  1.  1.  1.  1. -1. -1.  1.  1.  1.]
AAPL.O     | [ 0. -1. -1.  1.  1.  1. -1.  1. -1. -1.  1.  1.]
AMZN.O     | [-1.  0. -1.  1.  1. -1.  1.  1. -1.  1.  1.  1.]


In [28]:
for ric in rics:
    dfs[ric]['strategy'] = dfs[ric]['position'] * dfs[ric][ric]

In [29]:
for ric in rics:
    dfs[ric][[ric, 'strategy']].cumsum().apply(np.exp).iplot()

In [30]:
split = int(len(data) / 2)

In [31]:
vspan = [{'x0': data.index[0], 'x1': data.index[split], 'color': 'green', 'fill': True, 'opacity': .2},
        {'x0': data.index[split], 'x1': data.index[-1], 'color': 'red', 'fill': True, 'opacity': .2}]

In [32]:
data.normalize().iplot(vspan=vspan)

In [33]:
res = {}
for ric in rics:
    model = SVC(C=100)
    df = dfs[ric].copy()
    split = int(len(df) / 2)
    train_x = np.sign(df[cols]).iloc[:split]
    train_y = np.sign(df[ric]).iloc[:split]
    test_x = np.sign(df[cols]).iloc[split:]
    test_y = df[ric].iloc[split:]
    model.fit(train_x, train_y)
    pred = model.predict(test_x)
    strat = pred * test_y
    res[ric] = pd.DataFrame({ric: test_y, 'pred': pred, 'strategy': strat})

In [34]:
res['AAPL.O'].head()

Unnamed: 0_level_0,AAPL.O,pred,strategy
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-22 13:43:00,-0.000252,-1.0,0.000252
2019-03-22 13:44:00,-0.001218,1.0,-0.001218
2019-03-22 13:45:00,-0.001574,1.0,-0.001574
2019-03-22 13:46:00,0.000254,1.0,0.000254
2019-03-22 13:47:00,-0.002187,1.0,-0.002187


In [35]:
for ric in rics:
    res[ric][[ric, 'strategy']].cumsum().apply(np.exp).iplot()