## Quick and dirty model using SVC

The data file is sourced from quandl. It is already sorted by date descending.

Contains the following attributes

    Date
    Open 
    High
    Low
    Last
    Close
    Total Trade Quantity
    Turnover (Lacs)

### Import pandas, Load the file and print a few rows

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [2]:
df = pd.read_csv('data/NSE-HINDUNILVR.csv')

In [3]:
df.tail(6)

Unnamed: 0,Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
2581,2007-07-26,204.0,208.0,198.1,205.5,205.7,3151821.0,6422.44
2582,2007-07-25,202.8,204.0,198.75,201.5,201.95,2531149.0,5110.28
2583,2007-07-24,205.9,205.9,197.85,200.25,199.75,1938559.0,3906.08
2584,2007-07-23,199.5,205.8,198.0,203.8,204.85,6892251.0,13974.06
2585,2007-07-20,198.0,199.9,192.55,194.2,194.15,2678230.0,5204.27
2586,2007-07-19,199.0,199.75,194.65,198.5,197.8,2066807.0,4073.72


### Set date as the index 

In [4]:
df.set_index('Date',inplace=True)

In [5]:
df.tail(6)

Unnamed: 0_level_0,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2007-07-26,204.0,208.0,198.1,205.5,205.7,3151821.0,6422.44
2007-07-25,202.8,204.0,198.75,201.5,201.95,2531149.0,5110.28
2007-07-24,205.9,205.9,197.85,200.25,199.75,1938559.0,3906.08
2007-07-23,199.5,205.8,198.0,203.8,204.85,6892251.0,13974.06
2007-07-20,198.0,199.9,192.55,194.2,194.15,2678230.0,5204.27
2007-07-19,199.0,199.75,194.65,198.5,197.8,2066807.0,4073.72


### For all the columns create lagged columns for 5 days

In [6]:
column_names =['Open','High','Low','Last','Close','Total Trade Quantity','Turnover (Lacs)']

In [7]:
for column in column_names:
    for i in range(5):
        new_column_name=column+"Lag_"+str(i+1)
        df[new_column_name]=df[column].shift(-1*(i+1))

In [8]:
df.tail(6)

Unnamed: 0_level_0,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs),OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
2007-07-26,204.0,208.0,198.1,205.5,205.7,3151821.0,6422.44,202.8,205.9,199.5,198.0,199.0,204.0,205.9,205.8,199.9,199.75,198.75,197.85,198.0,192.55,194.65,201.5,200.25,203.8,194.2,198.5,201.95,199.75,204.85,194.15,197.8,2531149.0,1938559.0,6892251.0,2678230.0,2066807.0,5110.28,3906.08,13974.06,5204.27,4073.72
2007-07-25,202.8,204.0,198.75,201.5,201.95,2531149.0,5110.28,205.9,199.5,198.0,199.0,,205.9,205.8,199.9,199.75,,197.85,198.0,192.55,194.65,,200.25,203.8,194.2,198.5,,199.75,204.85,194.15,197.8,,1938559.0,6892251.0,2678230.0,2066807.0,,3906.08,13974.06,5204.27,4073.72,
2007-07-24,205.9,205.9,197.85,200.25,199.75,1938559.0,3906.08,199.5,198.0,199.0,,,205.8,199.9,199.75,,,198.0,192.55,194.65,,,203.8,194.2,198.5,,,204.85,194.15,197.8,,,6892251.0,2678230.0,2066807.0,,,13974.06,5204.27,4073.72,,
2007-07-23,199.5,205.8,198.0,203.8,204.85,6892251.0,13974.06,198.0,199.0,,,,199.9,199.75,,,,192.55,194.65,,,,194.2,198.5,,,,194.15,197.8,,,,2678230.0,2066807.0,,,,5204.27,4073.72,,,
2007-07-20,198.0,199.9,192.55,194.2,194.15,2678230.0,5204.27,199.0,,,,,199.75,,,,,194.65,,,,,198.5,,,,,197.8,,,,,2066807.0,,,,,4073.72,,,,
2007-07-19,199.0,199.75,194.65,198.5,197.8,2066807.0,4073.72,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
df.head(6)

Unnamed: 0_level_0,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs),OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
2017-12-26,1350.4,1353.9,1340.0,1348.0,1348.1,913529.0,12307.33,1341.3,1365.0,1346.2,1333.1,1320.0,1359.5,1365.15,1367.95,1352.0,1345.0,1341.3,1345.1,1333.5,1326.5,1300.95,1350.4,1346.45,1367.15,1352.0,1333.0,1356.5,1348.45,1362.65,1349.7,1331.75,561400.0,557871.0,1198943.0,1227087.0,918171.0,7604.01,7532.59,16226.24,16453.06,12216.19
2017-12-22,1341.3,1359.5,1341.3,1350.4,1356.5,561400.0,7604.01,1365.0,1346.2,1333.1,1320.0,1328.3,1365.15,1367.95,1352.0,1345.0,1335.0,1345.1,1333.5,1326.5,1300.95,1310.6,1346.45,1367.15,1352.0,1333.0,1323.3,1348.45,1362.65,1349.7,1331.75,1324.55,557871.0,1198943.0,1227087.0,918171.0,1420357.0,7532.59,16226.24,16453.06,12216.19,18787.09
2017-12-21,1365.0,1365.15,1345.1,1346.45,1348.45,557871.0,7532.59,1346.2,1333.1,1320.0,1328.3,1314.0,1367.95,1352.0,1345.0,1335.0,1326.85,1333.5,1326.5,1300.95,1310.6,1304.25,1367.15,1352.0,1333.0,1323.3,1321.1,1362.65,1349.7,1331.75,1324.55,1321.6,1198943.0,1227087.0,918171.0,1420357.0,834383.0,16226.24,16453.06,12216.19,18787.09,10975.37
2017-12-20,1346.2,1367.95,1333.5,1367.15,1362.65,1198943.0,16226.24,1333.1,1320.0,1328.3,1314.0,1319.4,1352.0,1345.0,1335.0,1326.85,1337.0,1326.5,1300.95,1310.6,1304.25,1306.2,1352.0,1333.0,1323.3,1321.1,1320.0,1349.7,1331.75,1324.55,1321.6,1315.05,1227087.0,918171.0,1420357.0,834383.0,917877.0,16453.06,12216.19,18787.09,10975.37,12110.0
2017-12-19,1333.1,1352.0,1326.5,1352.0,1349.7,1227087.0,16453.06,1320.0,1328.3,1314.0,1319.4,1319.1,1345.0,1335.0,1326.85,1337.0,1324.0,1300.95,1310.6,1304.25,1306.2,1308.65,1333.0,1323.3,1321.1,1320.0,1315.0,1331.75,1324.55,1321.6,1315.05,1314.65,918171.0,1420357.0,834383.0,917877.0,873443.0,12216.19,18787.09,10975.37,12110.0,11502.48
2017-12-18,1320.0,1345.0,1300.95,1333.0,1331.75,918171.0,12216.19,1328.3,1314.0,1319.4,1319.1,1329.0,1335.0,1326.85,1337.0,1324.0,1329.0,1310.6,1304.25,1306.2,1308.65,1311.65,1323.3,1321.1,1320.0,1315.0,1324.05,1324.55,1321.6,1315.05,1314.65,1324.05,1420357.0,834383.0,917877.0,873443.0,597953.0,18787.09,10975.37,12110.0,11502.48,7898.88


In [10]:
#df.to_csv('data/NSE-HINDUNILVR_withlag.csv')

In [11]:
df.shape

(2587, 42)

### Transform Data. Drop redundant columns, Create target categorical column. Split train/test

In [12]:
df=df.drop(['Open', 'High','Low','Last','Total Trade Quantity','Turnover (Lacs)'], axis=1)

In [13]:
df['percent_change']=((df['Close']-df['CloseLag_1'])/df['CloseLag_1'])*100

In [14]:
df.head(5)

Unnamed: 0_level_0,Close,OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5,percent_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2017-12-26,1348.1,1341.3,1365.0,1346.2,1333.1,1320.0,1359.5,1365.15,1367.95,1352.0,1345.0,1341.3,1345.1,1333.5,1326.5,1300.95,1350.4,1346.45,1367.15,1352.0,1333.0,1356.5,1348.45,1362.65,1349.7,1331.75,561400.0,557871.0,1198943.0,1227087.0,918171.0,7604.01,7532.59,16226.24,16453.06,12216.19,-0.619241
2017-12-22,1356.5,1365.0,1346.2,1333.1,1320.0,1328.3,1365.15,1367.95,1352.0,1345.0,1335.0,1345.1,1333.5,1326.5,1300.95,1310.6,1346.45,1367.15,1352.0,1333.0,1323.3,1348.45,1362.65,1349.7,1331.75,1324.55,557871.0,1198943.0,1227087.0,918171.0,1420357.0,7532.59,16226.24,16453.06,12216.19,18787.09,0.596982
2017-12-21,1348.45,1346.2,1333.1,1320.0,1328.3,1314.0,1367.95,1352.0,1345.0,1335.0,1326.85,1333.5,1326.5,1300.95,1310.6,1304.25,1367.15,1352.0,1333.0,1323.3,1321.1,1362.65,1349.7,1331.75,1324.55,1321.6,1198943.0,1227087.0,918171.0,1420357.0,834383.0,16226.24,16453.06,12216.19,18787.09,10975.37,-1.042087
2017-12-20,1362.65,1333.1,1320.0,1328.3,1314.0,1319.4,1352.0,1345.0,1335.0,1326.85,1337.0,1326.5,1300.95,1310.6,1304.25,1306.2,1352.0,1333.0,1323.3,1321.1,1320.0,1349.7,1331.75,1324.55,1321.6,1315.05,1227087.0,918171.0,1420357.0,834383.0,917877.0,16453.06,12216.19,18787.09,10975.37,12110.0,0.959472
2017-12-19,1349.7,1320.0,1328.3,1314.0,1319.4,1319.1,1345.0,1335.0,1326.85,1337.0,1324.0,1300.95,1310.6,1304.25,1306.2,1308.65,1333.0,1323.3,1321.1,1320.0,1315.0,1331.75,1324.55,1321.6,1315.05,1314.65,918171.0,1420357.0,834383.0,917877.0,873443.0,12216.19,18787.09,10975.37,12110.0,11502.48,1.347851


Use bins to create 6 categories

In [15]:
bins = [-100.0,0.0,100.0]

In [16]:
group_names = ['Down', 'Up']

In [17]:
df['trend'] = pd.cut(df['percent_change'], bins, labels=group_names)

In [18]:
df

Unnamed: 0_level_0,Close,OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5,percent_change,trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2017-12-26,1348.10,1341.30,1365.00,1346.20,1333.10,1320.00,1359.50,1365.15,1367.95,1352.00,1345.00,1341.30,1345.10,1333.50,1326.50,1300.95,1350.40,1346.45,1367.15,1352.00,1333.00,1356.50,1348.45,1362.65,1349.70,1331.75,561400.0,557871.0,1198943.0,1227087.0,918171.0,7604.01,7532.59,16226.24,16453.06,12216.19,-0.619241,Down
2017-12-22,1356.50,1365.00,1346.20,1333.10,1320.00,1328.30,1365.15,1367.95,1352.00,1345.00,1335.00,1345.10,1333.50,1326.50,1300.95,1310.60,1346.45,1367.15,1352.00,1333.00,1323.30,1348.45,1362.65,1349.70,1331.75,1324.55,557871.0,1198943.0,1227087.0,918171.0,1420357.0,7532.59,16226.24,16453.06,12216.19,18787.09,0.596982,Up
2017-12-21,1348.45,1346.20,1333.10,1320.00,1328.30,1314.00,1367.95,1352.00,1345.00,1335.00,1326.85,1333.50,1326.50,1300.95,1310.60,1304.25,1367.15,1352.00,1333.00,1323.30,1321.10,1362.65,1349.70,1331.75,1324.55,1321.60,1198943.0,1227087.0,918171.0,1420357.0,834383.0,16226.24,16453.06,12216.19,18787.09,10975.37,-1.042087,Down
2017-12-20,1362.65,1333.10,1320.00,1328.30,1314.00,1319.40,1352.00,1345.00,1335.00,1326.85,1337.00,1326.50,1300.95,1310.60,1304.25,1306.20,1352.00,1333.00,1323.30,1321.10,1320.00,1349.70,1331.75,1324.55,1321.60,1315.05,1227087.0,918171.0,1420357.0,834383.0,917877.0,16453.06,12216.19,18787.09,10975.37,12110.00,0.959472,Up
2017-12-19,1349.70,1320.00,1328.30,1314.00,1319.40,1319.10,1345.00,1335.00,1326.85,1337.00,1324.00,1300.95,1310.60,1304.25,1306.20,1308.65,1333.00,1323.30,1321.10,1320.00,1315.00,1331.75,1324.55,1321.60,1315.05,1314.65,918171.0,1420357.0,834383.0,917877.0,873443.0,12216.19,18787.09,10975.37,12110.00,11502.48,1.347851,Up
2017-12-18,1331.75,1328.30,1314.00,1319.40,1319.10,1329.00,1335.00,1326.85,1337.00,1324.00,1329.00,1310.60,1304.25,1306.20,1308.65,1311.65,1323.30,1321.10,1320.00,1315.00,1324.05,1324.55,1321.60,1315.05,1314.65,1324.05,1420357.0,834383.0,917877.0,873443.0,597953.0,18787.09,10975.37,12110.00,11502.48,7898.88,0.543581,Up
2017-12-15,1324.55,1314.00,1319.40,1319.10,1329.00,1296.40,1326.85,1337.00,1324.00,1329.00,1331.00,1304.25,1306.20,1308.65,1311.65,1291.30,1321.10,1320.00,1315.00,1324.05,1318.00,1321.60,1315.05,1314.65,1324.05,1326.65,834383.0,917877.0,873443.0,597953.0,1267860.0,10975.37,12110.00,11502.48,7898.88,16686.38,0.223214,Up
2017-12-14,1321.60,1319.40,1319.10,1329.00,1296.40,1277.20,1337.00,1324.00,1329.00,1331.00,1298.85,1306.20,1308.65,1311.65,1291.30,1272.05,1320.00,1315.00,1324.05,1318.00,1296.00,1315.05,1314.65,1324.05,1326.65,1291.95,917877.0,873443.0,597953.0,1267860.0,1407824.0,12110.00,11502.48,7898.88,16686.38,18088.95,0.498080,Up
2017-12-13,1315.05,1319.10,1329.00,1296.40,1277.20,1258.25,1324.00,1329.00,1331.00,1298.85,1282.30,1308.65,1311.65,1291.30,1272.05,1255.60,1315.00,1324.05,1318.00,1296.00,1274.90,1314.65,1324.05,1326.65,1291.95,1275.60,873443.0,597953.0,1267860.0,1407824.0,1516859.0,11502.48,7898.88,16686.38,18088.95,19242.31,0.030426,Up
2017-12-12,1314.65,1329.00,1296.40,1277.20,1258.25,1274.00,1329.00,1331.00,1298.85,1282.30,1274.00,1311.65,1291.30,1272.05,1255.60,1255.00,1324.05,1318.00,1296.00,1274.90,1262.45,1324.05,1326.65,1291.95,1275.60,1261.00,597953.0,1267860.0,1407824.0,1516859.0,862459.0,7898.88,16686.38,18088.95,19242.31,10883.58,-0.709943,Down


In [19]:
df=df.drop(['Close','percent_change'], axis=1)

In [20]:
df

Unnamed: 0_level_0,OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5,trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
2017-12-26,1341.30,1365.00,1346.20,1333.10,1320.00,1359.50,1365.15,1367.95,1352.00,1345.00,1341.30,1345.10,1333.50,1326.50,1300.95,1350.40,1346.45,1367.15,1352.00,1333.00,1356.50,1348.45,1362.65,1349.70,1331.75,561400.0,557871.0,1198943.0,1227087.0,918171.0,7604.01,7532.59,16226.24,16453.06,12216.19,Down
2017-12-22,1365.00,1346.20,1333.10,1320.00,1328.30,1365.15,1367.95,1352.00,1345.00,1335.00,1345.10,1333.50,1326.50,1300.95,1310.60,1346.45,1367.15,1352.00,1333.00,1323.30,1348.45,1362.65,1349.70,1331.75,1324.55,557871.0,1198943.0,1227087.0,918171.0,1420357.0,7532.59,16226.24,16453.06,12216.19,18787.09,Up
2017-12-21,1346.20,1333.10,1320.00,1328.30,1314.00,1367.95,1352.00,1345.00,1335.00,1326.85,1333.50,1326.50,1300.95,1310.60,1304.25,1367.15,1352.00,1333.00,1323.30,1321.10,1362.65,1349.70,1331.75,1324.55,1321.60,1198943.0,1227087.0,918171.0,1420357.0,834383.0,16226.24,16453.06,12216.19,18787.09,10975.37,Down
2017-12-20,1333.10,1320.00,1328.30,1314.00,1319.40,1352.00,1345.00,1335.00,1326.85,1337.00,1326.50,1300.95,1310.60,1304.25,1306.20,1352.00,1333.00,1323.30,1321.10,1320.00,1349.70,1331.75,1324.55,1321.60,1315.05,1227087.0,918171.0,1420357.0,834383.0,917877.0,16453.06,12216.19,18787.09,10975.37,12110.00,Up
2017-12-19,1320.00,1328.30,1314.00,1319.40,1319.10,1345.00,1335.00,1326.85,1337.00,1324.00,1300.95,1310.60,1304.25,1306.20,1308.65,1333.00,1323.30,1321.10,1320.00,1315.00,1331.75,1324.55,1321.60,1315.05,1314.65,918171.0,1420357.0,834383.0,917877.0,873443.0,12216.19,18787.09,10975.37,12110.00,11502.48,Up
2017-12-18,1328.30,1314.00,1319.40,1319.10,1329.00,1335.00,1326.85,1337.00,1324.00,1329.00,1310.60,1304.25,1306.20,1308.65,1311.65,1323.30,1321.10,1320.00,1315.00,1324.05,1324.55,1321.60,1315.05,1314.65,1324.05,1420357.0,834383.0,917877.0,873443.0,597953.0,18787.09,10975.37,12110.00,11502.48,7898.88,Up
2017-12-15,1314.00,1319.40,1319.10,1329.00,1296.40,1326.85,1337.00,1324.00,1329.00,1331.00,1304.25,1306.20,1308.65,1311.65,1291.30,1321.10,1320.00,1315.00,1324.05,1318.00,1321.60,1315.05,1314.65,1324.05,1326.65,834383.0,917877.0,873443.0,597953.0,1267860.0,10975.37,12110.00,11502.48,7898.88,16686.38,Up
2017-12-14,1319.40,1319.10,1329.00,1296.40,1277.20,1337.00,1324.00,1329.00,1331.00,1298.85,1306.20,1308.65,1311.65,1291.30,1272.05,1320.00,1315.00,1324.05,1318.00,1296.00,1315.05,1314.65,1324.05,1326.65,1291.95,917877.0,873443.0,597953.0,1267860.0,1407824.0,12110.00,11502.48,7898.88,16686.38,18088.95,Up
2017-12-13,1319.10,1329.00,1296.40,1277.20,1258.25,1324.00,1329.00,1331.00,1298.85,1282.30,1308.65,1311.65,1291.30,1272.05,1255.60,1315.00,1324.05,1318.00,1296.00,1274.90,1314.65,1324.05,1326.65,1291.95,1275.60,873443.0,597953.0,1267860.0,1407824.0,1516859.0,11502.48,7898.88,16686.38,18088.95,19242.31,Up
2017-12-12,1329.00,1296.40,1277.20,1258.25,1274.00,1329.00,1331.00,1298.85,1282.30,1274.00,1311.65,1291.30,1272.05,1255.60,1255.00,1324.05,1318.00,1296.00,1274.90,1262.45,1324.05,1326.65,1291.95,1275.60,1261.00,597953.0,1267860.0,1407824.0,1516859.0,862459.0,7898.88,16686.38,18088.95,19242.31,10883.58,Down


Drop last five rows with NA

In [21]:
df=df.dropna()

In [22]:
df

Unnamed: 0_level_0,OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5,trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
2017-12-26,1341.30,1365.00,1346.20,1333.10,1320.00,1359.50,1365.15,1367.95,1352.00,1345.00,1341.30,1345.10,1333.50,1326.50,1300.95,1350.40,1346.45,1367.15,1352.00,1333.00,1356.50,1348.45,1362.65,1349.70,1331.75,561400.0,557871.0,1198943.0,1227087.0,918171.0,7604.01,7532.59,16226.24,16453.06,12216.19,Down
2017-12-22,1365.00,1346.20,1333.10,1320.00,1328.30,1365.15,1367.95,1352.00,1345.00,1335.00,1345.10,1333.50,1326.50,1300.95,1310.60,1346.45,1367.15,1352.00,1333.00,1323.30,1348.45,1362.65,1349.70,1331.75,1324.55,557871.0,1198943.0,1227087.0,918171.0,1420357.0,7532.59,16226.24,16453.06,12216.19,18787.09,Up
2017-12-21,1346.20,1333.10,1320.00,1328.30,1314.00,1367.95,1352.00,1345.00,1335.00,1326.85,1333.50,1326.50,1300.95,1310.60,1304.25,1367.15,1352.00,1333.00,1323.30,1321.10,1362.65,1349.70,1331.75,1324.55,1321.60,1198943.0,1227087.0,918171.0,1420357.0,834383.0,16226.24,16453.06,12216.19,18787.09,10975.37,Down
2017-12-20,1333.10,1320.00,1328.30,1314.00,1319.40,1352.00,1345.00,1335.00,1326.85,1337.00,1326.50,1300.95,1310.60,1304.25,1306.20,1352.00,1333.00,1323.30,1321.10,1320.00,1349.70,1331.75,1324.55,1321.60,1315.05,1227087.0,918171.0,1420357.0,834383.0,917877.0,16453.06,12216.19,18787.09,10975.37,12110.00,Up
2017-12-19,1320.00,1328.30,1314.00,1319.40,1319.10,1345.00,1335.00,1326.85,1337.00,1324.00,1300.95,1310.60,1304.25,1306.20,1308.65,1333.00,1323.30,1321.10,1320.00,1315.00,1331.75,1324.55,1321.60,1315.05,1314.65,918171.0,1420357.0,834383.0,917877.0,873443.0,12216.19,18787.09,10975.37,12110.00,11502.48,Up
2017-12-18,1328.30,1314.00,1319.40,1319.10,1329.00,1335.00,1326.85,1337.00,1324.00,1329.00,1310.60,1304.25,1306.20,1308.65,1311.65,1323.30,1321.10,1320.00,1315.00,1324.05,1324.55,1321.60,1315.05,1314.65,1324.05,1420357.0,834383.0,917877.0,873443.0,597953.0,18787.09,10975.37,12110.00,11502.48,7898.88,Up
2017-12-15,1314.00,1319.40,1319.10,1329.00,1296.40,1326.85,1337.00,1324.00,1329.00,1331.00,1304.25,1306.20,1308.65,1311.65,1291.30,1321.10,1320.00,1315.00,1324.05,1318.00,1321.60,1315.05,1314.65,1324.05,1326.65,834383.0,917877.0,873443.0,597953.0,1267860.0,10975.37,12110.00,11502.48,7898.88,16686.38,Up
2017-12-14,1319.40,1319.10,1329.00,1296.40,1277.20,1337.00,1324.00,1329.00,1331.00,1298.85,1306.20,1308.65,1311.65,1291.30,1272.05,1320.00,1315.00,1324.05,1318.00,1296.00,1315.05,1314.65,1324.05,1326.65,1291.95,917877.0,873443.0,597953.0,1267860.0,1407824.0,12110.00,11502.48,7898.88,16686.38,18088.95,Up
2017-12-13,1319.10,1329.00,1296.40,1277.20,1258.25,1324.00,1329.00,1331.00,1298.85,1282.30,1308.65,1311.65,1291.30,1272.05,1255.60,1315.00,1324.05,1318.00,1296.00,1274.90,1314.65,1324.05,1326.65,1291.95,1275.60,873443.0,597953.0,1267860.0,1407824.0,1516859.0,11502.48,7898.88,16686.38,18088.95,19242.31,Up
2017-12-12,1329.00,1296.40,1277.20,1258.25,1274.00,1329.00,1331.00,1298.85,1282.30,1274.00,1311.65,1291.30,1272.05,1255.60,1255.00,1324.05,1318.00,1296.00,1274.90,1262.45,1324.05,1326.65,1291.95,1275.60,1261.00,597953.0,1267860.0,1407824.0,1516859.0,862459.0,7898.88,16686.38,18088.95,19242.31,10883.58,Down


In [23]:
X=df.reset_index()

In [24]:
X

Unnamed: 0,Date,OpenLag_1,OpenLag_2,OpenLag_3,OpenLag_4,OpenLag_5,HighLag_1,HighLag_2,HighLag_3,HighLag_4,HighLag_5,LowLag_1,LowLag_2,LowLag_3,LowLag_4,LowLag_5,LastLag_1,LastLag_2,LastLag_3,LastLag_4,LastLag_5,CloseLag_1,CloseLag_2,CloseLag_3,CloseLag_4,CloseLag_5,Total Trade QuantityLag_1,Total Trade QuantityLag_2,Total Trade QuantityLag_3,Total Trade QuantityLag_4,Total Trade QuantityLag_5,Turnover (Lacs)Lag_1,Turnover (Lacs)Lag_2,Turnover (Lacs)Lag_3,Turnover (Lacs)Lag_4,Turnover (Lacs)Lag_5,trend
0,2017-12-26,1341.30,1365.00,1346.20,1333.10,1320.00,1359.50,1365.15,1367.95,1352.00,1345.00,1341.30,1345.10,1333.50,1326.50,1300.95,1350.40,1346.45,1367.15,1352.00,1333.00,1356.50,1348.45,1362.65,1349.70,1331.75,561400.0,557871.0,1198943.0,1227087.0,918171.0,7604.01,7532.59,16226.24,16453.06,12216.19,Down
1,2017-12-22,1365.00,1346.20,1333.10,1320.00,1328.30,1365.15,1367.95,1352.00,1345.00,1335.00,1345.10,1333.50,1326.50,1300.95,1310.60,1346.45,1367.15,1352.00,1333.00,1323.30,1348.45,1362.65,1349.70,1331.75,1324.55,557871.0,1198943.0,1227087.0,918171.0,1420357.0,7532.59,16226.24,16453.06,12216.19,18787.09,Up
2,2017-12-21,1346.20,1333.10,1320.00,1328.30,1314.00,1367.95,1352.00,1345.00,1335.00,1326.85,1333.50,1326.50,1300.95,1310.60,1304.25,1367.15,1352.00,1333.00,1323.30,1321.10,1362.65,1349.70,1331.75,1324.55,1321.60,1198943.0,1227087.0,918171.0,1420357.0,834383.0,16226.24,16453.06,12216.19,18787.09,10975.37,Down
3,2017-12-20,1333.10,1320.00,1328.30,1314.00,1319.40,1352.00,1345.00,1335.00,1326.85,1337.00,1326.50,1300.95,1310.60,1304.25,1306.20,1352.00,1333.00,1323.30,1321.10,1320.00,1349.70,1331.75,1324.55,1321.60,1315.05,1227087.0,918171.0,1420357.0,834383.0,917877.0,16453.06,12216.19,18787.09,10975.37,12110.00,Up
4,2017-12-19,1320.00,1328.30,1314.00,1319.40,1319.10,1345.00,1335.00,1326.85,1337.00,1324.00,1300.95,1310.60,1304.25,1306.20,1308.65,1333.00,1323.30,1321.10,1320.00,1315.00,1331.75,1324.55,1321.60,1315.05,1314.65,918171.0,1420357.0,834383.0,917877.0,873443.0,12216.19,18787.09,10975.37,12110.00,11502.48,Up
5,2017-12-18,1328.30,1314.00,1319.40,1319.10,1329.00,1335.00,1326.85,1337.00,1324.00,1329.00,1310.60,1304.25,1306.20,1308.65,1311.65,1323.30,1321.10,1320.00,1315.00,1324.05,1324.55,1321.60,1315.05,1314.65,1324.05,1420357.0,834383.0,917877.0,873443.0,597953.0,18787.09,10975.37,12110.00,11502.48,7898.88,Up
6,2017-12-15,1314.00,1319.40,1319.10,1329.00,1296.40,1326.85,1337.00,1324.00,1329.00,1331.00,1304.25,1306.20,1308.65,1311.65,1291.30,1321.10,1320.00,1315.00,1324.05,1318.00,1321.60,1315.05,1314.65,1324.05,1326.65,834383.0,917877.0,873443.0,597953.0,1267860.0,10975.37,12110.00,11502.48,7898.88,16686.38,Up
7,2017-12-14,1319.40,1319.10,1329.00,1296.40,1277.20,1337.00,1324.00,1329.00,1331.00,1298.85,1306.20,1308.65,1311.65,1291.30,1272.05,1320.00,1315.00,1324.05,1318.00,1296.00,1315.05,1314.65,1324.05,1326.65,1291.95,917877.0,873443.0,597953.0,1267860.0,1407824.0,12110.00,11502.48,7898.88,16686.38,18088.95,Up
8,2017-12-13,1319.10,1329.00,1296.40,1277.20,1258.25,1324.00,1329.00,1331.00,1298.85,1282.30,1308.65,1311.65,1291.30,1272.05,1255.60,1315.00,1324.05,1318.00,1296.00,1274.90,1314.65,1324.05,1326.65,1291.95,1275.60,873443.0,597953.0,1267860.0,1407824.0,1516859.0,11502.48,7898.88,16686.38,18088.95,19242.31,Up
9,2017-12-12,1329.00,1296.40,1277.20,1258.25,1274.00,1329.00,1331.00,1298.85,1282.30,1274.00,1311.65,1291.30,1272.05,1255.60,1255.00,1324.05,1318.00,1296.00,1274.90,1262.45,1324.05,1326.65,1291.95,1275.60,1261.00,597953.0,1267860.0,1407824.0,1516859.0,862459.0,7898.88,16686.38,18088.95,19242.31,10883.58,Down


In [25]:
X['trend'].value_counts()

Up      1302
Down    1280
Name: trend, dtype: int64

In [26]:
y=X['trend']

In [27]:
X=X.drop(['Date','trend'],axis=1)

In [28]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Use SVC to create first model. Do scaling before that

In [29]:
from sklearn import preprocessing
scaler=preprocessing.StandardScaler().fit(X_train)

In [30]:
X_train_scaled=scaler.transform(X_train) 
X_test_scaled=scaler.transform(X_test)

In [31]:
X_train_scaled

array([[ 1.04837844,  1.05859744,  1.05207251, ..., -0.79118413,
        -0.37377526, -0.60809526],
       [ 1.16719058,  1.18549999,  1.15192455, ...,  0.88911116,
        -0.37334405, -0.06359347],
       [-0.47829004, -0.45206684, -0.48057179, ...,  0.65853051,
        -0.15762027,  0.28454589],
       ..., 
       [ 0.08066706,  0.0886631 ,  0.09755463, ..., -0.53828148,
        -0.53089841, -0.30328266],
       [ 0.93091644,  0.93135692,  0.95729768, ...,  0.12077471,
        -0.21063189, -0.41623728],
       [ 0.2963516 ,  0.29177478,  0.31807534, ..., -0.7521404 ,
        -0.56437729, -0.71009694]])

In [47]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [48]:
random_state=42

In [50]:
names = ["Nearest Neighbors", "Poly SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="poly",degree=5,random_state=random_state),
    SVC(gamma=2, C=1,random_state=random_state),
    GaussianProcessClassifier(1.0 * RBF(1.0),random_state=random_state),
    DecisionTreeClassifier(max_depth=5,random_state=random_state),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1,random_state=random_state),
    MLPClassifier(alpha=1,random_state=random_state),
    AdaBoostClassifier(random_state=random_state),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

In [52]:
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print("Score for {} is  {} ".format(name, score))

Score for Nearest Neighbors is  0.504835589941973 
Score for Poly SVM is  0.5067698259187621 
Score for RBF SVM is  0.5067698259187621 
Score for Gaussian Process is  0.4932301740812379 
Score for Decision Tree is  0.48936170212765956 
Score for Random Forest is  0.4990328820116054 
Score for Neural Net is  0.5164410058027079 
Score for AdaBoost is  0.5319148936170213 
Score for Naive Bayes is  0.504835589941973 
Score for QDA is  0.5067698259187621 
