In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#read in our data
url="https://raw.githubusercontent.com/AnuDesmond/Predict-Stock-Signal/master/Sample%20Stock.csv"
stock=pd.read_csv(url)

In [3]:
#remove datetime column
X=stock.iloc[:,1:73]
#add a lag variable to the dataset 
X['L1']=X['Y'].shift(1)
X=X.dropna()
X.head()

Unnamed: 0,F0,F1,F2,F3,F5,F6,F7,F8,F9,F10,...,F64,F65,F66,F67,F68,F69,F70,F71,Y,L1
1,12.686,0.088,-0.1,56.1,279.471,56.1,1.43,-1.86,6.63,-0.67,...,0.825,0.38,1.03,1.25,1.225,-0.053,0.357,0.192,1,0.0
2,12.67,0.088,0.0,67.4,279.113,67.4,0.11,-1.59,6.18,-0.71,...,0.832,0.374,1.142,1.541,1.38,-0.091,0.06,0.033,1,1.0
3,12.683,0.088,0.1,72.2,279.418,72.2,1.16,-1.13,8.12,0.11,...,1.08,0.286,0.829,1.209,1.38,-0.345,-0.058,-0.076,1,1.0
4,12.675,0.088,0.0,74.9,279.246,74.9,1.39,-1.4,7.35,0.4,...,0.986,0.328,0.91,1.34,1.368,-0.358,0.192,0.096,1,1.0
5,12.645,0.088,-0.1,80.3,278.584,80.3,1.16,-2.45,6.76,-1.63,...,0.829,0.368,0.77,1.882,1.524,-0.374,0.041,0.021,1,1.0


In [4]:
X.shape

(8426, 73)

In [5]:
#prepare the data for machine learning
x=X.drop('Y', axis=1)
y=X['Y']

In [6]:
#split data set into traing dataset and test dataset
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)

In [7]:
#scaling my data so that I can speed up in machine learing process
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(x_train)
x_train = scaling.transform(x_train)
x_test = scaling.transform(x_test)

In [8]:
#conduct svm
from sklearn.svm import SVC 
svclassifier = SVC(kernel='linear')  
svclassifier.fit(x_train, y_train)  

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
#check our precision
y_pred = svclassifier.predict(x_test)  
from sklearn.metrics import classification_report, confusion_matrix 
print(confusion_matrix(y_test,y_pred))  
print(classification_report(y_test,y_pred))  

[[749 180]
 [185 572]]
             precision    recall  f1-score   support

          0       0.80      0.81      0.80       929
          1       0.76      0.76      0.76       757

avg / total       0.78      0.78      0.78      1686



In [10]:
#change kernel type
svclassifier = SVC(kernel='sigmoid')  
svclassifier.fit(x_train, y_train)  
y_pred = svclassifier.predict(x_test) 
print(confusion_matrix(y_test,y_pred))  
print(classification_report(y_test,y_pred))  

[[741 188]
 [184 573]]
             precision    recall  f1-score   support

          0       0.80      0.80      0.80       929
          1       0.75      0.76      0.75       757

avg / total       0.78      0.78      0.78      1686



In [11]:
#see how many groups of time in the dataset
import datetime
stock['datetime']=pd.to_datetime(stock.datetime)
grp = stock.groupby(by=[stock.datetime.map(lambda x : (x.hour, x.minute))])
grp.count()

Unnamed: 0_level_0,datetime,F0,F1,F2,F3,F5,F6,F7,F8,F9,...,F63,F64,F65,F66,F67,F68,F69,F70,F71,Y
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(9, 30)",650,650,650,650,650,650,650,650,650,650,...,650,650,650,650,650,650,650,650,650,650
"(10, 0)",650,650,650,650,650,650,650,650,650,650,...,650,650,650,650,650,650,650,650,650,650
"(10, 30)",649,649,649,649,649,649,649,649,649,649,...,649,649,649,649,649,649,649,649,649,649
"(11, 0)",649,649,649,649,649,649,649,649,649,649,...,649,649,649,649,649,649,649,649,649,649
"(11, 30)",649,649,649,649,649,649,649,649,649,649,...,649,649,649,649,649,649,649,649,649,649
"(12, 0)",648,648,648,648,648,648,648,648,648,648,...,648,648,648,648,648,648,648,648,648,648
"(12, 30)",648,648,648,648,648,648,648,648,648,648,...,648,648,648,648,648,648,648,648,648,648
"(13, 0)",648,648,648,648,648,648,648,648,648,648,...,648,648,648,648,648,648,648,648,648,648
"(13, 30)",648,648,648,648,648,648,648,648,648,648,...,648,648,648,648,648,648,648,648,648,648
"(14, 0)",647,647,647,647,647,647,647,647,647,647,...,647,647,647,647,647,647,647,647,647,647


In [12]:
#take time 9:30 as an example
import datetime as dt
X930=stock[stock.datetime.dt.hour==9][stock.datetime.dt.minute==30]
x=X930.drop(['datetime', 'Y'], axis=1)
y=X930['Y']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)  
scaling = MinMaxScaler(feature_range=(-1,1)).fit(x_train)
x_train = scaling.transform(x_train)
x_test = scaling.transform(x_test)
svclassifier = SVC(kernel='sigmoid')  
svclassifier.fit(x_train, y_train)
y_pred = svclassifier.predict(x_test) 
print(confusion_matrix(y_test,y_pred))  
print(classification_report(y_test,y_pred))  

[[70  3]
 [53  4]]
             precision    recall  f1-score   support

          0       0.57      0.96      0.71        73
          1       0.57      0.07      0.12        57

avg / total       0.57      0.57      0.46       130



  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
#using desition trees method 
url="https://raw.githubusercontent.com/AnuDesmond/Predict-Stock-Signal/master/Sample%20Stock.csv"
stock=pd.read_csv(url)
X=stock.iloc[:,1:73]
X['L1']=X['Y'].shift(1)
X=X.dropna()
x=X.drop('Y', axis=1)
y=X['Y']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)  
scaling = MinMaxScaler(feature_range=(-1,1)).fit(x_train)
x_train = scaling.transform(x_train)
x_test = scaling.transform(x_test)

In [14]:
from sklearn import tree
classifier = tree.DecisionTreeClassifier()
classifier.fit(x_train, y_train)  
y_pred = classifier.predict(x_test)
print(confusion_matrix(y_test,y_pred))  
print(classification_report(y_test,y_pred))  

[[727 208]
 [190 561]]
             precision    recall  f1-score   support

          0       0.79      0.78      0.79       935
          1       0.73      0.75      0.74       751

avg / total       0.76      0.76      0.76      1686

