In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import preprocessing

In [6]:
Data = pd.read_csv('AxisBankwTs.csv', index_col = None)
Data = Data.drop(['Unnamed: 0'], axis =1)
Data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume
0,2000-01-03,5.1,5.18,5.1,5.18,0.9778,82500
1,2000-01-04,5.4,5.51,5.08,5.38,1.0156,494000
2,2000-01-05,5.01,5.42,5.0,5.24,0.9892,383000
3,2000-01-06,5.3,5.3,5.0,5.11,0.9646,164500
4,2000-01-07,5.0,5.22,4.8,4.86,0.9174,149000


# Creating Targets

In [7]:
Targets = pd.DataFrame()

Target1=[]
for i in range(0,len(Data['Open'])-1):
    if(Data.loc[i+1]['Open']-Data.loc[i]['Close']>=0):
        Target1.append(1)
    else:
        Target1.append(0)
Targets['Target1']=pd.Series(Target1)

Target2=[]
for i in range(0,len(Data['Open'])-1):
    if(Data.loc[i+1]['Open']-Data.loc[i]['Open']>=0):
        Target2.append(1)
    else:
        Target2.append(0)
Targets['Target2']=pd.Series(Target2)

In [8]:
Targets.head()

Unnamed: 0,Target1,Target2
0,1,1
1,0,0
2,1,1
3,0,0
4,1,1


In [9]:
#Drop the Last row
Data = Data.drop(Data.index[len(Data)-1])
Data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume
0,2000-01-03,5.1,5.18,5.1,5.18,0.9778,82500
1,2000-01-04,5.4,5.51,5.08,5.38,1.0156,494000
2,2000-01-05,5.01,5.42,5.0,5.24,0.9892,383000
3,2000-01-06,5.3,5.3,5.0,5.11,0.9646,164500
4,2000-01-07,5.0,5.22,4.8,4.86,0.9174,149000


In [10]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4616 entries, 0 to 4615
Data columns (total 7 columns):
Date         4616 non-null object
Open         4616 non-null float64
High         4616 non-null float64
Low          4616 non-null float64
Close        4616 non-null float64
Adj_Close    4616 non-null float64
Volume       4616 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 288.5+ KB


#  Prediction Using TimeSeries Data

## Predicting Target 1 [Open(i+1) - Close(i)] 

### Machine Learning Models

In [11]:
X = Data
X = X.drop('Date', axis = 1)
Y = Targets['Target1']
prediction = pd.DataFrame()
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.6, random_state = 2,shuffle='True')
X_train = preprocessing.normalize(X_train)
X_test =  preprocessing.normalize(X_test)

### Decision Tree

In [12]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
prediction['DT'] = clf.predict(X_test)
print("Accuracy "+str(accuracy_score(y_test, prediction["DT"])))
print(classification_report(y_test, prediction['DT']))
conf_mat = confusion_matrix(y_test, prediction['DT'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

Accuracy 0.5740072202166066
             precision    recall  f1-score   support

          0       0.35      0.37      0.36       900
          1       0.69      0.67      0.68      1870

avg / total       0.58      0.57      0.58      2770

[[0.37333333 0.62666667]
 [0.32941176 0.67058824]]


### Logistic Regression

In [13]:
model = LogisticRegression()
model.fit(X_train,y_train)
prediction["LR"] = model.predict(X_test)
print(accuracy_score(y_test, prediction["LR"]))
print(classification_report(y_test, prediction['LR']))
conf_mat = confusion_matrix(y_test, prediction['LR'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

0.6750902527075813
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       900
          1       0.68      1.00      0.81      1870

avg / total       0.46      0.68      0.54      2770

[[0. 1.]
 [0. 1.]]


  'precision', 'predicted', average, warn_for)


In [14]:
# # Support Vector Machine(SVM)

model = svm.SVC()
model.fit(X_train, y_train)  

prediction["SVM"] = model.predict(X_test)

print(accuracy_score(y_test, prediction["SVM"]))

print(classification_report(y_test, prediction['SVM']))

conf_mat = confusion_matrix(y_test, prediction['SVM'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

0.6750902527075813
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       900
          1       0.68      1.00      0.81      1870

avg / total       0.46      0.68      0.54      2770

[[0. 1.]
 [0. 1.]]


  'precision', 'predicted', average, warn_for)


### K Nearest Neighbours(KNN)

In [15]:
model = KNeighborsClassifier(n_neighbors=3)
print(model.fit(X_train,y_train))
prediction["KNN"] = model.predict(X_test)
print("Accuracy : " + str(accuracy_score(y_test, prediction["KNN"])))
print(classification_report(y_test, prediction['KNN']))
conf_mat = confusion_matrix(y_test, prediction['KNN'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
Accuracy : 0.5902527075812274
             precision    recall  f1-score   support

          0       0.36      0.32      0.34       900
          1       0.69      0.72      0.70      1870

avg / total       0.58      0.59      0.58      2770

[[0.32111111 0.67888889]
 [0.2802139  0.7197861 ]]


### Random Forest Classifier

In [16]:
rf = RandomForestClassifier(n_estimators=100)
print(rf.fit(X_train, y_train))
prediction["Random Forest Classifier"] = rf.predict(X_test)
print(accuracy_score(y_test, prediction["Random Forest Classifier"]))
print(classification_report(y_test, prediction['Random Forest Classifier']))
conf_mat = confusion_matrix(y_test, prediction['Random Forest Classifier'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
0.607942238267148
             precision    recall  f1-score   support

          0       0.36      0.27      0.31       900
          1       0.69      0.77      0.73      1870

avg / total       0.58      0.61      0.59      2770

[[0.27444444 0.72555556]
 [0.2315508  0.7684492 ]]


### ADA Boost Classifier

In [17]:
model = AdaBoostClassifier()
print(model.fit(X_train,y_train))
prediction["ADA"] = model.predict(X_test)
print(accuracy_score(y_test, prediction["ADA"]))
print(classification_report(y_test, prediction['ADA']))
conf_mat = confusion_matrix(y_test, prediction['ADA'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)
0.6736462093862816
             precision    recall  f1-score   support

          0       0.17      0.00      0.00       900
          1       0.67      1.00      0.80      1870

avg / total       0.51      0.67      0.54      2770

[[0.00111111 0.99888889]
 [0.0026738  0.9973262 ]]


## Predicting Target 2 [Open(i+1) - Open(i)] 

### Machine Learning Models

In [18]:
X = Data
X = X.drop('Date', axis = 1)
Y = Targets['Target2']
prediction = pd.DataFrame()
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state = 42,shuffle='Fasle')
X_train = preprocessing.normalize(X_train)
X_test =  preprocessing.normalize(X_test)

### Decision Tree

In [19]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
prediction['DT'] = clf.predict(X_test)
print("Accuracy "+str(accuracy_score(y_test, prediction["DT"])))
print(classification_report(y_test, prediction['DT']))
conf_mat = confusion_matrix(y_test, prediction['DT'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

Accuracy 0.6830708661417323
             precision    recall  f1-score   support

          0       0.65      0.71      0.68       725
          1       0.72      0.66      0.68       799

avg / total       0.69      0.68      0.68      1524

[[0.71310345 0.28689655]
 [0.34418023 0.65581977]]


### Logistic Regression

In [20]:
model = LogisticRegression()
model.fit(X_train,y_train)
prediction["LR"] = model.predict(X_test)
print(accuracy_score(y_test, prediction["LR"]))
print(classification_report(y_test, prediction['LR']))
conf_mat = confusion_matrix(y_test, prediction['LR'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

0.5242782152230971
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       725
          1       0.52      1.00      0.69       799

avg / total       0.27      0.52      0.36      1524

[[0. 1.]
 [0. 1.]]


  'precision', 'predicted', average, warn_for)


In [21]:
# # Support Vector Machine(SVM)

model = svm.SVC()
model.fit(X_train, y_train)  

prediction["SVM"] = model.predict(X_test)

print(accuracy_score(y_test, prediction["SVM"]))

print(classification_report(y_test, prediction['SVM']))

conf_mat = confusion_matrix(y_test, prediction['SVM'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

0.5242782152230971
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       725
          1       0.52      1.00      0.69       799

avg / total       0.27      0.52      0.36      1524

[[0. 1.]
 [0. 1.]]


  'precision', 'predicted', average, warn_for)


### K Nearest Neighbours(KNN)

In [22]:
model = KNeighborsClassifier(n_neighbors=3)
print(model.fit(X_train,y_train))
prediction["KNN"] = model.predict(X_test)
print("Accuracy : " + str(accuracy_score(y_test, prediction["KNN"])))
print(classification_report(y_test, prediction['KNN']))
conf_mat = confusion_matrix(y_test, prediction['KNN'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
Accuracy : 0.6148293963254593
             precision    recall  f1-score   support

          0       0.59      0.60      0.60       725
          1       0.63      0.62      0.63       799

avg / total       0.62      0.61      0.61      1524

[[0.60413793 0.39586207]
 [0.37546934 0.62453066]]


### Random Forest Classifier

In [23]:
rf = RandomForestClassifier(n_estimators=100)
print(rf.fit(X_train, y_train))
prediction["Random Forest Classifier"] = rf.predict(X_test)
print(accuracy_score(y_test, prediction["Random Forest Classifier"]))
print(classification_report(y_test, prediction['Random Forest Classifier']))
conf_mat = confusion_matrix(y_test, prediction['Random Forest Classifier'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
0.718503937007874
             precision    recall  f1-score   support

          0       0.69      0.73      0.71       725
          1       0.74      0.71      0.72       799

avg / total       0.72      0.72      0.72      1524

[[0.73241379 0.26758621]
 [0.29411765 0.70588235]]


### ADA Boost Classifier

In [24]:
model = AdaBoostClassifier()
print(model.fit(X_train,y_train))
prediction["ADA"] = model.predict(X_test)
print(accuracy_score(y_test, prediction["ADA"]))
print(classification_report(y_test, prediction['ADA']))
conf_mat = confusion_matrix(y_test, prediction['ADA'])
conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
print(conf_mat_normalized)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)
0.5223097112860893
             precision    recall  f1-score   support

          0       0.50      0.36      0.42       725
          1       0.54      0.67      0.60       799

avg / total       0.52      0.52      0.51      1524

[[0.35724138 0.64275862]
 [0.32790989 0.67209011]]
