In [39]:
import pandas as pd
import numpy as np
import math
import datetime as dt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 
from sklearn.preprocessing import MinMaxScaler

from datetime import date
from datetime import time
from datetime import datetime
from datetime import timedelta
from itertools import cycle

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [40]:
data = pd.read_csv('D:/Chrome Downloads/Train.csv')
data2=data.copy()
data.head()


Unnamed: 0,Date,Open,High,Low,Close,Volume,Stock Trading
0,2016-12-30,42120,42330,41700,41830,610000,25628028000
1,2016-12-29,43000,43220,42540,42660,448400,19188227000
2,2016-12-28,43940,43970,43270,43270,339900,14780670000
3,2016-12-27,43140,43700,43140,43620,400100,17427993000
4,2016-12-26,43310,43660,43090,43340,358200,15547803000


In [41]:
# Checking null value
data.isnull().sum()

Date             0
Open             0
High             0
Low              0
Close            0
Volume           0
Stock Trading    0
dtype: int64

In [42]:
# Checking na value
data.isna().any()

Date             False
Open             False
High             False
Low              False
Close            False
Volume           False
Stock Trading    False
dtype: bool

In [43]:
# Checking Data type of each column
print("Date column data type: ", type(data['Date'][0]))
print("Open column data type: ", type(data['Open'][0]))
print("Close column data type: ", type(data['Close'][0]))
print("High column data type: ", type(data['High'][0]))
print("Low column data type: ", type(data['Low'][0]))

Date column data type:  <class 'str'>
Open column data type:  <class 'numpy.int64'>
Close column data type:  <class 'numpy.int64'>
High column data type:  <class 'numpy.int64'>
Low column data type:  <class 'numpy.int64'>


In [44]:
# convert date field from string to Date format and make it index
data['Date'] = pd.to_datetime(data.Date)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Stock Trading
0,2016-12-30,42120,42330,41700,41830,610000,25628028000
1,2016-12-29,43000,43220,42540,42660,448400,19188227000
2,2016-12-28,43940,43970,43270,43270,339900,14780670000
3,2016-12-27,43140,43700,43140,43620,400100,17427993000
4,2016-12-26,43310,43660,43090,43340,358200,15547803000


In [45]:
#Sort dataset by w.r.t date coloumn
data.sort_values(by='Date', inplace=True)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Stock Trading
1225,2012-01-04,14050,14050,13700,13720,559100,7719804000
1224,2012-01-05,13720,13840,13600,13800,511500,7030811000
1223,2012-01-06,13990,14030,13790,13850,765500,10635609000
1222,2012-01-10,13890,14390,13860,14390,952300,13533413000
1221,2012-01-11,14360,14750,14280,14590,1043400,15191988000


In [46]:
#Check the size
data.shape

(1226, 7)

In [47]:
#Total Time duration
print("Starting date: ",data.iloc[0][0])
print("Ending date: ", data.iloc[-1][0])
print("Duration: ", data.iloc[-1][0]-data.iloc[0][0])

Starting date:  2012-01-04 00:00:00
Ending date:  2016-12-30 00:00:00
Duration:  1822 days 00:00:00


In [48]:
monthvise= data.groupby(data['Date'].dt.strftime('%B'))[['Open','Close']].mean().sort_values(by='Close')
monthvise.head()

Unnamed: 0_level_0,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
February,30613.979592,30556.326531
January,31510.736842,31551.526316
May,32572.525253,32591.515152
June,32655.849057,32616.603774
March,33158.238095,33193.571429


In [49]:
fig = go.Figure()

fig.add_trace(go.Bar(
    x=monthvise.index,
    y=monthvise['Open'],
    name='Stock Open Price',
    marker_color='crimson'
))
fig.add_trace(go.Bar(
    x=monthvise.index,
    y=monthvise['Close'],
    name='Stock Close Price',
    marker_color='lightsalmon'
))

fig.update_layout(barmode='group', xaxis_tickangle=-45, 
                  title='Monthwise comparision between Stock actual, open and close price')
fig.show()

In [50]:
names = cycle(['Stock Open Price','Stock Close Price'])

fig2 = px.line(data, x=data.Date, y=[data['Open'], data['Close']],labels={'date': 'Date','value':'Stock value'})
fig2.update_layout(title_text='Stock Analysis chart', font_size=15, font_color='black',legend_title_text='Stock Parameters')
fig2.for_each_trace(lambda t:  t.update(name = next(names)))
fig2.update_xaxes(showgrid=False)
fig2.update_yaxes(showgrid=False)

fig2.show()

In [51]:
closedf = data[['Date','Close']]
print("Shape of close dataframe:", closedf.shape)
print(closedf)

Shape of close dataframe: (1226, 2)
           Date  Close
1225 2012-01-04  13720
1224 2012-01-05  13800
1223 2012-01-06  13850
1222 2012-01-10  14390
1221 2012-01-11  14590
...         ...    ...
4    2016-12-26  43340
3    2016-12-27  43620
2    2016-12-28  43270
1    2016-12-29  42660
0    2016-12-30  41830

[1226 rows x 2 columns]


In [52]:
#Plotting stock close price chart
fig3 = px.line(closedf, x=closedf.Date, y=closedf.Close,labels={'Date':'Date','Close':'Close Stock'})
fig3.update_traces(marker_line_width=2, opacity=0.6)
fig3.update_layout(title_text='Stock close price chart', plot_bgcolor='white', font_size=15, font_color='black')
fig3.update_xaxes(showgrid=False)
fig3.update_yaxes(showgrid=False)
fig3.show()

In [53]:
#Normalizing / scaling close value between 0 to 1
du = closedf.copy()
close_stock = closedf.copy()
del closedf['Date']
scaler=MinMaxScaler(feature_range=(0,1))
closedf=scaler.fit_transform(np.array(closedf).reshape(-1,1))
print(closedf.shape)
print(close_stock)

(1226, 1)
           Date  Close
1225 2012-01-04  13720
1224 2012-01-05  13800
1223 2012-01-06  13850
1222 2012-01-10  14390
1221 2012-01-11  14590
...         ...    ...
4    2016-12-26  43340
3    2016-12-27  43620
2    2016-12-28  43270
1    2016-12-29  42660
0    2016-12-30  41830

[1226 rows x 2 columns]


In [54]:
training_size=int(len(closedf)*0.7)
test_size=len(closedf)-training_size
train_data,test_data=closedf[0:training_size,:],closedf[training_size:len(closedf),:1]
print("train_data: ", train_data.shape)
#print(train_data)
print("test_data: ", test_data.shape)

train_data:  (858, 1)
test_data:  (368, 1)


In [55]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

# reshape
time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

print("X_train: ", X_train.shape)
#print(X_train)
print("y_train: ", y_train.shape)
#print(y_train)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)


X_train:  (842, 15)
y_train:  (842,)
X_test:  (352, 15)
y_test (352,)


In [56]:
from sklearn.svm import SVR
#parameters are chosen after seeing and using different examples
svr_lin = SVR(kernel= 'poly',degree=2)
svr_poly = SVR(kernel= 'poly', degree= 2)
svr_rbf = SVR(kernel= 'rbf') #Three different models
svr_rbf.fit(X_train, y_train)
#svr_lin.fit(X_train, y_train)
svr_poly.fit(X_train, y_train)



SVR(degree=2, kernel='poly')

In [57]:
train_predict=svr_rbf.predict(X_train)
print(train_predict.shape)


(842,)


In [58]:
# The prediction 
#1. RBF
train_predict=svr_rbf.predict(X_train)
tp=train_predict
test_predict=svr_rbf.predict(X_test)

train_predict = train_predict.reshape(-1,1)
test_predict = test_predict.reshape(-1,1)
print("Using RBF kernel:")
print("Train data prediction:", train_predict.shape)
print("Test data prediction:", test_predict.shape)
print("-------------------------------------------")


#3. Polynomial
train_predict3=svr_poly.predict(X_train)
tp2=train_predict3
test_predict3=svr_poly.predict(X_test)

train_predict3 = train_predict3.reshape(-1,1)
test_predict3 = test_predict3.reshape(-1,1)
print("Using Polynomial kernel:")
print("Train data prediction:", train_predict3.shape)
print("Test data prediction:", test_predict3.shape)

print("-------------------------------------------")
#2. Mix
svr_lin.fit(X_train, tp)
train_predict2=svr_lin.predict(X_train)
test_predict2=svr_lin.predict(X_test)

train_predict2 = train_predict2.reshape(-1,1)
test_predict2 = test_predict2.reshape(-1,1)
print("Using Hybrid kernel:")
print("Train data prediction:", train_predict2.shape)
print("Test data prediction:", test_predict2.shape)



Using RBF kernel:
Train data prediction: (842, 1)
Test data prediction: (352, 1)
-------------------------------------------
Using Polynomial kernel:
Train data prediction: (842, 1)
Test data prediction: (352, 1)
-------------------------------------------
Using Hybrid kernel:
Train data prediction: (842, 1)
Test data prediction: (352, 1)


In [59]:
#Comparision between original stock close price vs predicted close price
#1.RBF
# shift train predictions for plotting
look_back=time_step
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(closedf)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(train_predict)+(look_back*2)+1:len(closedf)-1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({'Date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['Date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','Date': 'Date'})
fig.update_layout(title_text='Original close price vs predicted close price(Using RBF Kernel)',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (1226, 1)
Test predicted data:  (1226, 1)


In [60]:
#Comparision between original stock close price vs predicted close price
#2. Sigmoid
# shift train predictions for plotting
look_back2=time_step
trainPredictPlot2 = np.empty_like(closedf)
trainPredictPlot2[:, :] = np.nan
trainPredictPlot2[look_back2:len(train_predict2)+look_back2, :] = train_predict2
print("Train predicted data: ", trainPredictPlot2.shape)

# shift test predictions for plotting
testPredictPlot2 = np.empty_like(closedf)
testPredictPlot2[:, :] = np.nan
testPredictPlot2[len(train_predict2)+(look_back2*2)+1:len(closedf)-1, :] = test_predict2
print("Test predicted data: ", testPredictPlot2.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({'Date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                      'train_predicted_close': trainPredictPlot2.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot2.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['Date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','Date': 'Date'})
fig.update_layout(title_text='Original close price vs predicted close price(Using Hybrid Model)',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (1226, 1)
Test predicted data:  (1226, 1)


In [61]:
#Comparision between original stock close price vs predicted close price
#3. Polynomial
# shift train predictions for plotting
look_back3=time_step
trainPredictPlot3 = np.empty_like(closedf)
trainPredictPlot3[:, :] = np.nan
trainPredictPlot3[look_back3:len(train_predict3)+look_back3, :] = train_predict3
print("Train predicted data: ", trainPredictPlot3.shape)

# shift test predictions for plotting
testPredictPlot3 = np.empty_like(closedf)
testPredictPlot3[:, :] = np.nan
testPredictPlot3[len(train_predict3)+(look_back3*2)+1:len(closedf)-1, :] = test_predict3
print("Test predicted data: ", testPredictPlot3.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({'Date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                      'train_predicted_close': trainPredictPlot3.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot3.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['Date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','Date': 'Date'})
fig.update_layout(title_text='Original close price vs predicted close price(Using Polynomial Kernel)',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (1226, 1)
Test predicted data:  (1226, 1)


In [62]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
para={'n_neighbors':[2,3,4,5,6,7]}
knn=KNeighborsRegressor()
m1=GridSearchCV(knn,para,cv=5)
m1.fit(X_train,y_train)
p1=m1.predict(X_test)
r2=r2_score(y_test,p1)
p2=m1.predict(X_train)
test_predict4=p1.reshape(-1,1)
train_predict4=p2.reshape(-1,1)

In [63]:
train_predict4 = scaler.inverse_transform(train_predict4)
test_predict4 = scaler.inverse_transform(test_predict4)

In [64]:
from sklearn.model_selection import GridSearchCV
para2={'C':[1,10,100,50,25,1000],'gamma':[0.05,0.1,0.06,0.025,0.9,0.01]}
rb=SVR(kernel='rbf')
m2=GridSearchCV(rb,para2,cv=5)
m2.fit(X_train,y_train)
rp1=m2.predict(X_test)
#rp2=r2_score(y_test,p1)
rp2=m2.predict(X_train)
test_predict5=rp1.reshape(-1,1)
train_predict5=rp2.reshape(-1,1)
r=r2_score(y_train,train_predict5)



In [65]:
#1.RBF
# shift train predictions for plotting
look_back4=time_step
trainPredictPlot4 = np.empty_like(closedf)
trainPredictPlot4[:, :] = np.nan
trainPredictPlot4[look_back:len(train_predict4)+look_back, :] = train_predict4
print("Train predicted data: ", trainPredictPlot4.shape)

# shift test predictions for plotting
testPredictPlot4 = np.empty_like(closedf)
testPredictPlot4[:, :] = np.nan
testPredictPlot4[len(train_predict4)+(look_back4*2)+1:len(closedf)-1, :] = test_predict4
print("Test predicted data: ", testPredictPlot4.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({'Date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                      'train_predicted_close': trainPredictPlot4.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot4.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['Date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','Date': 'Date'})
fig.update_layout(title_text='Original close price vs predicted close price(Using  KNN Algorithm)',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (1226, 1)
Test predicted data:  (1226, 1)


In [66]:
#Predicting next 10 days on the basis of last 15 test data
#1. RBF Kernel
x_input=test_data[len(test_data)-time_step:].reshape(1,-1)
temp_input=list(x_input)
temp_input=temp_input[0].tolist()

from numpy import array

lst_output=[]
n_steps=time_step
i=0
pred_days = 30
while(i<pred_days):
    
    if(len(temp_input)>time_step):
        
        x_input=np.array(temp_input[1:])
        #print("{} day input {}".format(i,x_input))
        x_input=x_input.reshape(1,-1)
        
        yhat = svr_rbf.predict(x_input)
        #print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat.tolist())
        temp_input=temp_input[1:]
       
        lst_output.extend(yhat.tolist())
        i=i+1
        
    else:
        yhat = svr_rbf.predict(x_input)
        
        temp_input.extend(yhat.tolist())
        lst_output.extend(yhat.tolist())
        
        i=i+1
        
print("Output of predicted next days(RBF): ", lst_output)

Output of predicted next days(RBF):  [0.6093208985326327, 0.6095506250642679, 0.6101285027102785, 0.6110564975550256, 0.613335063814147, 0.6138143107243054, 0.6140423688117891, 0.6157584638741277, 0.6159461997335955, 0.6160547808167482, 0.6158738018173152, 0.6172563488008311, 0.6171787534394425, 0.6167872274493954, 0.6177740620770636, 0.6200149933580261, 0.620944994786514, 0.6219362553231971, 0.6231541843332025, 0.624332216599463, 0.6253893959816889, 0.6264895619307035, 0.6277188578770218, 0.6288668195600738, 0.6300342480355139, 0.6313088467727366, 0.6327034887358984, 0.6340346709710314, 0.6354565550387589, 0.6370470573665483]


In [67]:
#2. Polynomial Kernel
x_input2=test_data[len(test_data)-time_step:].reshape(1,-1)
temp_input2=list(x_input2)
temp_input2=temp_input2[0].tolist()

from numpy import array

lst_output2=[]
n_steps2=time_step
i=0
pred_days2 = 30
while(i<pred_days2):
    
    if(len(temp_input2)>time_step):
        
        x_input2=np.array(temp_input2[1:])
        #print("{} day input {}".format(i,x_input))
        x_input2=x_input2.reshape(1,-1)
        
        yhat2 = svr_poly.predict(x_input2)
        #print("{} day output {}".format(i,yhat))
        temp_input2.extend(yhat2.tolist())
        temp_input2=temp_input2[1:]
       
        lst_output2.extend(yhat2.tolist())
        i=i+1
        
    else:
        yhat2 = svr_poly.predict(x_input2)
        
        temp_input2.extend(yhat2.tolist())
        lst_output2.extend(yhat2.tolist())
        
        i=i+1
        
print("Output of predicted next days(Polynomial): ", lst_output2)

Output of predicted next days(Polynomial):  [0.5455589213625707, 0.49703899419752484, 0.43704515496116936, 0.37373957701731864, 0.3079164978008613, 0.2444932314379009, 0.19763819133217517, 0.1754069883079132, 0.17844349427458958, 0.2030878385375636, 0.238256876846038, 0.2675804603159555, 0.2813054255845351, 0.28213035011380394, 0.2747791125656469, 0.2640966056278955, 0.251702434757086, 0.23734023764148013, 0.22011934922748175, 0.2009166131920643, 0.18329060488016033, 0.17011516368777244, 0.162476916131953, 0.1604311717340294, 0.16293522057439588, 0.16773676040369231, 0.17220301631629795, 0.17517619267718196, 0.17671227490319083, 0.17731950393005633]


In [68]:
#3. Polynomial Kernel-2
x_input3=test_data[len(test_data)-time_step:].reshape(1,-1)
temp_input3=list(x_input3)
temp_input3=temp_input3[0].tolist()

from numpy import array

lst_output3=[]
n_steps3=time_step
i=0
pred_days3 = 30
while(i<pred_days3):
    
    if(len(temp_input3)>time_step):
        
        x_input3=np.array(temp_input3[1:])
        #print("{} day input {}".format(i,x_input))
        x_input3=x_input3.reshape(1,-1)
        
        yhat3 = svr_lin.predict(x_input3)
        #print("{} day output {}".format(i,yhat))
        temp_input3.extend(yhat3.tolist())
        temp_input3=temp_input3[1:]
       
        lst_output3.extend(yhat3.tolist())
        i=i+1
        
    else:
        yhat3 = svr_lin.predict(x_input3)
        
        temp_input3.extend(yhat3.tolist())
        lst_output3.extend(yhat3.tolist())
        
        i=i+1
        
print("Output of predicted next days(Polynomial): ", lst_output3)

Output of predicted next days(Polynomial):  [0.5308751039866936, 0.5259177276591347, 0.5199671601327435, 0.5136801219368882, 0.5073389323339546, 0.500815855641043, 0.49379175214250803, 0.4864615434997559, 0.4781919302334249, 0.46911242400096753, 0.46031231353784163, 0.45132582980194624, 0.44167967866859237, 0.43215094401044507, 0.422966225581738, 0.41437648408402117, 0.4082061908277692, 0.4020901112783728, 0.3960824662446809, 0.3902015266136776, 0.3844502615237535, 0.37884097757372975, 0.37340150881831624, 0.36814830186852876, 0.36312629433593435, 0.3583733827063376, 0.3538789724710071, 0.34965330212681955, 0.34572590031159073, 0.3420917567397993]


In [69]:
#4. KNN
x_input4=test_data[len(test_data)-time_step:].reshape(1,-1)
temp_input4=list(x_input4)
temp_input4=temp_input4[0].tolist()

from numpy import array

lst_output4=[]
n_steps4=time_step
i=0
pred_days4 = 30
while(i<pred_days4):
    
    if(len(temp_input4)>time_step):
        
        x_input4=np.array(temp_input4[1:])
        #print("{} day input {}".format(i,x_input))
        x_input4=x_input4.reshape(1,-1)
        
        yhat4 = m1.predict(x_input4)
        #print("{} day output {}".format(i,yhat))
        temp_input4.extend(yhat4.tolist())
        temp_input4=temp_input4[1:]
       
        lst_output4.extend(yhat4.tolist())
        i=i+1
        
    else:
        yhat4 = m1.predict(x_input2)
        
        temp_input4.extend(yhat4.tolist())
        lst_output4.extend(yhat4.tolist())
        
        i=i+1
        
print("Output of predicted next days(Polynomial): ", lst_output4)

Output of predicted next days(Polynomial):  [0.21267075591904466, 0.5427445402554302, 0.544611372862773, 0.5319583963018936, 0.5168311257296944, 0.5132159895694431, 0.5108454084807538, 0.5151272705721989, 0.5122677571339674, 0.5152161673630248, 0.5154680416036982, 0.5124159184520105, 0.4981183512608528, 0.4786499540699914, 0.4919252081666518, 0.4719826947580526, 0.4814946513764186, 0.4771090763623433, 0.4903546981953952, 0.48179097401250487, 0.47823510237947076, 0.4902213530091564, 0.5019557293981688, 0.5083118499422171, 0.5224019912881145, 0.5134234154147035, 0.5070821110024595, 0.4829170000296322, 0.4753015082822176, 0.48066494799537735]


In [70]:
#data2 = data2[['Date','Close']]
#print(data2)
#data2['Date'] = pd.to_datetime(data2.Date)
close_stock2=close_stock.copy()
close_stock3=close_stock.copy()
close_stock4=close_stock.copy()
svrdf=closedf.tolist()
svrdf.extend((np.array(lst_output).reshape(-1,1)).tolist())
svrdf=scaler.inverse_transform(svrdf).reshape(1,-1).tolist()[0]
last_date = close_stock.at[0,'Date']

i=0
while i<30:
    last_date = last_date + timedelta(days=1,seconds=0, minutes=0, hours=0)
    add = {'Date':last_date,'Close':svrdf[1226+i]}
    close_stock=close_stock.append(add,ignore_index=True)
    i=i+1
#print(last_date)
print(close_stock)
#data2.sort_values(by='Date')

           Date         Close
0    2012-01-04  13720.000000
1    2012-01-05  13800.000000
2    2012-01-06  13850.000000
3    2012-01-10  14390.000000
4    2012-01-11  14590.000000
...         ...           ...
1251 2017-01-25  44155.399503
1252 2017-01-26  44222.635192
1253 2017-01-27  44286.811488
1254 2017-01-28  44355.360518
1255 2017-01-29  44432.038636

[1256 rows x 2 columns]


In [71]:
#2. Polynomial Kernel
svrdf2=closedf.tolist()
svrdf2.extend((np.array(lst_output2).reshape(-1,1)).tolist())
svrdf2=scaler.inverse_transform(svrdf2).reshape(1,-1).tolist()[0]
last_date2 = close_stock2.at[0,'Date']

i=0
while i<30:
    last_date2 = last_date2 + timedelta(days=1,seconds=0, minutes=0, hours=0)
    add = {'Date':last_date2,'Close':svrdf2[1226+i]}
    close_stock2=close_stock2.append(add,ignore_index=True)
    i=i+1
#print(last_date)
print(close_stock2)

           Date         Close
0    2012-01-04  13720.000000
1    2012-01-05  13800.000000
2    2012-01-06  13850.000000
3    2012-01-10  14390.000000
4    2012-01-11  14590.000000
...         ...           ...
1251 2017-01-25  21806.589219
1252 2017-01-26  22021.907417
1253 2017-01-27  22165.244249
1254 2017-01-28  22239.298773
1255 2017-01-29  22268.573284

[1256 rows x 2 columns]


In [72]:
#3. Polynomial Kernel-2
svrdf3=closedf.tolist()
svrdf3.extend((np.array(lst_output3).reshape(-1,1)).tolist())
svrdf3=scaler.inverse_transform(svrdf3).reshape(1,-1).tolist()[0]
last_date3 = close_stock3.at[0,'Date']

i=0
while i<30:
    last_date3 = last_date3 + timedelta(days=1,seconds=0, minutes=0, hours=0)
    add = {'Date':last_date3,'Close':svrdf3[1226+i]}
    close_stock3=close_stock3.append(add,ignore_index=True)
    i=i+1
#print(last_date)
print(close_stock3)

           Date         Close
0    2012-01-04  13720.000000
1    2012-01-05  13800.000000
2    2012-01-06  13850.000000
3    2012-01-10  14390.000000
4    2012-01-11  14590.000000
...         ...           ...
1251 2017-01-25  30997.180780
1252 2017-01-26  30780.505263
1253 2017-01-27  30576.785696
1254 2017-01-28  30387.445654
1255 2017-01-29  30212.243592

[1256 rows x 2 columns]


In [73]:
#4. KNN
svrdf4=closedf.tolist()
svrdf4.extend((np.array(lst_output4).reshape(-1,1)).tolist())
svrdf4=scaler.inverse_transform(svrdf4).reshape(1,-1).tolist()[0]
last_date4 = close_stock4.at[0,'Date']

i=0
while i<30:
    last_date4 = last_date4 + timedelta(days=1,seconds=0, minutes=0, hours=0)
    add = {'Date':last_date4,'Close':svrdf4[1226+i]}
    close_stock4=close_stock4.append(add,ignore_index=True)
    i=i+1
#print(last_date)
print(close_stock4)

           Date         Close
0    2012-01-04  13720.000000
1    2012-01-05  13800.000000
2    2012-01-06  13850.000000
3    2012-01-10  14390.000000
4    2012-01-11  14590.000000
...         ...           ...
1251 2017-01-25  38472.142857
1252 2017-01-26  38166.428571
1253 2017-01-27  37001.428571
1254 2017-01-28  36634.285714
1255 2017-01-29  36892.857143

[1256 rows x 2 columns]


In [74]:

names = cycle(['RBF','Polynomial'])

#close_stock['Date'] = pd.to_datetime(close_stock.Date)
#print(type(close_stock['Date'][0]))

plotdfx = pd.DataFrame({'Date': close_stock['Date'],
                       'RBF_close': close_stock['Close'],'Polynomial_close': close_stock2['Close']})
fig = px.line(plotdfx,x=plotdfx['Date'], y=[plotdfx['RBF_close'],plotdfx['Polynomial_close']],
              labels={'value':'Stock price','Date': 'Date'})
fig.update_layout(title_text='Closing stock price with prediction',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Kernel')
fig.for_each_trace(lambda t:  t.update(name = next(names)))
fig.update_xaxes(nticks=20)
fig.update_yaxes(nticks=20)
fig.show()

In [75]:
temp=close_stock[1225:1255]
temp2=close_stock2[1225:1255]
temp3=close_stock3[1225:1255]
temp4=close_stock4[1225:1255]
#print(temp)
names1 = cycle(['RBF','Polynomial','Hybrid','KNN'])
plotdfxx = pd.DataFrame({'Date': temp['Date'],
                       'RBF_close': temp['Close'],'Polynomial_close': temp2['Close'],'Poly-2_close': temp3['Close'],'KNN_close': temp4['Close']})
fig7 = px.line(plotdfxx,x=plotdfxx['Date'],y=[plotdfxx['RBF_close'],plotdfxx['Polynomial_close'],plotdfxx['Poly-2_close'],plotdfxx['KNN_close']],
               labels={'Close': 'Stock price','Date': 'Date'})
fig7.update_layout(title_text='Closing stock price(30 days prediction)',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Kernel')
fig7.for_each_trace(lambda t:  t.update(name = next(names1)))
fig7.update_xaxes(nticks=20)
fig7.update_yaxes(nticks=20)
fig7.show()

In [76]:
# Transform back to original form
#1. RBF kernel
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) 
original_ytest = scaler.inverse_transform(y_test.reshape(-1,1)) 

#2. Hybrid
train_predict2 = scaler.inverse_transform(train_predict2)
test_predict2 = scaler.inverse_transform(test_predict2)

#3.Polynomial
train_predict3 = scaler.inverse_transform(train_predict3)
test_predict3 = scaler.inverse_transform(test_predict3)

print("R^2 value for RBF Kernel:")
print("Train data R2 score:", r2_score(original_ytrain, train_predict))
print("Test data R2 score:", r2_score(original_ytest, test_predict))
print("-----------------------------------------------------------------")

print("R^2 value for Hybrid Kernel:")
print("Train data R2 score:", r2_score(original_ytrain, train_predict2))
print("Test data R2 score:", r2_score(original_ytest, test_predict2))
print("-----------------------------------------------------------------")

print("R^2 value for Polynomial Kernel:")
print("Train data R2 score:", r2_score(original_ytrain, train_predict3))
print("Test data R2 score:", r2_score(original_ytest, test_predict3))
print("-----------------------------------------------------------------")

print("R^2 value for KNN ALgorithm:")
print("Train data R2 score:", r2_score(original_ytrain, train_predict4))
print("Test data R2 score:", r2_score(original_ytest, test_predict4))
print("-----------------------------------------------------------------")

R^2 value for RBF Kernel:
Train data R2 score: 0.962349773332185
Test data R2 score: 0.8347206956321351
-----------------------------------------------------------------
R^2 value for Hybrid Kernel:
Train data R2 score: 0.7780022164641915
Test data R2 score: 0.8242319840380677
-----------------------------------------------------------------
R^2 value for Polynomial Kernel:
Train data R2 score: 0.9019301885404767
Test data R2 score: 0.7453352797875177
-----------------------------------------------------------------
R^2 value for KNN ALgorithm:
Train data R2 score: 0.9947423045290149
Test data R2 score: 0.9179757203815151
-----------------------------------------------------------------


In [77]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
# Test options and evaluation metric
num_folds = 10
seed = 7
scoring = "r2"

# Spot-Check Algorithms
models = []
#models.append((' LR ', LinearRegression()))
models.append((' LASSO ', Lasso()))
models.append((' EN ', ElasticNet()))
models.append((' KNN ', KNeighborsRegressor()))
models.append((' CART ', DecisionTreeRegressor()))
models.append((' SVR(RBF) ', SVR(kernel='rbf')))
models.append((' SVR(Poly) ', SVR(kernel='poly',degree=2)))
models.append((' SVR(Sigmoid)', SVR(kernel='sigmoid')))

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
    cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
    # print(cv_results)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)


 LASSO : -0.017648 (0.023903)
 EN : -0.017648 (0.023903)
 KNN : 0.992455 (0.001493)
 CART : 0.989941 (0.002372)
 SVR(RBF) : 0.960602 (0.005529)
 SVR(Poly) : 0.899082 (0.009709)
 SVR(Sigmoid): -28729.559229 (1256.718111)
