In [3]:
# Installing packages
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [9]:
#Get the stock data 
df = quandl.get("NSE/SBIN")

#Showing the Data
df.head()

Unnamed: 0_level_0,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1998-03-20,275.4,278.8,273.3,,,,
1998-03-23,278.45,280.9,278.45,279.95,279.95,591700.0,1654.91
1998-03-24,282.0,288.4,280.0,282.25,284.35,4979900.0,14231.55
1998-03-25,285.0,290.25,284.5,288.7,289.15,3632600.0,10436.93
1998-03-26,289.0,289.0,280.1,281.75,281.7,3861750.0,10996.67


In [14]:
#Get the adjusted close price 
df = df[['Close']]
df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
1998-03-20,
1998-03-23,279.95
1998-03-24,284.35
1998-03-25,289.15
1998-03-26,281.70
...,...
2018-12-31,295.90
2019-01-01,299.60
2019-01-02,293.90
2019-01-03,291.10


In [18]:
#Filling the missing data with price information
df.isnull().sum()
df.fillna(279, inplace = True)

In [19]:
# After removal of na values
df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
1998-03-20,279.00
1998-03-23,279.95
1998-03-24,284.35
1998-03-25,289.15
1998-03-26,281.70
...,...
2018-12-31,295.90
2019-01-01,299.60
2019-01-02,293.90
2019-01-03,291.10


In [58]:
# A variable for predicting n days into the future
forecast_out = 30
#Create another column (The target or dependant variable) shifted n units up.
df['Prediction'] = df[['Close']].shift(-forecast_out)

#Print the new dataset
print(df)

             Close  Prediction
Date                          
1998-03-20  279.00      282.05
1998-03-23  279.95      278.05
1998-03-24  284.35      278.60
1998-03-25  289.15      263.95
1998-03-26  281.70      256.60
...            ...         ...
2018-12-31  295.90         NaN
2019-01-01  299.60         NaN
2019-01-02  293.90         NaN
2019-01-03  291.10         NaN
2019-01-04  297.65         NaN

[5184 rows x 2 columns]


In [59]:
df.tail()

Unnamed: 0_level_0,Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31,295.9,
2019-01-01,299.6,
2019-01-02,293.9,
2019-01-03,291.1,
2019-01-04,297.65,


In [60]:
## Create an independant data seet(x)
# Convert the dataframe into a numpy array
x = np.array(df.drop(['Prediction'],1))
#Remove the last n rows 
x = x[:-forecast_out]
x

array([[279.  ],
       [279.95],
       [284.35],
       ...,
       [288.15],
       [283.45],
       [287.05]])

In [61]:
## Create the dependant dataset(y) ##
# Convert the dataframe to a numpy array(All values included)
y = np.array(df['Prediction'])
# Get all of they values except the last n rows
y = y[:-forecast_out]

In [62]:
y

array([282.05, 278.05, 278.6 , ..., 293.9 , 291.1 , 297.65])

In [63]:
#Split the data into 80% training and 20% testing 
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2)


In [64]:
# Create and train the support vector Machine(Regressor)
support_vector  = SVR(kernel = 'rbf', C = 1e3, gamma = 0.1)
support_vector.fit(x_train,y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [65]:
#Testing Model : Score returns the coefficient of determination R^2 of the prediction
# The best possible score is 1
svm_confidence = support_vector.score(x_test,y_test)
svm_confidence

0.8969459572355837

In [66]:
#Create and train the linear regression model
lr = LinearRegression()
#Train the model 
lr.fit(x_train,y_train)
lr

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [67]:
#Testing Model : Score returns the coefficient of determination R^2 of the prediction
# The best possible score is 1
lr_confidence = lr.score(x_test,y_test)
lr_confidence

0.930700981753204

In [68]:
# Set x_forecast equal to the the last 30 rows of the original data set from Close column 
x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]
x_forecast

array([[282.7 ],
       [286.4 ],
       [289.4 ],
       [284.55],
       [286.4 ],
       [284.65],
       [286.8 ],
       [282.55],
       [280.15],
       [275.65],
       [275.4 ],
       [274.2 ],
       [281.25],
       [285.25],
       [289.  ],
       [289.2 ],
       [289.5 ],
       [292.75],
       [300.7 ],
       [294.05],
       [291.9 ],
       [293.05],
       [294.15],
       [292.15],
       [294.8 ],
       [295.9 ],
       [299.6 ],
       [293.9 ],
       [291.1 ],
       [297.65]])

In [69]:
# Print the predictions for the next n=30 days 
# using linear regression
lr_prediction = lr.predict(x_forecast)
lr_prediction

array([311.67125066, 315.20809944, 318.07581468, 313.43967505,
       315.20809944, 313.53526556, 315.59046147, 311.5278649 ,
       309.23369271, 304.93211986, 304.6931436 , 303.5460575 ,
       310.2851883 , 314.10880861, 317.69345264, 317.88463366,
       318.17140518, 321.27809668, 328.87754205, 322.52077328,
       320.46557737, 321.56486821, 322.61636379, 320.70455364,
       323.23770209, 324.28919768, 327.82604646, 322.37738752,
       319.70085331, 325.96203156])

In [70]:
#using SVM
support_vector  = support_vector.predict(x_forecast)
print(support_vector)

[285.86745162 291.51666212 283.22280216 292.71235383 291.51666212
 292.12429335 292.19919389 283.30616205 274.36337658 279.46308066
 278.55597552 270.39095028 267.82167246 289.60321947 283.65213098
 283.36887463 283.18053737 278.79574894 285.13673381 287.00486786
 276.43628388 280.94344483 287.09929957 276.43850917 283.97530882
 267.48063697 286.57356332 286.61197912 279.27443568 260.82490285]
