In [217]:
import numpy as np
import pandas as pd
from pandas_datareader import data, wb
from datetime import datetime 
from datetime import date
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [218]:
# importing plotly for interactive plots
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objects as go

In [219]:
# Download the daily price history of a single ticker from Yahoo Finance via pandas-datareader.
symbol = "RELIANCE.NS"
data_source = "yahoo"
start_date = "2020-05-01"
# The window ends on the day the notebook is run, so the downloaded rows differ between runs.
today = datetime.today().strftime('%Y-%m-%d')
end_date = today
stock = data.DataReader(symbol, data_source, start_date, end_date)
# Keep only the price/volume columns, in a fixed order.
stock = stock[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
# Remember which ticker this frame belongs to.
stock.name = symbol

In [220]:
# Preview the first rows of the downloaded price history as a sanity check.
stock.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-04,1426.47998,1451.23999,1404.140015,1421.719971,1416.401611,24670336.0
2020-05-05,1440.050049,1465.109985,1433.609985,1446.930054,1441.517456,20723079.0
2020-05-06,1450.25,1471.01001,1431.930054,1447.030029,1441.616943,18685710.0
2020-05-07,1441.339966,1499.439941,1431.430054,1492.800049,1487.21582,23087436.0
2020-05-08,1530.48999,1565.060059,1522.670044,1547.130005,1541.342529,38887255.0


In [221]:
# Only the adjusted closing price is used for the prediction, so work on a one-column frame.
# Take an explicit copy: a column is added to this frame later, and writing to a slice of
# `stock` would raise SettingWithCopyWarning.
stock_df = stock[['Adj Close']].copy()
stock_df.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2020-05-04,1416.401611
2020-05-05,1441.517456
2020-05-06,1441.616943
2020-05-07,1487.21582
2020-05-08,1541.342529


In [222]:
# Number of future trading days to predict.
no_of_pred = 10
# Build the target column: 'Prediction' on a given row is the 'Adj Close' value found
# `no_of_pred` rows later. Shifting upwards leaves NaN in the last `no_of_pred` rows; those
# are the values the model will fill in.
# `assign` returns a new frame instead of writing into a possible slice of `stock`, so the
# cell is free of SettingWithCopyWarning and can be re-run safely.
stock_df = stock_df.assign(Prediction=stock_df['Adj Close'].shift(-no_of_pred))
stock_df.tail(20)

Unnamed: 0_level_0,Adj Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-06-15,1608.510376,1716.704102
2020-06-16,1611.648438,1697.725342
2020-06-17,1609.307251,1731.099976
2020-06-18,1649.705688,1760.349976
2020-06-19,1752.818481,1787.900024
2020-06-22,1739.618042,1851.800049
2020-06-23,1714.462524,1823.449951
2020-06-24,1721.386475,1798.0
2020-06-25,1711.473755,1824.25
2020-06-26,1735.134888,1878.050049


In [223]:
# As in y = m*x + c, the model needs an independent variable (X) and a dependent variable (y).
# X is the 'Adj Close' column as a 2-D numpy array (one feature per row).
# The last `no_of_pred` rows are removed so that X has the same length as y; those removed
# rows are the inputs used later to predict the next `no_of_pred` values.
# `columns=` is used because passing the axis positionally to `drop` is deprecated and is
# rejected by pandas 2.x.
X = np.array(stock_df.drop(columns=['Prediction']))[:-no_of_pred]
X

array([[1416.40161133],
       [1441.51745605],
       [1441.61694336],
       [1487.21582031],
       [1541.3425293 ],
       [1556.14697266],
       [1459.87841797],
       [1490.85205078],
       [1430.57836914],
       [1453.94067383],
       [1435.36047363],
       [1403.62963867],
       [1428.33679199],
       [1435.85864258],
       [1426.19494629],
       [1418.72302246],
       [1440.14257812],
       [1466.74267578],
       [1458.92199707],
       [1514.66271973],
       [1529.9552002 ],
       [1535.88305664],
       [1573.89038086],
       [1575.78320312],
       [1563.62878418],
       [1531.39990234],
       [1566.2689209 ],
       [1531.94775391],
       [1582.85668945],
       [1608.51037598],
       [1611.6484375 ],
       [1609.30725098],
       [1649.70568848],
       [1752.81848145],
       [1739.61804199],
       [1714.46252441],
       [1721.38647461],
       [1711.47375488],
       [1735.1348877 ]])

In [224]:
# Dependent (target) data: the 'Prediction' column as a 1-D numpy array.
# The trailing `no_of_pred` entries are the NaN placeholders still to be predicted, so they
# are cut off before the conversion.
y = np.array(stock_df['Prediction'].iloc[:-no_of_pred])
y

array([1435.36047363, 1403.62963867, 1428.33679199, 1435.85864258,
       1426.19494629, 1418.72302246, 1440.14257812, 1466.74267578,
       1458.92199707, 1514.66271973, 1529.9552002 , 1535.88305664,
       1573.89038086, 1575.78320312, 1563.62878418, 1531.39990234,
       1566.2689209 , 1531.94775391, 1582.85668945, 1608.51037598,
       1611.6484375 , 1609.30725098, 1649.70568848, 1752.81848145,
       1739.61804199, 1714.46252441, 1721.38647461, 1711.47375488,
       1735.1348877 , 1716.70410156, 1697.7253418 , 1731.09997559,
       1760.34997559, 1787.90002441, 1851.80004883, 1823.44995117,
       1798.        , 1824.25      , 1878.05004883])

In [225]:
# Split into 80% training and 20% test data.
# A fixed `random_state` makes the shuffle, and therefore the reported score, reproducible.
# NOTE(review): the rows are a time series and a shuffled split mixes past and future rows
# between train and test; consider `shuffle=False` for a chronological hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [226]:
# Build the linear regression model and fit it on the training split in one step
# (`fit` hands back the fitted estimator itself).
lr = LinearRegression().fit(X_train, y_train)
# Display the fitted estimator as the cell output.
lr

LinearRegression()

In [227]:
# Evaluate the fitted model on the held-out split; `score` reports the coefficient of
# determination (R^2).
lr_test = lr.score(X=X_test, y=y_test)
print("Linear Regression score: ", lr_test)

Linear Regression score:  0.6389355569363868


In [228]:
# Model inputs for the forecast: the last `no_of_pred` 'Adj Close' values, i.e. the rows
# that were cut off when X was built, as a 2-D numpy array.
# `columns=` is used because passing the axis positionally to `drop` is deprecated and is
# rejected by pandas 2.x.
x_prediction = np.array(stock_df.drop(columns=['Prediction']))[-no_of_pred:]
x_prediction

array([[1716.70410156],
       [1697.7253418 ],
       [1731.09997559],
       [1760.34997559],
       [1787.90002441],
       [1851.80004883],
       [1823.44995117],
       [1798.        ],
       [1824.25      ],
       [1878.05004883]])

In [229]:
# Run the fitted linear regression on the held-back inputs to forecast the prices of the
# next `no_of_pred` days.
lr_prediction = lr.predict(X=x_prediction)
lr_prediction

array([1821.04807649, 1800.07573   , 1836.95613695, 1869.27864602,
       1899.72263587, 1970.33491343, 1939.00683519, 1910.88354365,
       1939.89092358, 1999.34229336])

In [230]:
# Total number of rows (trading days) in the working frame.
rows = stock_df.shape[0]
rows

49

In [231]:
# Put the predicted values into the last `no_of_pred` rows of the 'Prediction' column.
# A single positional `.iloc[rows, col]` assignment is used instead of chained indexing
# (`df.iloc[r]['col'] = v`), which may write to a temporary copy and leave the frame unchanged.
# The row count comes from `no_of_pred` rather than a hard-coded 10.
first_pred_row = len(stock_df) - no_of_pred
pred_col = stock_df.columns.get_loc('Prediction')
stock_df.iloc[first_pred_row:, pred_col] = lr_prediction
stock_df.tail(20)

Unnamed: 0_level_0,Adj Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-06-15,1608.510376,1716.704102
2020-06-16,1611.648438,1697.725342
2020-06-17,1609.307251,1731.099976
2020-06-18,1649.705688,1760.349976
2020-06-19,1752.818481,1787.900024
2020-06-22,1739.618042,1851.800049
2020-06-23,1714.462524,1823.449951
2020-06-24,1721.386475,1798.0
2020-06-25,1711.473755,1824.25
2020-06-26,1735.134888,1878.050049


In [232]:
# The frame above is not yet correct: the predictions for the next `no_of_pred` days are
# present, but they sit on the dates of earlier days. To fix this, the dates of the
# predictions have to be moved forward by `no_of_pred` trading days.
# Separate the two series into their own one-column frames first.
stock_df_curr = stock_df.loc[:, ['Adj Close']]
stock_df_pred = stock_df.loc[:, ['Prediction']]

In [233]:
# Plot the adjusted closing prices that are currently available.
fig = go.Figure()
fig.add_trace(go.Scatter(x=stock_df_curr.index, y=stock_df_curr['Adj Close'], mode='lines+markers', name = symbol, showlegend=True ))
# Title and axis labels let the figure be read on its own.
fig.update_layout(
    title=f"{symbol} - adjusted closing price",
    xaxis_title="Date",
    yaxis_title="Adj Close",
)
fig.show()

In [234]:
# Inspect the last rows of the prediction frame before its dates are adjusted.
stock_df_pred.tail()

Unnamed: 0_level_0,Prediction
Date,Unnamed: 1_level_1
2020-07-06,1970.334913
2020-07-07,1939.006835
2020-07-08,1910.883544
2020-07-09,1939.890924
2020-07-10,1999.342293


In [235]:
# Re-date the 'Prediction' values so that every value sits on the day it refers to.
# Row i of 'Prediction' holds the price found `no_of_pred` rows later, so its date is the
# index entry `no_of_pred` rows further down. The final `no_of_pred` rows lie beyond the
# downloaded data and are dated with the business days that follow the last observed date.
# This replaces a fixed "+4 calendar days" correction that was only right for
# no_of_pred == 10 and for stretches without market holidays.
# NOTE(review): the future dates skip weekends only; exchange holidays are not excluded.
future_dates = pd.bdate_range(
    start=stock_df.index[-1] + pd.offsets.BDay(1),
    periods=no_of_pred,
    name=stock_df.index.name,
)
# The new index is derived from `stock_df.index`, not from the current index of
# `stock_df_pred`, so re-running this cell does not shift the dates a second time.
stock_df_pred.index = stock_df.index[no_of_pred:].append(future_dates)
stock_df_pred.tail(20)

Unnamed: 0_level_0,Prediction
Date,Unnamed: 1_level_1
2020-06-29,1716.704102
2020-06-30,1697.725342
2020-07-01,1731.099976
2020-07-02,1760.349976
2020-07-03,1787.900024
2020-07-06,1851.800049
2020-07-07,1823.449951
2020-07-08,1798.0
2020-07-09,1824.25
2020-07-10,1878.050049


In [236]:
# Plot the re-dated series; the points after the red line are the predicted stock prices.
# The line is anchored at the last date for which a price was actually downloaded. The run
# date (`end_date`) can fall on a day without data and then no longer marks the boundary
# between observed and predicted points.
last_observed = stock_df.index[-1].strftime('%Y-%m-%d')
fig = go.Figure()
fig.add_trace(go.Scatter(x=stock_df_pred.index, y=stock_df_pred['Prediction'], mode='lines+markers', name = symbol, showlegend=True))
fig.update_layout(
    # Title and axis labels let the figure be read on its own.
    title=f"{symbol} - adjusted closing price with {no_of_pred}-day prediction",
    xaxis_title="Date",
    yaxis_title="Adj Close",
    shapes=[
        dict(
            type = 'line',
            # Span the full plot height regardless of the y-axis range.
            yref = 'paper', y0=0, y1=1,
            xref = 'x', x0=last_observed, x1=last_observed,
            line=dict(color = 'red')
        ),
    ],
)
fig.show()