In [1]:
#import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

#show plots in a separate interactive Qt window (the qt backend does not draw inline)
%matplotlib qt

In [2]:
#function to calculate compound annual growth rate
def CAGR(first, last, periods):
    """Return the compound annual growth rate between two values, in percent.

    Parameters
    ----------
    first : starting value (used as the divisor of the growth ratio).
    last : ending value.
    periods : number of compounding periods separating the two values.

    Returns
    -------
    The value of ``((last / first) ** (1 / periods) - 1) * 100``.
    """
    growth_ratio = last / first
    per_period_factor = growth_ratio ** (1 / periods)
    return (per_period_factor - 1) * 100

In [3]:
#Load the price history from the CSV file and preview its first five rows
data = pd.read_csv(filepath_or_buffer='Dataset/BSE_SENSEX_DATA.csv')
data.head(5)

Unnamed: 0,Date,Open,High,Low,Close
0,2-January-1991,1027.38,1027.38,999.26,999.26
1,3-January-1991,983.51,989.08,981.08,982.35
2,7-January-1991,982.81,1010.77,978.69,1010.77
3,9-January-1991,1007.66,1013.8,998.09,1008.52
4,11-January-1991,1000.51,1034.3,997.04,1034.3


In [4]:
#Parse the 'Date' strings into timestamps, then use them as the row index.
#drop=False keeps 'Date' available as an ordinary column as well.
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date', drop=False)
data.head(5)

Unnamed: 0_level_0,Date,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1991-01-02,1991-01-02,1027.38,1027.38,999.26,999.26
1991-01-03,1991-01-03,983.51,989.08,981.08,982.35
1991-01-07,1991-01-07,982.81,1010.77,978.69,1010.77
1991-01-09,1991-01-09,1007.66,1013.8,998.09,1008.52
1991-01-11,1991-01-11,1000.51,1034.3,997.04,1034.3


In [5]:
#Regression models need numeric inputs, so replace each timestamp in the
#index with the whole number of days elapsed since 1970-01-01.
#NOTE: after this cell the index holds integers, so running the cell a
#second time on the same frame is expected to fail.
data.index = (data.index - pd.Timestamp('1970-01-01')).days

In [6]:
#Pull the target (closing price) and the feature (day number) out of the
#frame as plain numpy arrays; they are reshaped later where a model needs it.
y = data['Close'].to_numpy()
x = data.index.to_numpy()

In [7]:
#Model initialization
#LinearRegression fits a model that is linear in the features it is given.
#With a single input feature x the model equation is y = mx + c,
#which is the equation of a line.
regression_model = LinearRegression()

In [8]:
#Pick the polynomial order for the feature expansion; degree 5 is used here,
#so the fitted curve has the form
#y = c0 + c1*x + c2*x**2 + ... + c5*x**5
poly = PolynomialFeatures(degree=5)

In [9]:
#Expand the day numbers into their polynomial terms. The 1-D array x is
#first turned into a single-column 2-D array, one row per sample.
X_transform = poly.fit_transform(x[:, np.newaxis])

In [10]:
#Fit the data (train the model).
#The target is passed as a 1-D array so that predict() also returns 1-D
#arrays; an (n, 1) column target would make every prediction a one-element
#array, which then prints with brackets (e.g. "[219302.3]").
regression_model.fit(X_transform, y)

LinearRegression()

In [11]:
# Prediction for historical dates. Let's call it learned values.
# The model is evaluated on the same polynomial features it was fitted on,
# so y_learned is the fitted curve over the historical period.
y_learned = regression_model.predict(X_transform)

In [12]:
#Build the day numbers to forecast over: 3650 consecutive days (roughly
#10 years) beginning at x[-1], the last day number of the historical series.
#Because the index is already expressed as integer day counts, extending it
#into the future is just a matter of counting upwards.
newindex = np.arange(x[-1], x[-1] + 3650)

In [13]:
#Expand the future day numbers into polynomial terms using the transformer
#that was already fitted on the historical data. transform() is used rather
#than fit_transform() so that prediction inputs never re-fit the transformer.
X_extended_transform = poly.transform(newindex.reshape(-1, 1))

In [14]:
#Prediction for future dates. Let's call it predicted values.
#One prediction is produced per row of X_extended_transform, i.e. per day
#number in newindex.
y_predict = regression_model.predict(X_extended_transform)

In [15]:
#Print the last predicted value.
#The year is derived from the final forecast day instead of being hard-coded,
#and np.ravel(...)[-1] yields a plain scalar whether predict() returned a
#1-D array or an (n, 1) column.
final_year = pd.to_datetime(int(newindex[-1]), origin='1970-01-01', unit='D').year
final_price = np.ravel(y_predict)[-1]
print ("Closing price at", final_year, "would be around ", final_price)

Closing price at 2029 would be around  [219302.32865956]


In [16]:
#Turn the day numbers back into calendar dates for the plot's time axis.
#NOTE: this rebinds x from day numbers to dates, so earlier cells that treat
#x as numbers should not be re-run after this one without starting over.
x = pd.to_datetime(arg=data.index, unit='D', origin='1970-01-01')
future_x = pd.to_datetime(arg=newindex, unit='D', origin='1970-01-01')

In [17]:
#Report the growth rate from the last historical close to the last
#predicted close, compounded over 10 periods (the ten forecast years).
print (
    'Your investments will have a CAGR of ',
    CAGR(first=y[-1], last=y_predict[-1], periods=10),
    '%',
)

Your investments will have a CAGR of  [14.31007581] %


In [18]:
#Set the default figure size through the already-imported pyplot module
#(avoids a mid-notebook import from the discouraged matplotlib.pylab).
plt.rcParams['figure.figsize'] = (20, 10)

In [19]:
#Open a new figure with an explicit 16x8 size and draw the historical
#closing prices on it
plt.figure(figsize=(16, 8))
plt.plot(
    x,
    data['Close'],
    label='Close Price History',
)

[<matplotlib.lines.Line2D at 0x16fbc6f0100>]

In [20]:
#Overlay the fitted curve (model output for the historical dates) in red
plt.plot(
    x,
    y_learned,
    color='r',
    label='Mathematical Model',
)

[<matplotlib.lines.Line2D at 0x16fbedc83a0>]

In [21]:
#Overlay the model output for the future dates in green
plt.plot(
    future_x,
    y_predict,
    color='g',
    label='Future Predictions',
)

[<matplotlib.lines.Line2D at 0x16fbeddb3d0>]

In [22]:
#Set the title of the graph (a figure-level title, drawn at font size 16)
plt.suptitle('Stock Market Predictions', fontsize=16)

Text(0.5, 0.98, 'Stock Market Predictions')

In [23]:
#Set the title of the graph window
#plt.gcf() returns the current figure; the title is set through that
#figure's canvas manager.
fig = plt.gcf()
fig.canvas.manager.set_window_title('Stock Market Predictions')

In [24]:
#Display the legend; its entries come from the label= arguments given to
#the plt.plot calls
plt.legend()

<matplotlib.legend.Legend at 0x16fbf314280>

In [25]:
#Display the finished figure
plt.show()