<a href="https://colab.research.google.com/github/Deveshk78/ml-design-patterns/blob/master/PredictionModels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import StandardScaler

##### Forecast future "CLOSE" prices; returns the forecast data frame and the chosen model's score
def getFuturePrices(pdf,forecastPeriod):
    """Forecast the next ``forecastPeriod`` closing prices.

    The model assumes that a closing price is predictive of the closing
    price ``forecastPeriod`` rows later.  A ``LinearRegression`` and an
    ``SVR`` are both trained on (price, price ``forecastPeriod`` rows later)
    pairs; whichever scores higher on a random 20% hold-out is used to
    forecast from the last ``forecastPeriod`` known prices.

    Args:
        pdf: Data frame with a numeric ``'CLOSE'`` column.  The first column
            of the last row is read as the most recent date, either as a
            ``"%Y-%m-%d"`` string or as a date-like object.
        forecastPeriod: Number of rows to look ahead; also the number of
            forecast rows returned.  Must be at least 1 and smaller than
            ``len(pdf)``.

    Returns:
        A tuple ``(predDF, predScore)``.  ``predDF`` has the columns
        ``'DATE'`` (future weekdays as ``YYYY-MM-DD`` strings) and
        ``'CLOSE'`` (forecast prices in original units).  ``predScore`` is
        the hold-out R^2 score of the selected model.

    Raises:
        ValueError: If ``forecastPeriod`` is not positive, if ``pdf`` does
            not have more rows than ``forecastPeriod``, or if the last date
            string does not match ``"%Y-%m-%d"``.
    """
    from datetime import datetime, timedelta

    ### Validate up front so callers get a clear message instead of an
    ### obscure failure from deep inside scikit-learn.
    if forecastPeriod < 1:
        raise ValueError("forecastPeriod must be at least 1")
    if len(pdf) <= forecastPeriod:
        raise ValueError("pdf must contain more rows than forecastPeriod")

    ### Build the list of future dates, skipping weekends.
    lastValue = pdf.iloc[-1, 0]
    if isinstance(lastValue, str):
        newDate = datetime.strptime(lastValue, "%Y-%m-%d").date()
    else:
        newDate = pd.Timestamp(lastValue).date()

    datesList = []
    for _ in range(forecastPeriod):
        newDate += timedelta(days=1)
        ### weekday(): Saturday is 5 and Sunday is 6; roll both to Monday.
        if newDate.weekday() >= 5:
            newDate += timedelta(days=7 - newDate.weekday())
        datesList.append(str(newDate))

    ### The modelling imports stay local, as in the original cell.
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVR

    ### Work on a copy of the values so the caller's frame is never touched.
    closes = pdf[['CLOSE']].to_numpy(dtype=float)

    ### One scaler is fitted on every close and shared by features and
    ### targets, so predictions can be mapped back to prices afterwards.
    stdScaler = StandardScaler()
    scaledCloses = stdScaler.fit_transform(closes)

    ### Row i is paired with row i + forecastPeriod.  The last
    ### forecastPeriod rows have no known target and become the inputs
    ### for the actual forecast.
    XForecast = scaledCloses[-forecastPeriod:]
    X = scaledCloses[:-forecastPeriod]
    y = scaledCloses[forecastPeriod:].ravel()  ## 1-D targets, as SVR expects

    testSize = 0.2
    XTrain,XTest,yTrain,yTest = train_test_split(X,y,test_size=testSize)

    linearModel = LinearRegression()
    svrModel = SVR()
    linearModel.fit(XTrain,yTrain)
    svrModel.fit(XTrain,yTrain)

    ### score() returns the R^2 on the hold-out set for both models
    linearScore = linearModel.score(XTest,yTest)
    svrScore = svrModel.score(XTest,yTest)
    print("Score for LinearRegression Model: ",linearScore)
    print("Score for SVR Model: ",svrScore)

    ### Prefer the linear model unless SVR is strictly better
    predModel, predScore = linearModel, linearScore
    if svrScore > linearScore:
        predModel, predScore = svrModel, svrScore

    ### inverse_transform needs a 2-D column, whichever model was chosen
    scaledPredictions = predModel.predict(XForecast).reshape(-1, 1)
    futurePrices = stdScaler.inverse_transform(scaledPredictions).ravel()

    ### 'CLOSE' holds the *forecast* prices for the future dates
    predDF = pd.DataFrame({'DATE': datesList, 'CLOSE': futurePrices})

    return predDF, predScore




##### Same forecasting routine, but returns the current and future prices side by side (no score)
def getRegressionPredictions(pdf,forecastPeriod):
    """Forecast future prices and return them next to the current prices.

    The model assumes that a closing price is predictive of the closing
    price ``forecastPeriod`` rows later.  A ``LinearRegression`` and an
    ``SVR`` are both trained on (price, price ``forecastPeriod`` rows later)
    pairs; whichever scores higher on a random 20% hold-out is used to
    forecast from the last ``forecastPeriod`` known prices.

    Args:
        pdf: Data frame with a numeric ``'CLOSE'`` column.  The first column
            of the last row is read as the most recent date, either as a
            ``"%Y-%m-%d"`` string or as a date-like object.
        forecastPeriod: Number of rows to look ahead; also the number of
            forecast rows returned.  Must be at least 1 and smaller than
            ``len(pdf)``.

    Returns:
        A data frame with the columns ``'DATE'`` (future weekdays as
        ``YYYY-MM-DD`` strings), ``'Future Price'`` (forecast prices in
        original units) and ``'Current Price'`` (the known closing price
        each forecast was derived from).

    Raises:
        ValueError: If ``forecastPeriod`` is not positive, if ``pdf`` does
            not have more rows than ``forecastPeriod``, or if the last date
            string does not match ``"%Y-%m-%d"``.
    """
    from datetime import datetime, timedelta

    ### Validate up front so callers get a clear message instead of an
    ### obscure failure from deep inside scikit-learn.
    if forecastPeriod < 1:
        raise ValueError("forecastPeriod must be at least 1")
    if len(pdf) <= forecastPeriod:
        raise ValueError("pdf must contain more rows than forecastPeriod")

    ### Build the list of future dates, skipping weekends.  The date is
    ### advanced cumulatively so a weekend pushes every later date back
    ### and no date is emitted twice.
    lastValue = pdf.iloc[-1, 0]
    if isinstance(lastValue, str):
        newDate = datetime.strptime(lastValue, "%Y-%m-%d").date()
    else:
        newDate = pd.Timestamp(lastValue).date()

    datesList = []
    for _ in range(forecastPeriod):
        newDate += timedelta(days=1)
        ### weekday(): Saturday is 5 and Sunday is 6; roll both to Monday.
        if newDate.weekday() >= 5:
            newDate += timedelta(days=7 - newDate.weekday())
        datesList.append(str(newDate))

    ### The modelling imports stay local, as in the original cell.
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVR

    ### Work on a copy of the values so the caller's frame is never touched.
    closes = pdf[['CLOSE']].to_numpy(dtype=float)

    ### One scaler is fitted on every close and shared by features and
    ### targets, so predictions can be mapped back to prices afterwards.
    stdScaler = StandardScaler()
    scaledCloses = stdScaler.fit_transform(closes)

    ### Row i is paired with row i + forecastPeriod.  The last
    ### forecastPeriod rows have no known target and become the inputs
    ### for the actual forecast.
    XForecast = scaledCloses[-forecastPeriod:]
    X = scaledCloses[:-forecastPeriod]
    y = scaledCloses[forecastPeriod:].ravel()  ## 1-D targets, as SVR expects

    testSize = 0.2
    XTrain,XTest,yTrain,yTest = train_test_split(X,y,test_size=testSize)

    linearModel = LinearRegression()
    svrModel = SVR()
    linearModel.fit(XTrain,yTrain)
    svrModel.fit(XTrain,yTrain)

    ### score() returns the R^2 on the hold-out set for both models
    linearScore = linearModel.score(XTest,yTest)
    svrScore = svrModel.score(XTest,yTest)
    print("Score for LinearRegression Model: ",linearScore)
    print("Score for SVR Model: ",svrScore)

    ### Prefer the linear model unless SVR is strictly better
    predModel = svrModel if svrScore > linearScore else linearModel

    ### inverse_transform needs a 2-D column, whichever model was chosen
    scaledPredictions = predModel.predict(XForecast).reshape(-1, 1)
    futurePrices = stdScaler.inverse_transform(scaledPredictions).ravel()

    ### The unscaled closes are used directly for the current prices,
    ### which avoids a scale/unscale round trip.
    currentPrices = closes[-forecastPeriod:].ravel()

    predDF = pd.DataFrame({
        'DATE': datesList,
        'Future Price': futurePrices,
        'Current Price': currentPrices,
    })

    return predDF