## Import Libraries

In [None]:
import math
import numpy as np
import pandas as pd
import quandl
from sklearn import preprocessing, model_selection, svm
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
import datetime
import matplotlib.pyplot as plt
from matplotlib import style
import pickle
from astropy.table import Table, Column
from sklearn.metrics import mean_squared_error

# Select matplotlib's 'ggplot' style sheet for the figures drawn below.
style.use('ggplot')

## Read, Understand and Pre-process Data
___
### Understand Data

In [None]:
# Fetch the "WIKI/GOOGL" dataset through the quandl client. The cells below
# rely on it having 'Adj. Open/High/Low/Close/Volume' columns.
# NOTE(review): presumably requires network access (and possibly an API key) — confirm.
df = quandl.get("WIKI/GOOGL")

# Peek at the most recent rows.
df.tail()

In [None]:
# Restrict the frame to the adjusted price and volume columns used below.
adjusted_columns = [
    'Adj. Open',
    'Adj. High',
    'Adj. Low',
    'Adj. Close',
    'Adj. Volume',
]
df = df[adjusted_columns]

In [None]:
# Derived features, both expressed in percent:
#   HL_PCT     - the row's high-low range relative to its low
#   PCT_change - the row's open-to-close move relative to its open
df['HL_PCT'] = (
    df['Adj. High'].sub(df['Adj. Low']).div(df['Adj. Low']).mul(100.0)
)
df['PCT_change'] = (
    df['Adj. Close'].sub(df['Adj. Open']).div(df['Adj. Open']).mul(100.0)
)

In [None]:
# Columns that feed the models: the close, the two derived percentages and volume.
feature_columns = ['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']
df = df[feature_columns]
df.head()

In [None]:
# Column whose future value the models will learn to predict.
label_col = 'Adj. Close'

# Replace missing values with a sentinel instead of dropping the rows.
df.fillna(value=-99999, inplace=True)

# Forecast horizon: 1% of the number of rows, rounded up.
future_data = int(math.ceil(len(df) * 0.01))

print(future_data)

In [None]:
# Shift the label column up by the horizon so every row is paired with the
# value recorded `future_data` rows later; the final `future_data` rows end
# up with NaN labels.
df['label'] = df[label_col].shift(periods=-future_data)
df.head()

___
___
___
___
___

### Pre-Process Data

In [None]:
# Feature matrix: every column except the label.
# `columns=` is used because passing the axis positionally to DataFrame.drop
# was deprecated and is rejected by pandas 2.x.
X = np.array(df.drop(columns=['label']))
print("original:\n")
print(X)

# Standardise the features with scikit-learn's preprocessing.scale.
X = preprocessing.scale(X)
print("\n\nafter preprocess:\n")
print(X)

In [None]:
# The newest `future_data` feature rows are held back for the final forecast;
# everything before them stays in X for training and evaluation.
X, X_lately = X[:-future_data], X[-future_data:]

print(df.tail())

# Remove the rows that have no label.
df.dropna(inplace=True)

print("\n\n",df.tail())

In [None]:
# Target vector: the label column as a NumPy array.
# NOTE(review): assumes df now has exactly as many rows as X — confirm.
y = np.array(df['label'])

## Train ML Algorithms using Train Data
___
### Splitting data into train/test

In [None]:
# Hold out 20% of the labelled rows for evaluation.
# NOTE(review): no random_state is passed, so the split presumably differs between runs — confirm.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)

In [None]:
print("\t\t\tRandom Forest Regressor:\n\nParameters and their values:\n")
# fit() returns the estimator itself, so construction and training can chain.
rfc = RandomForestRegressor().fit(X_train, y_train)
rfc  # last expression of the cell: the notebook renders the fitted estimator

In [None]:
print("\t\t\tLinear Regression:\n\nParameters and their values:\n")
# fit() returns the estimator itself, so construction and training can chain.
lr_clf = LinearRegression().fit(X_train, y_train)
lr_clf  # last expression of the cell: the notebook renders the fitted estimator

In [None]:
print("\t\t\tDecision Tree Regressor:\n\nParameters and their values:\n")
# fit() returns the estimator itself, so construction and training can chain.
dt_clf = tree.DecisionTreeRegressor().fit(X_train, y_train)
dt_clf  # last expression of the cell: the notebook renders the fitted estimator

___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___
___

## Evaluate ML Algorithms using Test Data
___

In [None]:
print("Prediction using Random Forest Regressor:\n\n")

# Evaluate on the held-out split. For scikit-learn regressors score() is the
# coefficient of determination (R^2), reported here as "accuracy".
rfc_predicted_set = rfc.predict(X_test)
rfc_accuracy = rfc.score(X_test, y_test)

print("Accuracy score =", rfc_accuracy)
print("\nPredicted Price:\n", rfc_predicted_set)

In [None]:
print("Prediction using Linear Regression:\n\n")

# Evaluate on the held-out split. For scikit-learn regressors score() is the
# coefficient of determination (R^2), reported here as "accuracy".
lr_predicted_set = lr_clf.predict(X_test)
lr_accuracy = lr_clf.score(X_test, y_test)

print("Accuracy score =", lr_accuracy)
print("\nPredicted Price:\n", lr_predicted_set)

In [None]:
print("Prediction using Decision Tree Regressor:\n\n")

# score() on the held-out split, reported here as "accuracy".
dt_accuracy = dt_clf.score(X_test, y_test)
print("Accuracy score =",dt_accuracy)

# The predictions must come from dt_clf itself: the RMSE comparison further
# down uses dt_predicted_set, and taking it from lr_clf would make the
# decision tree's error a copy of the linear model's.
dt_predicted_set = dt_clf.predict(X_test)
print("\nPredicted Price:\n",dt_predicted_set)

___
___

## Selection of Best Model
___

In [None]:
# Root-mean-squared error of each model's predictions on the held-out split.
rfc_rms, lr_rms, dt_rms = (
    np.sqrt(mean_squared_error(y_test, predicted))
    for predicted in (rfc_predicted_set, lr_predicted_set, dt_predicted_set)
)

# One (model name, RMSE) row per model, rendered as an astropy Table.
DR = [
    ('LinearRegression', lr_rms),
    ('RandomForestRegressor', rfc_rms),
    ('DecisionTreeRegressor', dt_rms),
]
t = Table(rows=DR, names=('Model', 'RMSE'))
print("Detailed Performance of all the models:\n")
print(t)

In [None]:
# RMSE per candidate model; the model with the lowest value is selected.
# Each entry must carry its own model's RMSE — the decision tree uses dt_rms.
BM_Arr = {
    'RandomForestRegressor': rfc_rms,
    'LinearRegression': lr_rms,
    'DecisionTreeRegressor': dt_rms,
}

import operator
# min() over (name, rmse) pairs keyed on the rmse; [0] extracts the name.
Best_Model = min(BM_Arr.items(), key=operator.itemgetter(1))[0]

t2 = Table(rows=[(Best_Model, BM_Arr[Best_Model])], names=('Model', 'RMSE'))
print("Best Model:\n")
print (t2)

___

## Application Phase
___

In [None]:
# Show the last rows of the frame (features plus label) as a reminder of its layout.
print("All Features in form of DataFrame:\n")
df.tail()

### Train Best Model on All Data

In [None]:
# Refit the random forest on all of X and y rather than only the training split.
# NOTE(review): rfc is hard-coded here regardless of what Best_Model holds — confirm this is intended.
rfc.fit(X,y)

### Save the Trained Model as Pickle File

In [None]:
# Serialise the fitted random forest to disk so it can be reloaded later.
with open('randomforest.pickle', mode='wb') as model_file:
    pickle.dump(rfc, model_file)
    

## Make Predictions of Future Prices
___

### Load the Trained Model

In [None]:
# Reload the persisted model. The context manager closes the file handle as
# soon as loading finishes instead of leaving it open for the session.
# NOTE: pickle.load can execute arbitrary code — only load files this
# notebook wrote itself.
with open('randomforest.pickle','rb') as pickle_in:
    rfc = pickle.load(pickle_in)

### Apply Trained Model and Output Prediction to User

In [None]:
# Predict labels for the held-back most recent feature rows (X_lately).
rfc_predicted_combined = rfc.predict(X_lately)
print("\nPredicted Price:\n",rfc_predicted_combined)

### Attaching Dates to Our Predicted Prices

In [None]:
# Column that will hold the forecasts; the existing rows have none.
df['Predicted']=np.nan
print(df.tail())

# Index label of the last row; forecasts are dated from the day after it.
# NOTE(review): unlabeled tail rows were dropped from df earlier, so this is
# presumably the last *labelled* date rather than the last observed one — confirm.
last_date = df.iloc[-1].name

# Every column except the trailing 'Predicted' one stays empty on forecast rows.
blank_features = [np.nan] * (len(df.columns) - 1)

# Step forward one calendar day per prediction using Timedelta arithmetic.
# Converting to a Unix timestamp and back through datetime.fromtimestamp
# applies the machine's local UTC offset, which shifts the dates off midnight
# and drifts across daylight-saving changes.
for day_offset, i in enumerate(rfc_predicted_combined, start=1):
    next_date = last_date + pd.Timedelta(days=day_offset)
    df.loc[next_date] = blank_features + [i]

In [None]:
# Last `future_data` rows, i.e. the rows appended for the predicted prices.
# NOTE(review): this slice is evaluated but presumably never shown — a notebook
# cell renders only its final expression, which is the full frame below.
df[-future_data:]
df

### Plotting Our Predicted Closing Prices Over Time

In [None]:
# Draw the historical closes and the forecasts on the same axes.
for column_name in ('Adj. Close', 'Predicted'):
    df[column_name].plot()

# loc=4 places the legend in the lower-right corner.
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()