<a href="https://colab.research.google.com/github/Samantha996/Personal-Site/blob/main/StockPredictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Building a Stock Portfolio Tool using Python

Step 1: Importing the Correct Libries we will be using.


In [75]:
import pandas as pd
import numpy as np
import requests
from pandas_datareader import data
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression # train model
import pickle # save model in pickle format. Prevents re-training

 

In [98]:
stock = 'ZM'
def get_data():
  start = "01-01-2013"
  df = data.DataReader(stock,"yahoo",start = start)
  df.to_csv(f"{stock}_20210211.csv")
  print(df.head())
get_data()


                 High        Low       Open      Close    Volume  Adj Close
Date                                                                       
2019-04-18  66.000000  60.320999  65.000000  62.000000  25764700  62.000000
2019-04-22  68.900002  59.939999  61.000000  65.699997   9949700  65.699997
2019-04-23  74.168999  65.550003  66.870003  69.000000   6786500  69.000000
2019-04-24  71.500000  63.160000  71.400002  63.200001   4973500  63.200001
2019-04-25  66.849998  62.599998  64.739998  65.000000   3863300  65.000000


In [99]:
df = pd.read_csv(f"{stock}_20210211.csv", index_col=0)
df.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-04-18,66.0,60.320999,65.0,62.0,25764700,62.0
2019-04-22,68.900002,59.939999,61.0,65.699997,9949700,65.699997
2019-04-23,74.168999,65.550003,66.870003,69.0,6786500,69.0
2019-04-24,71.5,63.16,71.400002,63.200001,4973500,63.200001
2019-04-25,66.849998,62.599998,64.739998,65.0,3863300,65.0


In [100]:
df['HL_pct'] = (df['High']-df['Low'])/df['Low']*100.0
df = df[['Adj Close', 'Volume', 'HL_pct']]
df.head()

Unnamed: 0_level_0,Adj Close,Volume,HL_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-18,62.0,25764700,9.414633
2019-04-22,65.699997,9949700,14.948287
2019-04-23,69.0,6786500,13.148734
2019-04-24,63.200001,4973500,13.20456
2019-04-25,65.0,3863300,6.789138


In [101]:
predict_col = 'Adj Close'
df.fillna(0, inplace =True)

df['future3days'] = df[predict_col].shift(-3)
df.dropna(inplace = True)
df.head()

Unnamed: 0_level_0,Adj Close,Volume,HL_pct,future3days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-04-18,62.0,25764700,9.414633,63.200001
2019-04-22,65.699997,9949700,14.948287,65.0
2019-04-23,69.0,6786500,13.148734,66.220001
2019-04-24,63.200001,4973500,13.20456,68.169998
2019-04-25,65.0,3863300,6.789138,72.470001


In [102]:
#input variables
X = np.array(df.drop(['future3days'], 1))
X = preprocessing.scale(X)
y = np.array(df['future3days'])

#test and train data
#X-Train - input training, x_test - input for pred data
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size = 0.3)

#Multiple Linear Regression Model
clf = LinearRegression()
clf.fit(X_train, y_train)

#saving in pickle format to not have to run model again
with open(f'{stock}_predictionmodel.pickle', 'wb') as f:
  pickle.dump(clf, f)

#Accuracy score
confidence = clf.score(X_train, y_train)
print("Accuracy:", confidence)


Accuracy: 0.9808754914482374


In [69]:
#with open(f'{stock}_predictionmodel.pickle', 'rb') as f:
 # clf = pickle.load(f)

#confidence = clf.score(X_train, y_train)
#print("Accuracy:", confidence)

Accuracy: 0.9931878908947266


In [103]:
for X, y in zip(X_test, y_test):
  print(f"Model predicted price (3days in future):{clf.predict([X])[0]}, Actual price at 3 days later: {y}" )

Model predicted price (3days in future):97.21528714492466, Actual price at 3 days later: 91.7300033569336
Model predicted price (3days in future):112.71734408097855, Actual price at 3 days later: 111.0999984741211
Model predicted price (3days in future):412.78010621077317, Actual price at 3 days later: 430.2799987792969
Model predicted price (3days in future):71.95820163779898, Actual price at 3 days later: 65.0
Model predicted price (3days in future):78.4208828772791, Actual price at 3 days later: 70.44000244140625
Model predicted price (3days in future):387.54479182777214, Actual price at 3 days later: 430.0299987792969
Model predicted price (3days in future):106.56868790670664, Actual price at 3 days later: 105.01000213623048
Model predicted price (3days in future):84.10969060467576, Actual price at 3 days later: 78.04000091552734
Model predicted price (3days in future):205.76433386773087, Actual price at 3 days later: 219.5200042724609
Model predicted price (3days in future):68.971

In [104]:
df_test = pd.read_csv(f"{stock}_20210211.csv", index_col = 0)
df_test['HL_pct'] = (df_test['High']-df_test['Low'])/df_test['Low']*100.0
df_test = df_test[['Adj Close', 'Volume', 'HL_pct']]

X_new = np.array(df_test)
X_new = preprocessing.scale(X_new)

for x in (X_new):
  print(f"Model predicted price (3days in future): {clf.predict([x])[0]}")

Model predicted price (3days in future): 63.67519187738327
Model predicted price (3days in future): 71.1346777018966
Model predicted price (3days in future): 74.15513410791775
Model predicted price (3days in future): 68.73497045346738
Model predicted price (3days in future): 68.48607669800415
Model predicted price (3days in future): 69.48397476554928
Model predicted price (3days in future): 71.50660194036516
Model predicted price (3days in future): 76.41817503952785
Model predicted price (3days in future): 76.76598541082352
Model predicted price (3days in future): 79.61908341052387
Model predicted price (3days in future): 82.57230879099009
Model predicted price (3days in future): 82.20466894041003
Model predicted price (3days in future): 76.78406939669078
Model predicted price (3days in future): 80.82557344212438
Model predicted price (3days in future): 77.8589965693193
Model predicted price (3days in future): 83.02423751116189
Model predicted price (3days in future): 76.92492360156335