In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime


In [2]:
%pip install -q yfinance

Note: you may need to restart the kernel to use updated packages.


## Data preprocessing

In [3]:
# for reading the stock data from Yahoo finance
from pandas_datareader.data import DataReader
from pandas_datareader import data as pdr
import yfinance as yf


In [4]:
yf.pdr_override()

In [5]:
stock_list= ['NFLX', 'AMZN', 'DIS', 'ROKU']

In [6]:
end= datetime.now()
start= datetime(end.year -1, end.month,end.day)

In [7]:
for stock in stock_list:
  globals()[stock]= yf.download(stock,start,end)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [8]:
# creating the company list for analyzing their stocks
comp_list= [NFLX, AMZN, DIS, ROKU]
comp_name= ["NETFLIX","AMAZON","DISNEY","ROKU"]

In [9]:
for company, com_name in zip(comp_list, comp_name):
  company["comp_name"]= com_name

In [10]:
df= pd.concat(comp_list,axis=0)
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,comp_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-10-19,64.050003,64.800003,62.119999,62.48,62.48,6142700,ROKU
2023-10-20,62.400002,62.630001,59.66,59.799999,59.799999,5231900,ROKU
2023-10-23,59.580002,62.035,58.900002,61.18,61.18,6281200,ROKU
2023-10-24,61.669998,63.07,60.619999,61.419998,61.419998,5607700,ROKU
2023-10-25,62.75,64.239998,58.154999,58.5,58.5,6641900,ROKU


In [11]:
df1= ROKU
df2= DIS
df3= AMZN
df4= NFLX

In [12]:
df1.shape

(251, 7)

## Model Evaluation


In [21]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error,r2_score


### Model Evaluation for ROKU

In [14]:
train, test= train_test_split(df1,test_size=0.2)

In [15]:
train.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,comp_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-05-15,53.709999,54.849998,52.939999,54.02,54.02,4724200,ROKU
2023-10-12,71.379997,71.459999,67.610001,67.769997,67.769997,5520800,ROKU
2023-06-20,70.169998,70.769997,67.699997,67.959999,67.959999,5747900,ROKU
2023-02-15,58.560001,63.750999,58.549999,63.490002,63.490002,17825600,ROKU
2022-12-21,43.400002,44.039001,42.419998,42.950001,42.950001,5117100,ROKU


In [16]:
test.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,comp_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-03-15,60.57,62.669998,59.299999,62.34,62.34,6474300,ROKU
2022-10-31,56.009998,57.25,53.84,55.540001,55.540001,7712700,ROKU
2023-04-27,54.82,58.84,53.830002,57.009998,57.009998,15582400,ROKU
2023-01-20,50.939999,52.59,50.75,52.389999,52.389999,6857500,ROKU
2023-01-25,50.360001,53.299999,49.459999,52.150002,52.150002,12213400,ROKU


In [17]:
x_train1 = train[['Open', 'High', 'Low', 'Volume']].values
x_test1 = test[['Open', 'High', 'Low', 'Volume']].values

In [18]:
y_train1 = train['Close'].values
y_test1 = test['Close'].values

In [19]:
model1= LinearRegression()
model1.fit(x_train1, y_train1)
y_pred1 = model1.predict(x_test1)

In [22]:
model2=  RandomForestRegressor()
model2.fit(x_train1,y_train1)
y_pred2=model2.predict(x_test1)

In [23]:
result1 = model1.predict([[262.000000, 267.899994, 250.029999, 11896100]])
print(result1)

[253.52012484]


In [24]:
result2 = model2.predict([[262.000000, 267.899994, 250.029999, 11896100]])
print(result2)

[94.40139854]


In [28]:
print("Model Evaluation with Linear regression")
print("*"*40)
print("MSE",round(mean_squared_error(y_test1,y_pred1), 2))
print("RMSE",round(np.sqrt(mean_squared_error(y_test1,y_pred1)), 2))
print("MAE",round(mean_absolute_error(y_test1,y_pred1), 2))
print("MAPE",round(mean_absolute_percentage_error(y_test1,y_pred1), 2))
print("R2 Score : ", round(r2_score(y_test1,y_pred1), 2))

Model Evaluation with Linear regression
****************************************
MSE 1.11
RMSE 1.06
MAE 0.8
MAPE 0.01
R2 Score :  0.99


In [30]:
print("Model Evaluation with Random Forest")
print("*"*40)
print("MSE",round(mean_squared_error(y_test1,y_pred2), 2))
print("RMSE",round(np.sqrt(mean_squared_error(y_test1,y_pred2)), 2))
print("MAE",round(mean_absolute_error(y_test1,y_pred2), 2))
print("MAPE",round(mean_absolute_percentage_error(y_test1,y_pred2), 2))
print("R2 Score : ", round(r2_score(y_test1,y_pred2), 2))

Model Evaluation with Random Forest
****************************************
MSE 2.36
RMSE 1.54
MAE 1.21
MAPE 0.02
R2 Score :  0.98


### Model Evaluation for DISNEY

In [31]:
train, test= train_test_split(df2,test_size=0.2)

In [32]:
x_train2 = train[['Open', 'High', 'Low', 'Volume']].values
x_test2 = test[['Open', 'High', 'Low', 'Volume']].values

In [33]:
y_train2 = train['Close'].values
y_test2 = test['Close'].values

In [34]:
model1.fit(x_train2, y_train2)

In [35]:
y_pred3 = model1.predict(x_test2)
y_pred4= model2.predict(x_test2)

In [36]:
result3 = model1.predict([[262.000000, 267.899994, 250.029999, 11896100]])
print(result3)

[257.98579954]


In [37]:
result4= model2.predict([[262.000000, 267.899994, 250.029999, 11896100]])
print(result4)

[94.40139854]


In [40]:
print("Model Evauation for DISNEY with Linear Regression")
print("*"*50)
print("MSE",round(mean_squared_error(y_test2,y_pred3), 2))
print("RMSE",round(np.sqrt(mean_squared_error(y_test2,y_pred3)), 2))
print("MAE",round(mean_absolute_error(y_test2,y_pred3), 2))
print("MAPE",round(mean_absolute_percentage_error(y_test2,y_pred3), 2))
print("R2 Score : ", round(r2_score(y_test2,y_pred3), 2))

Model Evauation for DISNEY with Linear Regression
**************************************************
MSE 0.43
RMSE 0.66
MAE 0.49
MAPE 0.01
R2 Score :  0.99


In [41]:
print("Model Evauation for DISNEY with Random Forest Regression")
print("*"*50)
print("MSE",round(mean_squared_error(y_test2,y_pred4), 2))
print("RMSE",round(np.sqrt(mean_squared_error(y_test2,y_pred4)), 2))
print("MAE",round(mean_absolute_error(y_test2,y_pred4), 2))
print("MAPE",round(mean_absolute_percentage_error(y_test2,y_pred4), 2))
print("R2 Score : ", round(r2_score(y_test2,y_pred4), 2))

Model Evauation for DISNEY with Random Forest Regression
**************************************************
MSE 23.95
RMSE 4.89
MAE 2.97
MAPE 0.03
R2 Score :  0.53


### Model Evaluation of AMZN

In [None]:
train, test= train_test_split(df3,test_size=0.2)
x_train = train[['Open', 'High', 'Low', 'Volume']].values
x_test = test[['Open', 'High', 'Low', 'Volume']].values
y_train = train['Close'].values
y_test = test['Close'].values

In [None]:
lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)
y_pred = lin_reg.predict(x_test)
result = lin_reg.predict([[262.000000, 267.899994, 250.029999, 11896100]])
print(result)

[254.76305315]


In [None]:
print("Model Evauation for AMZN")
print("MSE",round(mean_squared_error(y_test,y_pred), 2))
print("RMSE",round(np.sqrt(mean_squared_error(y_test,y_pred)), 2))
print("MAE",round(mean_absolute_error(y_test,y_pred), 2))
print("MAPE",round(mean_absolute_percentage_error(y_test,y_pred), 2))
print("R2 Score : ", round(r2_score(y_test,y_pred), 2))

Model Evauation for AMZN
MSE 0.43
RMSE 0.66
MAE 0.52
MAPE 0.0
R2 Score :  1.0


### Model Evaluation for NFLX

In [None]:
train, test= train_test_split(df4,test_size=0.2)
x_train = train[['Open', 'High', 'Low', 'Volume']].values
x_test = test[['Open', 'High', 'Low', 'Volume']].values
y_train = train['Close'].values
y_test = test['Close'].values

In [None]:
lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)
y_pred = lin_reg.predict(x_test)
result = lin_reg.predict([[262.000000, 267.899994, 250.029999, 11896100]])
print(result)

[256.7328319]


In [None]:
print("Model Evauation for AMZN")
print("MSE",round(mean_squared_error(y_test,y_pred), 2))
print("RMSE",round(np.sqrt(mean_squared_error(y_test,y_pred)), 2))
print("MAE",round(mean_absolute_error(y_test,y_pred), 2))
print("MAPE",round(mean_absolute_percentage_error(y_test,y_pred), 2))
print("R2 Score : ", round(r2_score(y_test,y_pred), 2))

Model Evauation for AMZN
MSE 15.15
RMSE 3.89
MAE 3.07
MAPE 0.01
R2 Score :  1.0
