# Install and import libraries

In [1]:
#!pip install yfinance

In [2]:
import pandas as pd
import numpy as np

import yfinance as yf

# Load, exlore and clean data

In [3]:
tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOG', 'META', 'TSLA', 'NVDA', 'PYPL', 'ADBE', 'NFLX']

In [4]:
stock_price_history = yf.download(tickers, start="2010-01-01", end="2020-12-31")

[*********************100%%**********************]  10 of 10 completed


In [5]:
stock_price_history.head()

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAPL,ADBE,AMZN,GOOG,META,MSFT,NFLX,NVDA,PYPL,TSLA,...,AAPL,ADBE,AMZN,GOOG,META,MSFT,NFLX,NVDA,PYPL,TSLA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2010-01-04,6.478997,37.09,6.695,15.610239,,23.522564,7.64,4.240801,,,...,493729600,4710200,151998000,78541293,,38409100,17239600,80020400,,
2010-01-05,6.4902,37.700001,6.7345,15.541497,,23.530169,7.358571,4.302728,,,...,601904800,7108800,177038000,120638494,,49749600,23753100,72864800,,
2010-01-06,6.386965,37.619999,6.6125,15.149715,,23.385761,7.617143,4.330253,,,...,552160000,5336400,143576000,159744526,,58182400,23290400,64916800,,
2010-01-07,6.375158,36.889999,6.5,14.797037,,23.142563,7.485714,4.24539,,,...,477131200,5576700,220604000,257533695,,50559700,9955400,54779200,,
2010-01-08,6.417542,36.689999,6.676,14.994298,,23.302166,7.614286,4.254563,,,...,447610800,5429200,196610000,189680313,,51197400,8180900,47816800,,


In [6]:
df = stock_price_history["Adj Close"].reset_index()

In [7]:
df.fillna(method='ffill', inplace=True) # forward fill
df.fillna(0, inplace=True) # fill the rest with 0 (beginning of the time series)

# Prepare train and test dataset

In [8]:
test_size = int(0.2*len(df)) # 20% test size
train_df = df[:-test_size]
test_df = df[-test_size:]

train_df.shape, test_df.shape

((2215, 11), (553, 11))

# Train models

In [9]:
from prophet import Prophet

In [10]:
# create for loop to train prophet model for each ticker
models = {}

for ticker in tickers:
    model = Prophet()
    model.fit(train_df[["Date", ticker]].rename(columns={"Date": "ds", ticker: "y"}))
    models[ticker] = model

22:17:17 - cmdstanpy - INFO - Chain [1] start processing
22:17:17 - cmdstanpy - INFO - Chain [1] done processing
22:17:17 - cmdstanpy - INFO - Chain [1] start processing
22:17:19 - cmdstanpy - INFO - Chain [1] done processing
22:17:19 - cmdstanpy - INFO - Chain [1] start processing
22:17:20 - cmdstanpy - INFO - Chain [1] done processing
22:17:20 - cmdstanpy - INFO - Chain [1] start processing
22:17:20 - cmdstanpy - INFO - Chain [1] done processing
22:17:20 - cmdstanpy - INFO - Chain [1] start processing
22:17:21 - cmdstanpy - INFO - Chain [1] done processing
22:17:21 - cmdstanpy - INFO - Chain [1] start processing
22:17:22 - cmdstanpy - INFO - Chain [1] done processing
22:17:22 - cmdstanpy - INFO - Chain [1] start processing
22:17:22 - cmdstanpy - INFO - Chain [1] done processing
22:17:23 - cmdstanpy - INFO - Chain [1] start processing
22:17:24 - cmdstanpy - INFO - Chain [1] done processing
22:17:24 - cmdstanpy - INFO - Chain [1] start processing
22:17:25 - cmdstanpy - INFO - Chain [1]

# Explore predictions

In [11]:
from prophet.plot import plot_plotly, plot_components_plotly

In [12]:
model = models[tickers[0]]

future = model.make_future_dataframe(periods=test_size) # create ds for future dates
forecast = model.predict(future) # predict y for future dates

In [13]:
plot_plotly(model, forecast)

In [14]:
plot_components_plotly(model, forecast)

# Evaluate predictions on test set

In [15]:
# import mse
from sklearn.metrics import mean_squared_error
import numpy as np

In [16]:
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

In [17]:
armse_scores = {}
rrmse_scores = {}

for ticker in tickers:
    y_true = test_df[ticker]

    model = models[ticker]
    future = model.make_future_dataframe(periods=test_size)
    forecast = model.predict(future)
    y_pred = forecast.iloc[-test_size:]["yhat"]

    armse_scores[ticker] = np.round(rmse(y_true, y_pred),3)
    rrmse_scores[ticker] = np.round(100*armse_scores[ticker]/y_true.mean(), 2)

In [18]:
results = pd.DataFrame([armse_scores, rrmse_scores], index=["Absolute RMSE", "Relative RMSE"]).T.sort_values("Relative RMSE")
results

Unnamed: 0,Absolute RMSE,Relative RMSE
GOOG,8.09,12.36
ADBE,49.689,14.71
META,34.838,17.24
AMZN,21.352,19.6
MSFT,35.18,23.25
PYPL,33.078,25.68
NFLX,126.941,33.54
AAPL,24.305,35.02
NVDA,33.869,49.49
TSLA,61.499,113.67
