### **1. Problem statement**
 - We are given Netflix stock price from 2016 to 2021. 
 - The task is to predict the trend of the stock price for 2022-2023. 

### **2. Import library**

In [48]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.dates as mdates
import datetime as dt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
import plotly.express as px
from plotly import graph_objs as go
from sklearn.metrics import r2_score

### **3. Data processing**

#### 3.0 import the data

In [49]:
dataset_train = pd.read_csv('NFLX_train.csv')

In [50]:
dataset_train.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-04,109.0,110.0,105.209999,109.959999,109.959999,20794800
1,2016-01-05,110.449997,110.580002,105.849998,107.660004,107.660004,17664600
2,2016-01-06,105.290001,117.910004,104.959999,117.68,117.68,33045700
3,2016-01-07,116.360001,122.18,112.290001,114.559998,114.559998,33636700
4,2016-01-08,116.330002,117.720001,111.099998,111.389999,111.389999,18067100


In [51]:
dataset_train.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
1505,2021-12-23,616.400024,616.880005,607.570007,614.090027,614.090027,1621100
1506,2021-12-27,615.0,615.0,609.25,613.119995,613.119995,2061500
1507,2021-12-28,614.950012,618.409973,609.690002,610.710022,610.710022,1882800
1508,2021-12-29,610.710022,613.97998,604.679993,610.539978,610.539978,1287200
1509,2021-12-30,612.98999,620.609985,611.23999,612.090027,612.090027,1625100


In [52]:
training_set = dataset_train.iloc[:, 1: 2].values
training_set.shape

(1510, 1)

In [53]:
x_dates =  [dt.datetime.strptime(d, '%Y-%m-%d').date() for d in dataset_train['Date']]

fig = px.line(x=x_dates, y=dataset_train['Open'], labels={"x":"Time Scale", "y":"Currency in USD "},title='Real Stock Price of NETFLIX.INC',width=1300,height=600)
fig.show()

#### 3.1 Feature scaling

In [54]:
sc = StandardScaler()
#fit: get min/max of train data
training_set_scaled = sc.fit_transform(training_set)
training_set_scaled.shape

(1510, 1)

#### 3.2 Data structure creation

- taking the reference of past 5 days of data to predict the future stock price.  
- It is observed that taking 5 days of past data gives us best results.   
- X train will have data of 5 days prior to our date and y train will have data of one day after our date

In [55]:
X_train = []
y_train = []
n_future = 1  # Number of days we want to look into the future based on the past days.
n_past = 5 # Number of past days we want to use to predict the future.



for i in range(n_past, len(training_set_scaled)- n_future +1):
    X_train.append(training_set_scaled[i-n_past: i, 0:training_set_scaled.shape[1]])
    y_train.append(training_set_scaled[i+ n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

In [56]:
X_train.shape

(1505, 5, 1)

In [57]:
y_train.shape

(1505, 1)

#### 3.3 Data reshaping

In [58]:
X_train = np.reshape(X_train, newshape = 
                     (X_train.shape[0], X_train.shape[1], 1))

1. Number of stock prices - 1508  
2. Number of time steps - 5 
3. Number of Indicator - 1  

In [59]:
X_train.shape

(1505, 5, 1)

### **4. Model Building**

#### 4.1 Create model

In [60]:
model = Sequential()
# add 1st lstm layer
model.add(LSTM(units = 75, return_sequences = True, input_shape = (X_train.shape[1], 1)))
model.add(Dropout(rate = 0.2))

# add 2nd lstm layer
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(rate = 0.2))

# add 3rd lstm layer
model.add(LSTM(units = 32, return_sequences = False))
model.add(Dropout(rate = 0.2))
 
# add output layer
model.add(Dense(units = 1))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 5, 75)             23100     
_________________________________________________________________
dropout_3 (Dropout)          (None, 5, 75)             0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 5, 50)             25200     
_________________________________________________________________
dropout_4 (Dropout)          (None, 5, 50)             0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 32)                10624     
_________________________________________________________________
dropout_5 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                

In [61]:
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

#### 4.2 Model fit

In [62]:
history = model.fit(X_train, y_train,  epochs=100, batch_size=32, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [92]:
fig = px.line(y=history.history['loss'], labels={"x":"epoch", "y":"loss "},title='model loss')
fig.show()


###**5. Model evaluation**

#### 5.1 Read and convert

In [64]:
dataset_test = pd.read_csv('NFLX_test.csv')

In [65]:
dataset_test.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-01-03,605.609985,609.98999,590.559998,597.369995,597.369995,3067500
1,2022-01-04,599.909973,600.409973,581.599976,591.150024,591.150024,4393100
2,2022-01-05,592.0,592.840027,566.880005,567.52002,567.52002,4148700
3,2022-01-06,554.340027,563.359985,542.01001,553.289978,553.289978,5711800
4,2022-01-07,549.460022,553.429993,538.219971,541.059998,541.059998,3382900


In [66]:
dataset_test.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
201,2022-10-20,272.779999,278.380005,266.380005,268.160004,268.160004,17207100
202,2022-10-21,268.950012,290.75,265.679993,289.570007,289.570007,24862700
203,2022-10-24,290.230011,290.48999,280.359985,282.450012,282.450012,13326400
204,2022-10-25,286.950012,297.589996,285.549988,291.019989,291.019989,15100700
205,2022-10-26,290.040009,305.630005,288.040009,298.619995,298.619995,15714100


In [67]:
real_stock_price = dataset_test.iloc[:, 1: 2].values
real_stock_price.shape

(206, 1)

#### 5.2 Concat and convert

In [68]:
#vertical concat use 0, horizontal uses 1
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), 
                          axis = 0)
#use .values to make numpy array
inputs = dataset_total[len(dataset_total) - len(dataset_test) - n_past:].values

#### 5.3 Reshape and scale

In [69]:
#reshape data to only have 1 col
inputs = inputs.reshape(-1, 1)

#scale input
inputs = sc.transform(inputs)

In [70]:
len(inputs)

211

#### 5.4 Create test data strucutre

In [71]:
X_test = []
for i in range(n_past, len(inputs)):
    X_test.append(inputs[i-n_past:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [72]:
X_test.shape

(206, 5, 1)

### **6. Model prediction**

In [73]:
predicted_stock_price = model.predict(X_test)

In [74]:
predicted_stock_price[:5]

array([[1.7502   ],
       [1.7212462],
       [1.6913627],
       [1.6519445],
       [1.4958247]], dtype=float32)

In [75]:
#inverse the scaled value
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

#### 6.1 Result visualization

In [76]:
data = {"Real_Price":real_stock_price[:,0],"Predicted_Price":predicted_stock_price[:,0]}

In [77]:
predicted_stock_price.shape,real_stock_price.shape

((206, 1), (206, 1))

In [78]:
df = pd.DataFrame(data,index=dataset_test['Date'])
df.head(10)

Unnamed: 0_level_0,Real_Price,Predicted_Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-03,605.609985,602.163757
2022-01-04,599.909973,597.496704
2022-01-05,592.0,592.67981
2022-01-06,554.340027,586.32605
2022-01-07,549.460022,561.161194
2022-01-10,538.48999,550.136475
2022-01-11,536.98999,539.786987
2022-01-12,544.27002,534.788269
2022-01-13,537.059998,537.058105
2022-01-14,517.599976,533.839478


In [79]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df["Real_Price"], name="Real Stock Price"))
fig.add_trace(go.Scatter(x=df.index, y=df["Predicted_Price"], name="Predicted Stock Price"))
fig.layout.update(title_text='Real Stock Price Vs Predicted Stock Price of NETFLIX.INC', xaxis_rangeslider_visible=True)
fig.show()

In [80]:
# rmse=np.sqrt(np.mean(((predicted_stock_price- real_stock_price)**2)))
# rmse

r2_score(real_stock_price, predicted_stock_price)

0.9693953777770716

###**7. Model Forecasting**

In [81]:
train_dates = pd.to_datetime(dataset_test['Date'])

In [82]:
n_future = 200
forecast_period_dates = pd.date_range(list(train_dates)[-1],periods=n_future,freq='2d').tolist()
forecast = model.predict(X_train[-n_future:])

In [83]:
forecast_copies = np.repeat(forecast, dataset_test.shape[1], axis=-1)
y_pred_future = sc.inverse_transform(forecast_copies)[:,0]

In [84]:
forecast_dates = []
for time_i in forecast_period_dates:
    forecast_dates.append(time_i.date())

In [85]:
original = dataset_test[['Date', 'Open']]
original

Unnamed: 0,Date,Open
0,2022-01-03,605.609985
1,2022-01-04,599.909973
2,2022-01-05,592.000000
3,2022-01-06,554.340027
4,2022-01-07,549.460022
...,...,...
201,2022-10-20,272.779999
202,2022-10-21,268.950012
203,2022-10-24,290.230011
204,2022-10-25,286.950012


In [86]:
df_forecast = pd.DataFrame({'Forecast_Date':np.array(forecast_dates), 'Forecast_Price':y_pred_future})
df_forecast["Forecast_Price"].iloc[0]=original["Open"].iloc[-1]
df_forecast[1:]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Forecast_Date,Forecast_Price
1,2022-10-28,514.226257
2,2022-10-30,506.709564
3,2022-11-01,505.861755
4,2022-11-03,517.383057
5,2022-11-05,524.406372
...,...,...
195,2023-11-20,591.056152
196,2023-11-22,600.744995
197,2023-11-24,602.460510
198,2023-11-26,603.365051


In [87]:
fig = px.line(df_forecast[1:],x='Forecast_Date', y='Forecast_Price',labels={"x":"Time Scale", "y":"Currency in USD "},title='Forecasted Stock Price of NETFLIX.INC',width=1200,height=600)
fig.show()

In [88]:
print(df_forecast[df_forecast.Forecast_Price == df_forecast.Forecast_Price.max()]) #Maximum expected stock price value of Netflix

    Forecast_Date  Forecast_Price
173    2023-10-07      670.635742
