### **1. Problem statement**
 - We are given Netflix stock price from 2016 to 2021. 
 - The task is to predict the trend of the stock price for 2022 -**2023**. 

### **2. Import library**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler,StandardScaler
import matplotlib.dates as mdates
import datetime as dt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
import plotly.express as px
from plotly import graph_objs as go




### **3. Data processing**

#### 3.0 import the data

In [2]:
dataset_train = pd.read_csv('BX_train.csv')

In [3]:
dataset_train.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-04,28.629999,29.15,28.27,28.950001,20.026834,6938500
1,2016-01-05,29.040001,29.049999,27.92,28.139999,19.466507,6048200
2,2016-01-06,27.52,27.530001,26.950001,27.290001,18.8785,7954000
3,2016-01-07,26.68,27.110001,26.32,26.379999,18.24898,7027400
4,2016-01-08,26.610001,26.809999,25.459999,25.5,17.640223,6510600


In [4]:
dataset_train.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
1505,2021-12-23,130.820007,135.5,130.369995,135.0,129.022964,4729500
1506,2021-12-27,134.75,135.479996,133.440002,135.100006,129.11853,2558200
1507,2021-12-28,135.139999,136.460007,131.350006,131.380005,125.563232,2745400
1508,2021-12-29,131.309998,131.490005,127.900002,128.509995,122.82029,4076500
1509,2021-12-30,128.610001,130.440002,128.309998,128.5,122.81073,2164800


In [5]:
training_set = dataset_train.iloc[:, 1: 2].values
training_set.shape

(1510, 1)

In [6]:
x_dates =  [dt.datetime.strptime(d, '%Y-%m-%d').date() for d in dataset_train['Date']]

fig = px.line(x=x_dates, y=dataset_train['Open'], labels={"x":"Time Scale", "y":"Currency in USD "},title='Real Stock Price of NETFLIX.INC',width=1200,height=600)
fig.show()

#### 3.1 Feature scaling

In [7]:
sc = StandardScaler()
#fit: get min/max of train data
training_set_scaled = sc.fit_transform(training_set)
training_set_scaled.shape

(1510, 1)

#### 3.2 Data structure creation

- taking the reference of past 2 days of data to predict the future stock price.  
- It is observed that taking 2 days of past data gives us best results.   
- X train will have data of 2 days prior to our date and y train will have data of one day after our date

In [8]:
## 20 timesteps and 1 output
X_train = []
y_train = []
n_future = 1   # Number of days we want to look into the future based on the past days.
n_past = 5 # Number of past days we want to use to predict the future.



for i in range(n_past, len(training_set_scaled)- n_future +1):
    X_train.append(training_set_scaled[i-n_past: i, 0:training_set_scaled.shape[1]])
    y_train.append(training_set_scaled[i+ n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

In [9]:
X_train.shape

(1505, 5, 1)

In [10]:
y_train.shape

(1505, 1)

#### 3.3 Data reshaping

In [11]:
X_train = np.reshape(X_train, newshape = 
                     (X_train.shape[0], X_train.shape[1], 1))

1. Number of stock prices - 1508  
2. Number of time steps - 5 
3. Number of Indicator - 1  

In [12]:
X_train.shape

(1505, 5, 1)

### **4. Model Building**

#### 4.1 Create model

In [13]:
model = Sequential()
# add 1st lstm layer
model.add(LSTM(units = 75, return_sequences = True, input_shape = (X_train.shape[1], 1)))
model.add(Dropout(rate = 0.2))

# add 2nd lstm layer: 32 neurons
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(rate = 0.2))

model.add(LSTM(units = 32, return_sequences = False))
model.add(Dropout(rate = 0.2))
 
# add output layer
model.add(Dense(units = 1))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 5, 75)             23100     
_________________________________________________________________
dropout (Dropout)            (None, 5, 75)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 5, 50)             25200     
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 50)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                10624     
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 3

In [14]:
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

#### 4.2 Model fit

In [15]:
history = model.fit(X_train, y_train,  epochs=100, batch_size=16, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

###**5. Model evaluation**

#### 5.1 Read and convert

In [16]:
dataset_test = pd.read_csv('BX_test.csv')

In [17]:
dataset_test.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-01-03,130.0,130.25,125.910004,126.629997,121.023514,3901500
1,2022-01-04,127.610001,127.629997,123.800003,124.940002,119.408356,3769400
2,2022-01-05,124.650002,124.889999,117.25,117.790001,112.574905,6376600
3,2022-01-06,116.68,119.989998,115.540001,119.160004,113.884254,4678300
4,2022-01-07,118.830002,119.989998,116.349998,116.650002,111.485374,2963200


In [18]:
dataset_test.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
204,2022-10-25,86.300003,91.599998,86.260002,91.550003,90.668304,5260100
205,2022-10-26,90.510002,94.110001,90.160004,92.739998,91.84684,4396000
206,2022-10-27,94.0,95.260002,92.68,93.449997,92.549995,4348600
207,2022-10-28,90.519997,94.379997,88.389999,93.18,93.18,5431300
208,2022-10-31,92.220001,93.07,91.129997,91.139999,91.139999,3515400


In [19]:
real_stock_price = dataset_test.iloc[:, 1: 2].values
real_stock_price.shape

(209, 1)

#### 5.2 Concat and convert

In [20]:
#vertical concat use 0, horizontal uses 1
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), 
                          axis = 0)
#use .values to make numpy array
inputs = dataset_total[len(dataset_total) - len(dataset_test) - n_past:].values

#### 5.3 Reshape and scale

In [21]:
#reshape data to only have 1 col
inputs = inputs.reshape(-1, 1)

#scale input
inputs = sc.transform(inputs)

In [22]:
len(inputs)

214

#### 5.4 Create test data strucutre

In [23]:
X_test = []
for i in range(n_past, len(inputs)):
    X_test.append(inputs[i-n_past:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [24]:
X_test.shape

(209, 5, 1)

### **6. Model prediction**

In [25]:
predicted_stock_price = model.predict(X_test)

In [26]:
predicted_stock_price[:5]

array([[2.9419563],
       [2.9374325],
       [2.8745406],
       [2.8079495],
       [2.650269 ]], dtype=float32)

In [27]:
#inverse the scaled value
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

#### 6.1 Result visualization

In [28]:
data = {"Real_Price":real_stock_price[:,0],"Predicted_Price":predicted_stock_price[:,0]}

In [29]:
predicted_stock_price.shape,real_stock_price.shape

((209, 1), (209, 1))

In [30]:
df = pd.DataFrame(data,index=dataset_test['Date'])
df.head(10)

Unnamed: 0_level_0,Real_Price,Predicted_Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-03,130.0,130.98027
2022-01-04,127.610001,130.85376
2022-01-05,124.650002,129.095093
2022-01-06,116.68,127.232986
2022-01-07,118.830002,122.823708
2022-01-10,115.68,121.850716
2022-01-11,115.769997,118.6959
2022-01-12,122.169998,117.453545
2022-01-13,122.610001,119.865669
2022-01-14,116.07,120.805946


In [31]:
#fig = px.line(df,x=df.index, y=["Real_Price","Predicted_Price"],labels={"x":"Time Scale", "y":"Currency in USD "},title='Real Stock Price Vs Predicted Stock Price of NETFLIX.INC',width=1200,height=600)
#fig.show()

fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df["Real_Price"], name="Real Stock Price"))
fig.add_trace(go.Scatter(x=df.index, y=df["Predicted_Price"], name="Predicted Stock Price"))
fig.layout.update(title_text='Real Stock Price Vs Predicted Stock Price of NETFLIX.INC', xaxis_rangeslider_visible=True)
fig.show()

In [32]:
rmse=np.sqrt(np.mean(((predicted_stock_price- real_stock_price)**2)))
rmse

4.39673867148788

###**7. Model Forecasting**

In [33]:
train_dates = pd.to_datetime(dataset_test['Date'])

In [34]:
n_future = 200
forecast_period_dates = pd.date_range(list(train_dates)[-1],periods=n_future,freq='1d').tolist()
forecast = model.predict(X_test[-n_future:])

In [35]:
forecast_copies = np.repeat(forecast, dataset_test.shape[1], axis=-1)
y_pred_future = sc.inverse_transform(forecast_copies)[:,0]

In [36]:
forecast_dates = []
for time_i in forecast_period_dates:
    forecast_dates.append(time_i.date())

In [37]:
original = dataset_test[['Date', 'Open']]
original

Unnamed: 0,Date,Open
0,2022-01-03,130.000000
1,2022-01-04,127.610001
2,2022-01-05,124.650002
3,2022-01-06,116.680000
4,2022-01-07,118.830002
...,...,...
204,2022-10-25,86.300003
205,2022-10-26,90.510002
206,2022-10-27,94.000000
207,2022-10-28,90.519997


In [38]:
df_forecast = pd.DataFrame({'Forecast_Date':np.array(forecast_dates), 'Forecast_Price':y_pred_future})
df_forecast["Forecast_Price"].iloc[0]=original["Open"].iloc[-1]
df_forecast[1:]




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Forecast_Date,Forecast_Price
1,2022-11-01,118.820183
2,2022-11-02,117.772255
3,2022-11-03,117.386505
4,2022-11-04,115.488480
5,2022-11-05,113.134460
...,...,...
195,2023-05-14,86.489021
196,2023-05-15,86.211281
197,2023-05-16,88.095261
198,2023-05-17,90.640541


In [39]:
fig = px.line(df_forecast[1:],x='Forecast_Date', y='Forecast_Price',labels={"x":"Time Scale", "y":"Currency in USD "},title='Forecasted Stock Price of NETFLIX.INC',width=1200,height=600)
fig.show()

In [40]:
print(df_forecast[df_forecast.Forecast_Price == df_forecast.Forecast_Price.max()]) #Maximum expected stock price value of Netflix

   Forecast_Date  Forecast_Price
15    2022-11-15      133.072205


In [41]:
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=original['Date'], y=original['Open'], name="Past Stock"))
# fig.add_trace(go.Scatter(x=df_forecast['Forecast_Date'], y=df_forecast['Forecast_Price'], name="Forecasted Stock"))
# fig.layout.update(title_text='Stock Price of NETFLIX.INC', xaxis_rangeslider_visible=True)
# fig.show()