In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
import datetime


In [6]:
df = pd.read_csv('/Users/navin/Documents/Anik/GitHub/kaggle-playground/notebooks/NFLX stock price prediction/datasets/HistoricalData_1744405684704.csv')

In [8]:
df['Close/Last'] = df['Close/Last'].replace('[\$,]', '', regex=True).astype(float)

df['Date'] = pd.to_datetime(df['Date'])

df.sort_values("Date", inplace=True)

df.reset_index(drop=True, inplace=True)

In [9]:
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df['DayOfWeek'] = df['Date'].dt.dayofweek

df['MA10'] = df['Close/Last'].rolling(window=10).mean()
df['MA50'] = df['Close/Last'].rolling(window=50).mean()

df.dropna(inplace=True)


In [10]:
features = ['Close/Last', 'MA10', 'MA50', 'Year', 'Month', 'Day', 'DayOfWeek']
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[features])

sequence_length = 60
X, y = [], []

for i in range(sequence_length, len(scaled_data)):
    X.append(scaled_data[i-sequence_length:i])
    y.append(scaled_data[i, 0])

X, y = np.array(X), np.array(y)


In [11]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')


  super().__init__(**kwargs)


In [12]:
history = model.fit(X, y, epochs=1000, batch_size=32, verbose=1)


Epoch 1/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 0.0285
Epoch 2/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0030
Epoch 3/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0027
Epoch 4/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0024
Epoch 5/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0021
Epoch 6/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0019
Epoch 7/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0018
Epoch 8/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0016
Epoch 9/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0017
Epoch 10/1000
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms

In [None]:
test_df = pd.read_csv("test.csv")
test_df['Date'] = pd.to_datetime(test_df['Date'])
test_df = test_df.sort_values('Date')
test_df['Close'] = test_df['Close'].replace('[\$,]', '', regex=True).astype(float)

# Feature engineering
test_df['Year'] = test_df['Date'].dt.year
test_df['Month'] = test_df['Date'].dt.month
test_df['Day'] = test_df['Date'].dt.day
test_df['DayOfWeek'] = test_df['Date'].dt.dayofweek
test_df['MA10'] = test_df['Close'].rolling(window=10).mean()
test_df['MA50'] = test_df['Close'].rolling(window=50).mean()

# Combine last training rows with test for sequence
combined = pd.concat([df[features], test_df[features]], ignore_index=True)
combined = combined.dropna()
scaled_combined = scaler.transform(combined)

# Create test sequences
X_test = []
for i in range(len(scaled_combined) - len(test_df), len(scaled_combined)):
    X_test.append(scaled_combined[i-seq_len:i])

X_test = np.array(X_test)


In [None]:
predicted = model.predict(X_test)
predicted = scaler.inverse_transform(np.hstack([predicted, np.zeros((len(predicted), len(features) - 1))]))[:, 0]

submission = pd.DataFrame({
    'DATE': test_df[''],
    'Predicted': predicted
})
submission.to_csv("submission.csv", index=False)


In [None]:
plt.figure(figsize=(14,5))
plt.plot(y[-100:], label='True Price')
plt.plot(model.predict(X[-100:]), label='Predicted')
plt.title('LSTM - Last 100 Predictions')
plt.legend()
plt.show()
