In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [None]:
# Load the training CSV; the 'Date' column is parsed and used as the index.
# ',' is treated as the thousands separator and '.' as the decimal mark.
train_path = 'data/train/Google_Stock_Price_Train.csv'
google_df = pd.read_csv(
    train_path,
    thousands=',',
    decimal='.',
    parse_dates=True,
    index_col='Date',
)

In [None]:
# Preview the first rows of the loaded frame to confirm it parsed as expected.
google_df.head()

In [None]:
# Summary statistics for every column (include='all'), not only numeric ones.
google_df.describe(include='all')

In [None]:
# Print a concise summary of the frame (index, columns, dtypes, non-null counts).
google_df.info()

# Check if there are any NaN (missing values) in the dataset.

In [None]:
# One boolean per column: True if that column holds at least one missing value.
google_df.isna().any()

In [None]:
# Line chart of the Open price over time; the legend is drawn on the Axes
# returned by the pandas plot call.
open_prices_ax = google_df['Open'].plot(
    label='Open stock prices',
    figsize=(16, 6),
    ylabel='Price',
    xlabel='Time',
)
open_prices_ax.legend()
plt.show()




# Compute the 7-Day Rolling Mean

In [None]:
# Mean over a sliding window of 7 rows, computed for each column of the frame.
rolling_mean_7days = google_df.rolling(7).mean()


In [None]:
# Show the first 20 rows of the 7-row rolling mean.
rolling_mean_7days.head(20)

In [None]:
# Overlay the raw Open price with the 30-row moving average of the Close price.
# An explicit Axes is used so both series and the labels target the same plot.
fig, ax = plt.subplots(figsize=(16, 6))
google_df['Open'].plot(
    ax=ax,
    label='Open stock prices',
)
google_df['Close'].rolling(window=30).mean().plot(
    ax=ax,
    label='Close stock prices Moving Average [30days]'
)
# Label the axes the same way as the first Open-price plot so the figure
# can be read on its own.
ax.set(xlabel='Time', ylabel='Price')
ax.legend()
plt.show()

In [None]:
# Add a 30-row rolling-mean column for each of the Open and Close prices.
for price_col in ('Open', 'Close'):
    google_df[f'{price_col}: 30 Day Mean'] = (
        google_df[price_col].rolling(window=30).mean()
    )

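# Expanding mean with a minimum number of periods
An expanding window aggregates every observation from the start of the series up to the current row. `min_periods=1` sets the minimum number of observations per window to one, so a mean is produced from the very first row.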

In [None]:
# Compare the Close price with its expanding (cumulative) mean.
# Both lines are labelled and a legend is drawn so the curves can be told apart.
fig, ax = plt.subplots(figsize=(16, 6))
google_df['Close'].plot(ax=ax, label='Close stock prices')
google_df['Close'].expanding(min_periods=1).mean().plot(
    ax=ax,
    label='Close stock prices Expanding Mean'
)
ax.set(xlabel='Time', ylabel='Price')
ax.legend()
plt.show()

In [None]:
# Keep only the Open price, as a single-column DataFrame, for model training.
training_set = google_df['Open'].to_frame()
training_set.head()

# Data Cleaning

In [None]:
# One boolean per column: True if the training column contains a missing value.
training_set.isna().any()

# Feature Scaling (mostly numerical features)

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# (rows, columns) of the training frame before scaling.
training_set.shape

In [None]:
# Rescale the training values into the [0, 1] range. The fitted scaler is
# kept in `scaler` so the same mapping can be reused later.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(training_set)
# transform() yields a 2-D matrix; squeeze() drops its length-1 axis.
training_set_scaled = scaler.transform(training_set).squeeze()
training_set_scaled.shape

# Create a data structure whose features are the Open prices of the past 60 days and whose target is the next Open price

In [None]:
# Number of consecutive past observations used as the features of one sample.
WINDOW_SIZE = 60

n_observations = len(training_set_scaled)
if n_observations <= WINDOW_SIZE:
    # Without this guard the arrays below would come out empty and 1-D, and
    # the failure would only surface later as an unrelated indexing error.
    raise ValueError(
        f'Need more than {WINDOW_SIZE} observations to build training '
        f'windows, got {n_observations}.'
    )

# Row k holds observations k .. k + WINDOW_SIZE - 1 (the features); the
# matching target is the observation at position k + WINDOW_SIZE.
X_train = np.array([
    training_set_scaled[end - WINDOW_SIZE:end]
    for end in range(WINDOW_SIZE, n_observations)
])
# np.array() copies, so y_train does not alias training_set_scaled.
y_train = np.array(training_set_scaled[WINDOW_SIZE:])

In [None]:
# Show the shape of the windowed feature array.
print(X_train.shape)

In [None]:
# Append a trailing axis of length 1 so X_train becomes 3-D:
# (first dim, second dim, 1). The first two dimensions are left unchanged.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [None]:
# Show the shape of the feature array again, now that it has been reshaped.
print(X_train.shape)

# Build RNN LSTM Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

In [None]:
# Start from an empty Sequential model; layers are appended with .add().
regressor = Sequential()

In [None]:
# Layer stack, listed in the order it is added to the model: four LSTM
# layers of 50 units, each followed by 20% dropout, then a single-unit Dense
# output. Every LSTM except the last returns its full sequence.
stacked_layers = [
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
]
for layer in stacked_layers:
    regressor.add(layer)



In [None]:
# Configure training: mean squared error loss, Adam optimizer.
regressor.compile(
    loss='mean_squared_error',
    optimizer='Adam',
)



In [None]:
# Train on the windowed data for 100 epochs with batches of 32 samples.
regressor.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
)