In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Fix NumPy's global RNG so any NumPy-driven randomness is repeatable.
SEED = 12345
np.random.seed(SEED)

In [4]:
# Load the daily GOOG price history.
# `parse_dates=True` only asks pandas to parse the *index*; no index column is
# set in this call, so it left `Date` as plain strings. Naming the column makes
# pandas parse it as datetimes at load time.
data = pd.read_csv('GOOG3.csv', parse_dates=['Date'])

In [5]:
# Preview the first rows of the raw frame.
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-08-27,82.683998,82.75,81.287498,81.716499,81.716499,37232000
1,2020-08-28,81.6745,82.358498,81.537498,82.220497,82.220497,29980000
2,2020-08-31,82.394501,82.398247,81.515503,81.709,81.709,36468000
3,2020-09-01,81.831497,83.286499,81.611,83.0355,83.0355,36506000
4,2020-09-02,83.688751,86.658997,83.316498,86.414001,86.414001,50224000


In [6]:
# Preview the last rows of the raw frame.
data.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
499,2022-08-22,116.099998,116.5,114.669998,115.07,115.07,19316000
500,2022-08-23,114.32,115.93,114.300003,114.769997,114.769997,14390700
501,2022-08-24,114.449997,115.717003,113.779999,114.699997,114.699997,16051200
502,2022-08-25,115.150002,117.779999,115.050003,117.699997,117.699997,14874700
503,2022-08-26,115.809998,116.449997,111.25,111.300003,111.300003,31590470


In [7]:
# (rows, columns) of the raw frame.
data.shape

(504, 7)

In [8]:
# Count missing values per column.
data.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [9]:
# Make sure `Date` holds real datetimes (ISO year-month-day strings in the CSV).
parsed_dates = pd.to_datetime(data['Date'], format="%Y-%m-%d")
data = data.assign(Date=parsed_dates)

In [10]:
# Keep only the date and the adjusted close for modelling.
# Take an explicit copy: later cells rename, sort, add columns to and drop rows
# from this frame, and doing that on a bare selection of `data` triggers
# SettingWithCopyWarning (currently hidden by the global warnings filter).
df_close = data[['Date', 'Adj Close']].copy()

In [11]:
# Rename the target column to the short name `y` used by the rest of the notebook.
df_close = df_close.rename(columns={'Adj Close': 'y'})

In [12]:
# Order rows chronologically so that shifting later yields true past values.
df_close = df_close.sort_values(by='Date')

In [13]:
# First ten rows of the date/target frame.
df_close.head(10)

Unnamed: 0,Date,y
0,2020-08-27,81.716499
1,2020-08-28,82.220497
2,2020-08-31,81.709
3,2020-09-01,83.0355
4,2020-09-02,86.414001
5,2020-09-03,82.092003
6,2020-09-04,79.552002
7,2020-09-08,76.619499
8,2020-09-09,77.848
9,2020-09-10,76.600998


In [14]:
# Last five rows of the date/target frame.
df_close.tail(5)

Unnamed: 0,Date,y
499,2022-08-22,115.07
500,2022-08-23,114.769997
501,2022-08-24,114.699997
502,2022-08-25,117.699997
503,2022-08-26,111.300003


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Line chart of the adjusted close over the whole date range.
fig, ax = plt.subplots(figsize=(20, 6))
sns.lineplot(data=df_close, x='Date', y='y', ax=ax)
ax.set_title('Google Stock Price from Aug 2020 to Aug 2022')
ax.set_ylabel('Adjusted Closing Stock Price')
plt.show()

In [None]:
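# Sliding-window framing, illustrated on the series 1,2,3,4,5,6,7,8,9,10
# with a window of 4: each run of 4 consecutive values is the input and
# the value that immediately follows is the target.
#
# 1,2,3,4 -> 5
# 2,3,4,5 -> 6
# 3,4,5,6 -> 7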



In [None]:
window_size = 45

# Lag features y1..y45: column `y<k>` holds the value of `y` from k rows earlier.
# All lag columns are built first and attached in a single concat.
lag_columns = {
    'y' + str(lag): df_close['y'].shift(lag)
    for lag in range(1, window_size + 1)
}
df_close = pd.concat([df_close[['Date', 'y']], pd.DataFrame(lag_columns)], axis=1)

In [None]:
# First five rows: the earliest rows have NaN lags because shift() has no
# earlier value to pull from.
df_close.iloc[0:5,:]

In [None]:
# Rows from position 45 onward: the first rows where all 45 lags are filled.
df_close.iloc[45:50,:]

In [None]:
# Shape before dropping the incomplete (NaN-lag) rows.
df_close.shape

In [None]:
# Discard rows whose lag window is incomplete (any NaN in the row).
df_close = df_close.dropna(axis=0)

In [None]:
# Shape after dropping the incomplete rows.
df_close.shape

In [None]:
# Chronological split: everything up to and including the cutoff date trains
# the model, everything after it is held out for testing.
split_date = pd.to_datetime('2022-08-14')
on_or_before_split = df_close['Date'] <= split_date
after_split = df_close['Date'] > split_date
df_train = df_close[on_or_before_split]
df_test = df_close[after_split]

In [None]:
# Report how many rows/columns landed in each split.
train_shape, test_shape = df_train.shape, df_test.shape
print(f" Training shape {train_shape} Testing Shape {test_shape}")

In [None]:
# Last rows of the training split (should end at or before the cutoff date).
df_train.tail()

In [None]:
# First rows of the test split (should start after the cutoff date).
df_test.head(10)

In [None]:
# Features are the lag columns (everything except the date and the target);
# the target is the current day's `y`.
non_feature_columns = ['Date', 'y']

X_train = df_train.drop(columns=non_feature_columns)
X_test = df_test.drop(columns=non_feature_columns)

y_train = df_train.loc[:, 'y']
y_test = df_test.loc[:, 'y']

In [None]:
# Report the feature-matrix shapes for both splits.
x_train_shape, x_test_shape = X_train.shape, X_test.shape
print(f" X Train Shape {x_train_shape} and X Test shape {x_test_shape}")

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply the
# same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

In [None]:
# Seed TensorFlow's global RNG (same seed value as the NumPy seed above).
tf.random.set_seed(12345)

In [None]:
# Shape of the scaled training features before adding the trailing axis.
X_train.shape

In [None]:
# Append a trailing length-1 axis so each sample becomes (timesteps, 1),
# matching the LSTM's input_shape of (None, 1).
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

In [None]:
# Show the 3-D shapes of both splits, train first.
for split_features in (X_train, X_test):
    print(split_features.shape)

In [None]:
# Start an empty layer stack; layers are appended in the following cells.
model = Sequential()

In [None]:
# First recurrent layer: emits the full output sequence so that a second LSTM
# can be stacked on top; accepts sequences of any length with one feature.
first_lstm = LSTM(
    units=128,
    activation='relu',
    return_sequences=True,
    input_shape=(None, 1),
)
model.add(first_lstm)

In [None]:
# Second recurrent layer: returns only its final output (return_sequences is
# left at its default).
second_lstm = LSTM(units=128, activation='relu')
model.add(second_lstm)

In [None]:
# Regression head: a single linear unit producing the predicted price.
output_layer = Dense(units=1, activation='linear')
model.add(output_layer)

In [None]:
# Print the layer stack and parameter counts.
model.summary()

In [None]:
# Train with Adam on the Huber loss and track MSE as a metric.
# `metrics` must be a list/tuple/dict; a bare string is rejected by newer Keras
# releases. The metric is logged as `mse` / `val_mse`, which is the name the
# checkpoint callback monitors.
model.compile(optimizer='adam',
              loss='huber',
              metrics=['mse'])

In [None]:
# MAPE = mean(|actual - predicted| / |actual|)
# "Accuracy" as reported below = 100% - MAPE (expressed as a percentage)

In [None]:
# Save the model to GoogModel.h5 whenever the validation MSE improves.
# The original passed `save_weights=True`, which is not a ModelCheckpoint
# argument (the real flag is `save_weights_only`). The file is later reloaded
# with `load_model`, which needs the full model rather than weights only, so
# the default full-model saving is what this notebook requires.
cp = tf.keras.callbacks.ModelCheckpoint('GoogModel.h5',
                                        monitor='val_mse',
                                        save_best_only=True,
                                        verbose=1)

In [None]:
# The checkpoint callback keeps the epoch with the best validation MSE, so the
# validation data must not be the test set: otherwise the saved model is
# selected on the very rows used to report test accuracy. Hold out the most
# recent ~10% of the (chronologically ordered) training window instead and
# leave X_test / y_test untouched for the final evaluation.
val_size = max(1, int(0.1 * len(X_train)))
X_fit, X_val = X_train[:-val_size], X_train[-val_size:]
y_fit, y_val = y_train.iloc[:-val_size], y_train.iloc[-val_size:]

history = model.fit(X_fit,
                    y_fit,
                    validation_data=(X_val, y_val),
                    epochs=200,
                    callbacks=[cp])

In [None]:
# Reload the model from the file written by the checkpoint callback.
new_model = tf.keras.models.load_model('GoogModel.h5')

In [None]:
# Predictions of the reloaded model on the training inputs.
y_tr_pred = new_model.predict(X_train)

In [None]:
# Predictions of the reloaded model on the test inputs.
y_ts_pred = new_model.predict(X_test)

In [None]:
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
# MAPE = np.mean(np.abs(actual - predicted) / np.abs(actual))

In [None]:
# "Accuracy" here is 100 minus the MAPE in percent, rounded to 2 decimals.
train_mape = mean_absolute_percentage_error(y_true=y_train, y_pred=y_tr_pred)
train_accuracy = np.round(100 - train_mape*100, 2)
print(f"Training set accuracy : {train_accuracy}%")

In [None]:
# "Accuracy" here is 100 minus the MAPE in percent, rounded to 2 decimals.
test_mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_ts_pred)
test_accuracy = np.round(100 - test_mape*100, 2)
print(f"Test set accuracy : {test_accuracy}%")

In [None]:
# Pair each training date/actual with the model's prediction for plotting.
# Copy first: assigning a new column onto a bare selection of `df_train`
# triggers SettingWithCopyWarning; the test-set frame built later in the
# notebook already copies in the same way.
df1 = df_train[['Date', 'y']].copy()
df1['Pred'] = y_tr_pred

In [None]:
# Spot-check actual vs predicted values on the first training rows.
df1.head()

In [None]:
# Overlay actual (blue) and predicted (red) prices across the training dates.
fig, ax = plt.subplots(figsize=(20, 6))
for column, colour, legend_name in (('y', 'blue', 'Actual'),
                                    ('Pred', 'red', 'Predicted')):
    ax.plot(df1['Date'], df1[column], color=colour, label=legend_name)
ax.set_title("Google Stock Price - Actual vs Predicted ")
ax.set_xlabel("Date")
ax.set_ylabel("Stock Price")
ax.legend()
plt.show()

In [None]:
# Independent copy of the test dates and actual prices, ready for a prediction column.
new_test_df = df_test.loc[:, ['Date', 'y']].copy()

In [None]:
# Attach the test-set predictions next to the actual values.
new_test_df['yhat'] = y_ts_pred

In [None]:
# Display actual vs predicted for every test date.
new_test_df

In [None]:
# Overlay actual (blue) and predicted (red) prices across the test dates.
fig, ax = plt.subplots(figsize=(12, 4))
for column, colour, legend_name in (('y', 'blue', 'Actual'),
                                    ('yhat', 'red', 'Predicted')):
    ax.plot(new_test_df['Date'], new_test_df[column], color=colour, label=legend_name)
ax.set_title("Google Stock Price - Actual vs Predicted on Test Set")
ax.set_xlabel("Date")
ax.set_ylabel("Stock Price")
ax.legend()
plt.show()

In [None]:
# Display the test-set actual/predicted table again, after the plot.
new_test_df

In [None]:
# Last rows of the full lagged frame (target plus its lag columns).
df_close.tail()