# 1- Importing libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.express as px

from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM, Dropout , GRU
from tensorflow.keras.callbacks import EarlyStopping

import warnings
from warnings import filterwarnings
warnings.simplefilter(action='ignore')
%matplotlib inline

# 2- Importing Data

In [None]:
# Load the GOOG price history from the Kaggle input directory.
# NOTE(review): parse_dates=True only asks pandas to try parsing the index; no index_col is
# given, so the 'date' column presumably stays text (the later `.str.split` call relies on that).
Data = pd.read_csv('../input/google-stock-prediction/GOOG.csv',parse_dates=True)

In [None]:
# Working frame used by the rest of the notebook.
df = pd.DataFrame(Data)
# Copy taken before any preprocessing; it is reused later for the per-year analysis.
df1 = df.copy()
df

# 3- Introduction to the dataset

### 
**In this dataset, we provide a comprehensive collection of fundamental stock metrics essential for analyzing stock performance and behavior. These metrics offer valuable insights into a stock's performance on specific trading days and account for various corporate actions. Let's delve into the dataset and introduce you to these critical stock metrics:**

**Close Price:** The closing price of a stock is the final price at which a stock is traded on a given trading day. It's one of the most commonly used prices to analyze a stock's performance.

**High Price:** The highest price at which a stock traded during a specific trading day. It gives an idea of the highest level of demand for the stock during that day.

**Low Price:** The lowest price at which a stock traded during a specific trading day. It indicates the lowest level to which the stock's price dropped during the day.

**Open Price:** The price of a stock at the beginning of a trading day. It's the price at which the first trade occurred on that day.

**Volume:** The total number of shares traded during a trading day. Volume is a measure of market activity and liquidity for a stock.

**Adjusted Close Price:** The closing price of a stock adjusted for any corporate actions like dividends, stock splits, or other events that could affect the stock price.

**Adjusted High Price:** The highest price of a stock during a trading day, adjusted for any corporate actions.

**Adjusted Low Price:** The lowest price of a stock during a trading day, adjusted for any corporate actions.

**Adjusted Open Price:** The opening price of a stock at the beginning of a trading day, adjusted for any corporate actions.

**Adjusted Volume:** The trading volume of a stock adjusted for any corporate actions. This can provide a clearer picture of trading activity.

**Dividend Cash:** The amount of money paid by a company to its shareholders as a portion of its profits. Dividends are typically paid on a per-share basis.

**Split Factor:** If a stock undergoes a stock split, the split factor indicates the ratio by which the shares were split. For instance, a 2-for-1 split means that for every old share, you now have 2 new shares.

These features provide a comprehensive view of a stock's trading activity, price movement, and any adjustments made due to corporate events. Analysts and traders use these features to analyze historical trends, make predictions, and assess the overall health of a company's stock.

In [None]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().T

In [None]:
# Print the column dtypes and non-null counts.
df.info()

In [None]:
# Column labels of the raw frame.
df.columns

In [None]:
# Per-column overview: number of distinct values plus missing-value counts.
# isna() and isnull() are aliases, so the two missing-value rows carry the same numbers.
Unique = df.nunique().rename('Unique').to_frame().T
isNa = df.isna().sum().rename('isNa').to_frame().T
isNull = df.isnull().sum().rename('isNull').to_frame().T
summary = pd.concat([Unique, isNa, isNull], axis=0)
summary

In [None]:
# Number of rows that repeat an earlier row in every column.
df.duplicated().sum()

# 4- Preprocessing of the Dataset

In [None]:
# Keep only the text before the first space of each 'date' value
# (presumably the calendar-date part of a timestamp string).
date_parts = df['date'].str.split(' ')
df['Date'] = date_parts.str[0]
df

In [None]:
# Remove the raw 'date' column (replaced by 'Date') and the 'symbol' column, modifying df in place.
df.drop(columns=['date','symbol'],inplace=True)

In [None]:
# Convert the date strings to datetimes and make them the index,
# which the later resample() calls operate on.
df['Date']= pd.to_datetime(df['Date'])
df = df.set_index('Date')
df


# 5- Exploratory Data Analysis (EDA)

In [None]:
# Shared serif font settings for titles (font1), axis labels (font2) and smaller labels (font3).
font1, font2, font3 = (
    {'family': 'serif', 'size': point_size} for point_size in (18, 15, 13)
)

In [None]:
# One line plot per column of df on a 6x2 grid (the grid and the palette cover up to 12 columns).
# The palette repeats three colours; an earlier throw-away colour list was overwritten before
# use and has been removed.
colors = ['lightskyblue', 'lightpink', 'cadetblue'] * 4
f = plt.figure()
f.set_figwidth(20)
f.set_figheight(40)

plt.subplots_adjust(left=0.1,
                    bottom=0.1,
                    right=0.9,
                    top=0.9,
                    wspace=0.2,
                    hspace=0.5)
# Subplot positions are 1-based, hence start=1.
for i, column in enumerate(df.columns, start=1):
    plt.subplot(6, 2, i)
    plt.plot(df[column], color=colors[i - 1])
    plt.title(column, backgroundcolor='grey', color='white', fontdict=font1)
    plt.xticks(fontsize=13)
    plt.yticks(fontsize=13)
    plt.xlabel('Year', fontdict=font2, labelpad=15)
    plt.ylabel("Price", fontdict=font2, labelpad=15)
    plt.grid()

    

In [None]:
# Price-only view: drop the volume columns and the dividend / split columns.
df2 = df.drop(columns=['volume','divCash','splitFactor','adjVolume'])

In [None]:
# Overlay every remaining price column of df2 on a single axis; alpha keeps overlapping lines readable.
fig, ax = plt.subplots(figsize=(15, 10))
df2.plot(ax=ax, alpha=0.5)
ax.set_title('Stock Price Analysis', backgroundcolor='grey',color='white',fontdict=font2)
ax.set_xlabel('Year',fontdict=font3,labelpad=15)
ax.set_ylabel('Price',fontdict=font3,labelpad=15)


### Examining the dataset spanning from 2016 to 2021, we can now observe the variations and advancements in open, low, high, and close values over these years:

In [None]:
# Same date extraction as was done for df, applied to the copy df1.
df1['Date'] = df1['date'].str.split(' ').str.get(0)
# Drop the symbol, the raw date text and the dividend / split columns, in place.
df1.drop(columns=['symbol','date','divCash','splitFactor'],inplace=True)

In [None]:
# Rows whose 'Date' text falls inside 2016 (string comparison; assumes YYYY-MM-DD formatted dates).
# .copy() makes the selection an independent frame, so assigning the parsed dates below does not
# write into a slice of df1.
in_2016 = (df1['Date'] >= '2016-01-01') & (df1['Date'] <= '2016-12-31')
df_2016 = df1[in_2016].copy()
df_2016['Date'] = pd.to_datetime(df_2016['Date'])
df_2016 = df_2016.set_index('Date')

In [None]:
# Rows whose 'Date' text falls inside 2021 (string comparison; assumes YYYY-MM-DD formatted dates).
# .copy() makes the selection an independent frame, so assigning the parsed dates below does not
# write into a slice of df1.
in_2021 = (df1['Date'] >= '2021-01-01') & (df1['Date'] <= '2021-12-31')
df_2021 = df1[in_2021].copy()
df_2021['Date'] = pd.to_datetime(df_2021['Date'])
df_2021 = df_2021.set_index('Date')

In [None]:
# Monthly means for each of the two years, with every bin labelled by its month start ('MS').
d2016, d2021 = (
    one_year.resample(rule='MS').mean() for one_year in (df_2016, df_2021)
)


In [None]:
# Two stacked panels: monthly mean close/high/low/open for 2016 (top) and 2021 (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 15))

panels = (
    (ax1, d2016, 'Year 2016 Analysis'),
    (ax2, d2021, 'Year 2021 Analysis'),
)
for panel_ax, monthly, panel_title in panels:
    monthly[['close', 'high', 'low', 'open']].plot(ax=panel_ax)
    panel_ax.set_title(panel_title, backgroundcolor='grey', color='white', fontdict=font2)
    panel_ax.set_xlabel('Month', fontdict=font3)

# Extra vertical room so the lower title does not collide with the upper x-axis label.
plt.subplots_adjust(hspace=0.4)

### Visualization of Yearly Mean Prices for Financial Indicators (2016-2021):

In [None]:
# Bar chart of the yearly mean of every column except the dividend / split ones, on a 5x2 grid.
f = plt.figure()
f.set_figwidth(20)
f.set_figheight(40)

plt.subplots_adjust(left=0.1,
                    bottom=0.1,
                    right=0.9,
                    top=0.9,
                    wspace=0.2,
                    hspace=0.5)
bar_colors = ['lightskyblue', 'lightpink', 'cadetblue', 'lightskyblue', 'lightpink', 'cadetblue']
# Subplot positions are 1-based, hence start=1.
for i, column in enumerate(df.drop(columns=['divCash', 'splitFactor']).columns, start=1):
    plt.subplot(5, 2, i)
    # 'A' groups the rows by calendar year.
    ax = df[column].resample('A').mean().plot.bar(color=bar_colors)
    plt.xticks(rotation=45, fontsize=13)
    plt.yticks(fontsize=13)
    plt.title(f'Yearly end Mean {column} Price', backgroundcolor='grey', color='white', fontdict=font1)
    plt.xlabel('Date', fontdict=font2, labelpad=15)
    ax.yaxis.grid()

# One layout pass once every subplot exists; running it after each subplot only repeated the work.
plt.tight_layout()


### By utilizing rolling techniques such as 6-day, 30-day, and 60-day rolling averages, this visualization provides insights into the impact of moving averages on price trends over time. Gain a comprehensive understanding of how rolling averages influence the behavior of financial data in this informative exploration of closing prices and their trends.

In [None]:
# Rolling means over 6, 30 and 60 consecutive rows of df.
W6, W30, W60 = (df.rolling(window=span).mean() for span in (6, 30, 60))

### Explore the dynamic relationship between closing prices and trend patterns through the application of rolling averages in this analysis:

In [None]:
# Closing price against its 6-row rolling mean.
plt.figure(figsize=(12, 9))
for curve, legend_label in ((df['close'], 'Close Price'), (W6['close'], 'Business days rolling')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    curve.plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Moving Averages Analysis', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()
plt.show()


 

In [None]:
# Closing price against its 30-row rolling mean.
plt.figure(figsize=(12, 9))
for curve, legend_label in ((df['close'], 'Close Price'), (W30['close'], '30 Days rolling')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    curve.plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Moving Averages Analysis', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()
plt.show()


 

In [None]:
# Closing price against its 60-row rolling mean.
plt.figure(figsize=(12, 9))
for curve, legend_label in ((df['close'], 'Close Price'), (W60['close'], '60 Days rolling')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    curve.plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Moving Averages Analysis', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()
plt.show()


 

In [None]:
# One horizontal box plot per column of df, all drawn in the same colour.
color = 'cadetblue'

for col in df.columns:
    fig = px.box(
        df,
        x=col,
        template='ggplot2',
        color_discrete_sequence=[color],
    )
    fig.show()

# 6- Preparing Dataset

# 6-1 Normalizing Data

In [None]:
# Model features: the four unadjusted price columns. Take a copy so that the scaling step
# that follows writes into an independent frame rather than into a selection of df.
DF = df[['close','high','low','open']].copy()

In [None]:
# Fit the scaler on the training portion only (the first 80% of rows, the same fraction as the
# chronological train/test split that follows) so the test period's min/max do not leak into
# the scaling. Test-period values may therefore fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(DF.iloc[:round(len(DF) * 0.80)])
# Rebind DF to a new frame built from the scaled values instead of assigning into the
# column selection taken from df.
DF = pd.DataFrame(scaler.transform(DF), columns=DF.columns, index=DF.index)
DF.shape

# 6-2 Splitting the Data into two sets: Train data and Test data

In [None]:
# Chronological split: the first 80% of rows train the models, the remainder is held out.
training_size = round(len(DF) * 0.80)

train_data, test_data = DF.iloc[:training_size, 0:4], DF.iloc[training_size:, 0:4]

train_data.shape, test_data.shape

# 6-3 Creating training data sequences and labels

In [None]:
def prepare_time_series_data(Data, window_size):
    """Build sliding-window sequences and next-step labels from time-ordered rows.

    Window ``k`` holds rows ``k`` .. ``k + window_size - 1`` of ``Data``; its
    label is row ``k + window_size``.

    Args:
        Data: DataFrame (or anything ``np.asarray`` converts to a 1-D or 2-D
            array) whose rows are consecutive time steps.
        window_size: Number of consecutive rows per sequence; must be >= 1.

    Returns:
        Tuple ``(sequences, labels)`` of NumPy arrays. For 2-D input with ``f``
        columns the shapes are ``(n, window_size, f)`` and ``(n, f)``, where
        ``n = max(len(Data) - window_size, 0)``. The shapes stay well-formed
        even when there are too few rows for a single window.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    values = np.asarray(Data)
    n_windows = max(len(values) - window_size, 0)

    # Row k of the index grid lists positions k .. k + window_size - 1, so a single
    # fancy-indexing step gathers every window (as a copy) without a Python loop.
    window_index = np.arange(n_windows)[:, None] + np.arange(window_size)[None, :]
    sequences = values[window_index]
    # Copy so the labels never alias the caller's data.
    labels = values[window_size:].copy()

    return sequences, labels

### I experimented with window sizes of 5, 7, 20, and 60. After analyzing the results, it became evident that a window size of 60 yielded superior performance:

In [None]:
# Build 60-step windows separately for each split, so no window mixes train and test rows.
X_train, y_train = prepare_time_series_data(train_data,60)
X_test, y_test = prepare_time_series_data(test_data,60)

In [None]:
# Shapes of the windowed inputs and labels for both splits.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

# 7- Modeling

# 7-1 LSTM Models


# 7-1-1 The First and Simple LSTM Model

In [None]:
# Window length (time steps per sample), read from the prepared sequences rather than repeated
# as a literal so it cannot drift from the window size used to build them.
length = X_train.shape[1]
LSTM1 = Sequential()

# First LSTM layer: returns the whole sequence so the next LSTM layer receives 3-D input.
LSTM1.add(LSTM(100, return_sequences=True, input_shape=(length, X_train.shape[2])))
LSTM1.add(Dropout(0.2))
# Second LSTM layer: returns only its last output; its input shape follows from the layer above,
# so no input_shape is passed here.
LSTM1.add(LSTM(100, return_sequences=False))
LSTM1.add(Dropout(0.2))
# Output layer: one unit per feature.
LSTM1.add(Dense(X_train.shape[2]))

LSTM1.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])


In [None]:
# Print the layer-by-layer overview of the first LSTM model.
LSTM1.summary()

In [None]:
# Early-stopping callback with a patience of 5 epochs, shared by all three fit() calls in this notebook.
# NOTE(review): it monitors the training 'loss' although validation data is passed to fit();
# confirm whether 'val_loss' was intended.
early_stop = EarlyStopping(monitor='loss',patience=5)

In [None]:
# Train for up to 30 epochs; the test windows are also passed as the validation set.
LSTM1.fit(X_train, y_train,epochs=30,validation_data=(X_test, y_test), batch_size=32,callbacks=[early_stop])


In [None]:
# Names of the series stored in the model's training history.
LSTM1.history.history.keys()

# 7-1-1-1 Visualizing Model Metrics 

In [None]:
# Training history as a table, one column per recorded series, plotted against the epoch index.
LSTM1_losses = pd.DataFrame(LSTM1.history.history)

title = ' Loss and Mean_absolute_error over Epochs '
xlabel = ' Epochs '

ax = LSTM1_losses.plot(title=title, figsize=(10, 6))
ax.autoscale(axis='x', tight=True)
ax.set_xlabel(xlabel);


In [None]:
def highlight_best(data):
    data_highlighted = data.copy()
    min_loss = data_highlighted['loss'].min()
    min_mae = data_highlighted['mean_absolute_error'].min()
    min_val_loss = data_highlighted['val_loss'].min()
    min_val_mae = data_highlighted['val_mean_absolute_error'].min()
    # Create boolean masks to identify the maximum values
    min_loss = data_highlighted['loss'] == min_loss
    min_mae = data_highlighted['mean_absolute_error'] == min_mae
    min_val_loss = data_highlighted['val_loss'] == min_val_loss
    min_val_mae = data_highlighted['val_mean_absolute_error'] == min_val_mae
    # Apply a custom highlighting style to the maximum values using CSS
    data_highlighted = data_highlighted.style.apply(lambda x: ['background: yellow' if v else '' for v in min_loss],
                subset=['loss']).apply(lambda x: ['background: yellow' if v else '' for v in min_mae],
                subset=['mean_absolute_error']).apply(lambda x: ['background: yellow' if v else '' for v in min_val_loss],
                subset=['val_loss']).apply(lambda x: ['background: yellow' if v else '' for v in min_val_mae],
                                                    subset=['val_mean_absolute_error'])
    
    return data_highlighted

In [None]:
# Show the per-epoch history with the lowest value of each metric highlighted.
highlighted_LSTM1_losses = highlight_best(LSTM1_losses)
highlighted_LSTM1_losses

# 7-1-1-2 Evaluating the First LSTM Model on Test Data

In [None]:
def predict_and_inverse_transform(DF, X_test, model, scaler):
    """Predict on ``X_test`` and return actual and predicted prices in original units.

    The last ``len(X_test)`` rows of ``DF`` are taken as the actual values
    matching the predictions, one row per test window.

    Args:
        DF: Scaled frame with the columns 'close', 'high', 'low', 'open'.
            It is not modified.
        X_test: Model input windows; only its length and ``model.predict`` use it.
        model: Object whose ``predict(X_test)`` returns one row of four values per window.
        scaler: Fitted scaler whose ``inverse_transform`` maps the four scaled
            columns back to prices.

    Returns:
        DataFrame indexed like the tail of ``DF`` with the unscaled actual
        columns followed by 'Predicted Close', 'Predicted High',
        'Predicted Low' and 'Predicted Open'.
    """
    feature_columns = ['close', 'high', 'low', 'open']
    predicted_columns = ['Predicted Close', 'Predicted High',
                         'Predicted Low', 'Predicted Open']

    # Slice from an explicit start: DF.iloc[-0:] would select the whole frame
    # when X_test is empty instead of no rows.
    first_row = max(len(DF) - len(X_test), 0)
    test = DF.iloc[first_row:].copy()

    predictions = model.predict(X_test)
    inverse_predictions = pd.DataFrame(scaler.inverse_transform(predictions),
                                       columns=predicted_columns,
                                       index=test.index)

    test_df = pd.concat([test, inverse_predictions], axis=1)
    # Bring the actual values back to price units as well, so both halves are comparable.
    test_df[feature_columns] = scaler.inverse_transform(test_df[feature_columns])

    return test_df


In [None]:
# Actual vs. predicted prices of the first LSTM model on the test windows, in original units.
test_df = predict_and_inverse_transform(DF, X_test, LSTM1, scaler)


In [None]:
# Actual vs. predicted closing price for the first LSTM model.
plt.figure(figsize=(10, 6))
for column_name, legend_label in (('close', 'Close Price'), ('Predicted Close', 'Predicted Close Price')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    test_df[column_name].plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Comparison of Actual and Predicted Close Prices', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()


 

# 7-1-2 Second LSTM Model

In [None]:
LSTM2 = Sequential()

# First LSTM layer: the only layer that needs the input shape (time steps, features).
LSTM2.add(LSTM(150, input_shape=(length, X_train.shape[2]), return_sequences=True))
LSTM2.add(Dropout(0.2))

# Second LSTM layer: still returns the whole sequence for the LSTM layer that follows;
# its input shape is inferred from the layer above.
LSTM2.add(LSTM(100, return_sequences=True))
LSTM2.add(Dropout(0.2))

# Third LSTM layer: returns only its last output, which feeds the dense head.
LSTM2.add(LSTM(100, return_sequences=False))
LSTM2.add(Dropout(0.2))

# Dense head: two hidden layers, then the prediction layer with one unit per feature.
LSTM2.add(Dense(units=50))
LSTM2.add(Dense(units=5))
LSTM2.add(Dense(X_train.shape[2]))

LSTM2.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])


In [None]:
# Print the layer-by-layer overview of the second LSTM model.
LSTM2.summary()

In [None]:
# Same training setup as the first model: up to 30 epochs, test windows passed as validation data.
LSTM2.fit(X_train, y_train,epochs=30,validation_data=(X_test, y_test),batch_size = 32,callbacks=[early_stop],verbose=1)

# 7-1-2-1 Visualizing Model Metrics

In [None]:
# Training history of the second LSTM model, plotted against the epoch index.
LSTM2_losses = pd.DataFrame(LSTM2.history.history)

title = ' Loss and Mean Absolute Error vs. Epochs '
xlabel = ' Epochs '

ax = LSTM2_losses.plot(title=title, figsize=(10, 6))
ax.autoscale(axis='x', tight=True)
ax.set_xlabel(xlabel);



In [None]:
# Show the per-epoch history with the lowest value of each metric highlighted.
highlighted_LSTM2_losses = highlight_best(LSTM2_losses)
highlighted_LSTM2_losses

# 7-1-2-2 Evaluating the Second LSTM Model on Test Data

In [None]:
# Actual vs. predicted prices of the second LSTM model on the test windows, in original units.
test_df2 = predict_and_inverse_transform(DF, X_test, LSTM2, scaler)

In [None]:
# Actual vs. predicted closing price for the second LSTM model.
plt.figure(figsize=(10, 6))
for column_name, legend_label in (('close', 'Close Price'), ('Predicted Close', 'Predicted Close Price')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    test_df2[column_name].plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Comparison of Actual and Predicted Close Prices', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()
plt.show()


# 7-2 GRU Model

In [None]:
# Single-layer GRU network followed by a dense output with one unit per feature.
GRU_Model = Sequential([
    GRU(128, input_shape=(length, X_train.shape[2]), activation='tanh'),
    Dense(X_train.shape[2]),
])

# Same optimiser, loss and metric as the LSTM models, so the histories are comparable.
GRU_Model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)


In [None]:
# Print the layer-by-layer overview of the GRU model.
GRU_Model.summary()

In [None]:
# Same training setup as the LSTM models: up to 30 epochs, test windows passed as validation data.
GRU_Model.fit(X_train, y_train, epochs=30,validation_data=(X_test, y_test),batch_size = 32,callbacks=[early_stop],verbose=1)

# 7-2-1 Visualizing Model Metrics

In [None]:
# Training history of the GRU model, plotted against the epoch index.
GRU_losses = pd.DataFrame(GRU_Model.history.history)

title = ' Loss and Mean Absolute Error vs. Epochs '
xlabel = ' Epochs '

ax = GRU_losses.plot(title=title, figsize=(10, 6))
ax.autoscale(axis='x', tight=True)
ax.set_xlabel(xlabel);



In [None]:
# Show the per-epoch history with the lowest value of each metric highlighted.
highlighted_GRU_losses = highlight_best(GRU_losses)
highlighted_GRU_losses

# 7-2-2 Evaluating the GRU Model on Test Data

In [None]:
def predict_and_inverse_transform2(DF, X_test, model, scaler):
    """Predict on ``X_test`` and return actual and predicted prices in original units.

    This function used to be a line-for-line duplicate of
    ``predict_and_inverse_transform``; it now delegates to it so the two
    cannot drift apart. The name is kept for existing callers.

    Args:
        DF: Scaled frame with the columns 'close', 'high', 'low', 'open'.
        X_test: Model input windows.
        model: Object whose ``predict(X_test)`` returns one row of four values per window.
        scaler: Fitted scaler used to map scaled values back to prices.

    Returns:
        The DataFrame produced by ``predict_and_inverse_transform``.
    """
    return predict_and_inverse_transform(DF, X_test, model, scaler)


In [None]:
# Actual vs. predicted prices of the GRU model on the test windows, in original units.
test_df3 = predict_and_inverse_transform2(DF, X_test, GRU_Model, scaler)


In [None]:
# Actual vs. predicted closing price for the GRU model.
plt.figure(figsize=(10, 6))
for column_name, legend_label in (('close', 'Close Price'), ('Predicted Close', 'Predicted Close Price')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    test_df3[column_name].plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Comparison of Actual and Predicted Close Prices', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()


In [None]:
# Actual vs. predicted daily high for the GRU model.
plt.figure(figsize=(10, 6))
for column_name, legend_label in (('high', 'High Price'), ('Predicted High', 'Predicted High Price')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    test_df3[column_name].plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Comparison of Actual and Predicted High Prices', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()


In [None]:
# Actual vs. predicted daily low for the GRU model.
plt.figure(figsize=(10, 6))
for column_name, legend_label in (('low', 'Low Price'), ('Predicted Low', 'Predicted Low Price')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    test_df3[column_name].plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Comparison of Actual and Predicted Low Prices', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()


In [None]:
# Actual vs. predicted opening price for the GRU model.
plt.figure(figsize=(10, 6))
for column_name, legend_label in (('open', 'Open Price'), ('Predicted Open', 'Predicted Open Price')):
    # plot() returns the Axes; tighten the x-limits to the data range.
    test_df3[column_name].plot(label=legend_label).autoscale(axis='x', tight=True)

plt.legend()
plt.title('Comparison of Actual and Predicted Open Prices', backgroundcolor='grey', color='white', fontdict=font2, fontweight='bold')
plt.xlabel('Date', fontdict=font3, labelpad=15)
plt.ylabel('Price', fontdict=font3, labelpad=15)
plt.grid(True)
plt.tight_layout()


# 8- Conclusion

In comparing the performance of three different recurrent neural network (RNN) architectures - LSTM1, LSTM2, and GRU - in predicting financial market data, it was observed that all models provided relatively close results. 

However, the GRU model outperformed the LSTM-based models, exhibiting superior predictive accuracy on this dataset. 

# The End.