In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error

In [2]:
# Read data
data = pd.read_csv('GOOGL.csv')
data_copy = data.copy()

In [3]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,50.050049,52.082081,48.028027,50.220219,50.220219,44659096
1,2004-08-20,50.555557,54.594597,50.300301,54.209209,54.209209,22834343
2,2004-08-23,55.430431,56.796799,54.579578,54.754753,54.754753,18256126
3,2004-08-24,55.675674,55.855858,51.836838,52.487488,52.487488,15247337
4,2004-08-25,52.532532,54.054054,51.991993,53.053055,53.053055,9188602


In [4]:
# Drop missing values
data_copy.dropna(inplace=True)

In [5]:
# Select features
selected_features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
data_copy = data_copy[selected_features]

In [6]:
data_copy.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,50.050049,52.082081,48.028027,50.220219,50.220219,44659096
1,50.555557,54.594597,50.300301,54.209209,54.209209,22834343
2,55.430431,56.796799,54.579578,54.754753,54.754753,18256126
3,55.675674,55.855858,51.836838,52.487488,52.487488,15247337
4,52.532532,54.054054,51.991993,53.053055,53.053055,9188602


In [7]:
# Scale data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_copy)

In [8]:
# Prepare DataFrame
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'], utc=True)

In [9]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19 00:00:00+00:00,50.050049,52.082081,48.028027,50.220219,50.220219,44659096
1,2004-08-20 00:00:00+00:00,50.555557,54.594597,50.300301,54.209209,54.209209,22834343
2,2004-08-23 00:00:00+00:00,55.430431,56.796799,54.579578,54.754753,54.754753,18256126
3,2004-08-24 00:00:00+00:00,55.675674,55.855858,51.836838,52.487488,52.487488,15247337
4,2004-08-25 00:00:00+00:00,52.532532,54.054054,51.991993,53.053055,53.053055,9188602


In [10]:
# Function to prepare data
def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps)])
        y.append(data[i + time_steps])
    return np.array(X), np.array(y)

In [11]:
# Define time steps
time_steps = 60  
X, y = prepare_data(scaled_data, time_steps)

In [17]:
# Train-test split
split_ratio = 0.8
split_index = int(split_ratio * len(X))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [18]:
# Build LSTM model
model = Sequential([
    LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(units=100, return_sequences=True),
    Dropout(0.2),
    LSTM(units=100),
    Dropout(0.2),
    Dense(units=len(selected_features))
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

  super().__init__(**kwargs)


In [None]:
# Train model
epochs = 15
history = model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=1)
loss = history.history['loss']

Epoch 1/15
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 69ms/step - accuracy: 0.3829 - loss: 0.0040
Epoch 2/15
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 73ms/step - accuracy: 0.4378 - loss: 0.0011
Epoch 3/15
[1m 39/110[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m5s[0m 76ms/step - accuracy: 0.4710 - loss: 0.0011

In [None]:
# Plot training loss
plt.plot(range(len(loss)), loss, 'r', label='Training loss')
plt.title('Training loss', size=15, weight='bold')
plt.legend(loc='best')
plt.show()

In [None]:
# Evaluate model
train_loss = model.evaluate(X_train, y_train, verbose=0)
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f"Train Loss: {train_loss}, Test Loss: {test_loss}")

In [None]:
# Make predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)
y_test_inverse = scaler.inverse_transform(y_test)

In [None]:
# Plot predicted vs actual close prices
plt.figure(figsize=(10, 6))
plt.plot(predictions[:, 3], label='Predicted Close Price', color='green')
plt.plot(y_test_inverse[:, 3], label='Actual Close Price', color='b')
plt.title('Google Stock Price Prediction 2018 to 2022')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Calculate MAE and RMSE
mae = mean_absolute_error(y_test_inverse[:, 3], predictions[:, 3])
rmse = mean_squared_error(y_test_inverse[:, 3], predictions[:, 3], squared=False)
print(f'Mean Absolute Error (MAE): {mae}, Root Mean Squared Error (RMSE): {rmse}')

In [None]:
# Scale the latest data
scaled_latest_data = scaler.transform(latest_data)

# Prepare the data for prediction
latest_X, latest_y = prepare_data(scaled_latest_data, time_steps)

# Make predictions
latest_predictions = model.predict(latest_X)

# Inverse scale the predictions
latest_predictions = scaler.inverse_transform(latest_predictions)

# Get the most recent predicted close price
most_recent_predicted_close_price = latest_predictions[-1][3]  # Assuming index 3 corresponds to the 'Close' price

print("Most recent predicted close price:", most_recent_predicted_close_price)
