<a href="https://colab.research.google.com/github/BassamTar99/StockPrediction/blob/LSTM_Model/Automated_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Cell 1: Install Dependencies
!pip install yfinance matplotlib tensorflow keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [9]:
#@title Cell 2: Imports & Reproducibility
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from kerastuner.tuners import RandomSearch

# Fix seeds
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [12]:
#@title Cell 3: Prompt for Ticker & Download Data
ticker = input("Enter the stock ticker (e.g. AAPL, TSLA): ").upper().strip()
start  = "2018-01-01"
end    = "2025-04-23"

df = yf.download(ticker, start=start, end=end)
if df.empty:
    raise ValueError(f"No data found for ticker '{ticker}'. Please rerun and try another.")

print(f"Downloaded {len(df)} rows for {ticker}")
df.tail()


Enter the stock ticker (e.g. AAPL, TSLA): tsla


[*********************100%***********************]  1 of 1 completed

Downloaded 1836 rows for TSLA





Price,Close,High,Low,Open,Volume
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-04-15,254.110001,258.75,247.539993,249.910004,79594300
2025-04-16,241.550003,251.970001,233.889999,247.610001,112378700
2025-04-17,241.369995,244.339996,237.679993,243.470001,83404800
2025-04-21,227.5,232.210007,222.789993,230.259995,97768000
2025-04-22,237.970001,242.789993,229.850006,230.960007,120858500


In [21]:
#@title Cell 4: Feature Engineering (Rich Inputs)
# Keep OHLCV + SMAs + RSI + MACD
df_feat = df[['Open','High','Low','Close','Volume']].copy()

# Simple Moving Averages
for w in (10,20,50):
    df_feat[f"SMA_{w}"] = df_feat['Close'].rolling(w).mean()

# RSI
rsi_w = 14
delta = df_feat['Close'].diff()
gain  = delta.clip(lower=0)
loss  = -delta.clip(upper=0)
avg_gain = gain.rolling(rsi_w).mean()
avg_loss = loss.rolling(rsi_w).mean()
rs = avg_gain / avg_loss
df_feat['RSI'] = 100 - (100 / (1 + rs))

# MACD
fast, slow, sig = 12, 26, 9
ema_fast = df_feat['Close'].ewm(span=fast, adjust=False).mean()
ema_slow = df_feat['Close'].ewm(span=slow, adjust=False).mean()
df_feat['MACD']        = ema_fast - ema_slow
df_feat['MACD_SIGNAL'] = df_feat['MACD'].ewm(span=sig, adjust=False).mean()

df_feat.dropna(inplace=True)
print("Features shape:", df_feat.shape)
df_feat.tail()


Features shape: (1787, 11)


Price,Open,High,Low,Close,Volume,SMA_10,SMA_20,SMA_50,RSI,MACD,MACD_SIGNAL
Ticker,TSLA,TSLA,TSLA,TSLA,TSLA,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
2025-04-15,249.910004,258.75,247.539993,254.110001,79594300,252.799001,257.585501,286.1638,44.768902,-8.614868,-10.864511
2025-04-16,247.610001,251.970001,233.889999,241.550003,112378700,248.678,257.870001,283.150601,41.374414,-8.982384,-10.488086
2025-04-17,243.470001,244.339996,237.679993,241.369995,83404800,246.087,258.125501,280.4146,43.613962,-9.18232,-10.226933
2025-04-21,230.259995,232.210007,222.789993,227.5,97768000,244.894,257.065001,277.4782,41.356341,-10.340762,-10.249699
2025-04-22,230.960007,242.789993,229.850006,237.970001,120858500,245.362001,255.044,275.0052,41.728615,-10.295316,-10.258822


In [26]:
#@title Cell 5: Scale & Sequence Creation
data = df_feat.values
scaler = MinMaxScaler((0,1))
scaled = scaler.fit_transform(data)
close_min = scaler.data_min_[target_idx]
close_max = scaler.data_max_[target_idx]


SEQ_LEN      = 30
num_features = scaled.shape[1]
target_idx   = df_feat.columns.get_loc('Close')

X, y = [], []
for i in range(SEQ_LEN, len(scaled)):
    X.append(scaled[i-SEQ_LEN:i, :])
    y.append(scaled[i, target_idx])

X = np.array(X)  # (samples, 30, num_features)
y = np.array(y)  # (samples,)
print("X shape:", X.shape, "y shape:", y.shape)


X shape: (1757, 30, 11) y shape: (1757, 1)


In [27]:
#@title Cell 6: 80/10/10 Train/Val/Test Split
n       = len(X)
n_train = int(0.8 * n)
n_val   = int(0.1 * n)

X_train, y_train = X[:n_train], y[:n_train]
X_val,   y_val   = X[n_train:n_train+n_val], y[n_train:n_train+n_val]
X_test,  y_test  = X[n_train+n_val:], y[n_train+n_val:]

print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)


Train: (1405, 30, 11) Val: (175, 30, 11) Test: (177, 30, 11)


In [28]:
#@title Cell 7: Hyperparameter Tuning with Keras-Tuner
def build_model(hp):
    model = Sequential()
    # Tune LSTM units
    units = hp.Int('units', 32, 128, step=16)
    model.add(
        LSTM(units,
             return_sequences=False,
             input_shape=(SEQ_LEN, num_features),
             dropout=hp.Float('dropout', .1, .5, step=.1),
             recurrent_dropout=hp.Float('rec_dropout', .1, .5, step=.1))
    )
    model.add(Dropout(hp.Float('post_dropout', .1, .5, step=.1)))
    # Choose optimizer & learning rate
    opt_name = hp.Choice('optimizer', ['adam','rmsprop','sgd'])
    lr       = hp.Float('lr', 1e-4, 1e-2, sampling='log')
    if opt_name == 'adam':
        opt = Adam(learning_rate=lr)
    elif opt_name == 'rmsprop':
        opt = RMSprop(learning_rate=lr)
    else:
        opt = SGD(learning_rate=lr)
    model.compile(optimizer=opt, loss='mse')
    return model

tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    directory='tuner_dir',
    project_name=f'{ticker}_lstm_tuning'
)

tuner.search(
    X_train, y_train,
    epochs=10,
    validation_data=(X_val, y_val),
    callbacks=[EarlyStopping(monitor='val_loss', patience=3)]
)

best_hps = tuner.get_best_hyperparameters(1)[0]
print("Best Hyperparameters:")
print(" • units:",    best_hps.get('units'))
print(" • dropout:",  best_hps.get('dropout'))
print(" • rec_dp:",   best_hps.get('rec_dropout'))
print(" • post_dp:",  best_hps.get('post_dropout'))
print(" • optimizer:",best_hps.get('optimizer'))
print(" • lr:",       best_hps.get('lr'))


Reloading Tuner from tuner_dir/TSLA_lstm_tuning/tuner0.json
Best Hyperparameters:
 • units: 128
 • dropout: 0.1
 • rec_dp: 0.4
 • post_dp: 0.2
 • optimizer: adam
 • lr: 0.00016379559434627597


In [29]:
#@title Cell 8: Train Final Model
model = tuner.hypermodel.build(best_hps)
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
]
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=32,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/30


  super().__init__(**kwargs)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 77ms/step - loss: 0.1408 - val_loss: 0.0797 - learning_rate: 1.6380e-04
Epoch 2/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 63ms/step - loss: 0.0795 - val_loss: 0.0148 - learning_rate: 1.6380e-04
Epoch 3/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 89ms/step - loss: 0.0510 - val_loss: 0.0050 - learning_rate: 1.6380e-04
Epoch 4/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 63ms/step - loss: 0.0436 - val_loss: 0.0042 - learning_rate: 1.6380e-04
Epoch 5/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 61ms/step - loss: 0.0414 - val_loss: 0.0035 - learning_rate: 1.6380e-04
Epoch 6/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 69ms/step - loss: 0.0407 - val_loss: 0.0035 - learning_rate: 1.6380e-04
Epoch 7/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 61ms/step - loss: 0.0399 - val_loss: 0.0035 - learning

In [30]:
#@title Cell 9: Evaluate & Numeric RMSE (Fixed)
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import numpy as np

# 1) Predict (this stays the same)
y_pred = model.predict(X_test).flatten()    # shape: (n_test,)

# 2) Invert scaling *just* using the min/max of the Close column
#    original_scaled = (value - close_min) / (close_max - close_min)
#    so inverse is:
y_true_uv = y_test * (close_max - close_min) + close_min
y_pred_uv = y_pred * (close_max - close_min) + close_min

# 3) Compute RMSE numerically
rmse = np.sqrt(mean_squared_error(y_true_uv, y_pred_uv))
print(f"🔍 Test RMSE for {ticker}: {rmse:.4f}")

# 4) Plot actual vs. predicted
plt.figure(figsize=(10,4))
plt.plot(y_true_uv, label='Actual')
plt.plot(y_pred_uv, label='Predicted')
plt.title("Test Set: Actual vs. Predicted")
plt.xlabel("Sample")
plt.ylabel("Price (USD)")
plt.legend()
plt.grid(True)
plt.show()


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step


ValueError: Found input variables with inconsistent numbers of samples: [177, 22656]

In [None]:
#@title Cell 10: One-Day-Ahead Forecast & Actual Check
last_seq     = scaled[-SEQ_LEN:, :].reshape(1, SEQ_LEN, num_features)
scaled_next  = model.predict(last_seq).flatten()
next_price   = inv_scale(scaled_next)[0]
print(f"🔮 Predicted next close: ${next_price:.2f}")

# Fetch a few days of actuals
next_start = pd.to_datetime(end) + pd.Timedelta(days=1)
check = yf.download(ticker, start=next_start, end=next_start + pd.Timedelta(days=5))
print("\nActual closing prices after prediction day:")
print(check['Close'])
