# LSTM Price Prediction - Improved Version

**Changes from original:**
- Smaller model architecture to prevent overfitting
- Added L2 regularization
- Learning rate scheduler
- Better early stopping with more patience
- Data validation and sanity checks
- Fixed evaluation metrics

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Report the TensorFlow build in use, then the GPU devices it can see.
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU available: {gpu_devices}")

## Configuration
**Change COIN_NAME and HORIZON to train different models**

In [None]:
# ======= CONFIGURATION =======
# COIN_NAME and HORIZON select which pre-built sequence set is loaded and
# where the trained model artifacts are written.
COIN_NAME = "bitcoin"  # Options: bitcoin, ethereum, solana, cardano, binancecoin
HORIZON = "1h"         # Options: 1h, 24h

# Project root. The default is the original author's checkout; set the
# CRYPTO_FORECAST_BASE_DIR environment variable to run the notebook from any
# other location without editing this cell.
BASE_DIR = os.environ.get(
    "CRYPTO_FORECAST_BASE_DIR",
    "/Users/ayushgupta/Desktop/ML-Driven-Web-Platform-for-Cryptocurrency-Price-Forecasting_November_Batch-5_2025",
)
SEQ_PATH = f"{BASE_DIR}/Milestone_1/data/sequences/{COIN_NAME}/{HORIZON}/"
MODEL_SAVE_PATH = f"{BASE_DIR}/Milestone_2/models/{COIN_NAME}/{HORIZON}/"

# Fail fast on a wrong coin/horizon/root *before* creating the output
# directory, so a typo does not leave an empty model folder behind.
if not os.path.isdir(SEQ_PATH):
    raise FileNotFoundError(
        f"Sequence directory not found: {SEQ_PATH} "
        f"(check COIN_NAME={COIN_NAME!r}, HORIZON={HORIZON!r} and BASE_DIR)"
    )

os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
print(f"Training: {COIN_NAME} - {HORIZON}")
print(f"Model will be saved to: {MODEL_SAVE_PATH}")

## Load Data

In [None]:
# Read the six pre-built .npy arrays (features and targets for the train,
# validation and test splits) from the sequence directory.
def _load_split(filename):
    """Load one array stored under SEQ_PATH."""
    return np.load(SEQ_PATH + filename)


X_train = _load_split("X_train.npy")
y_train = _load_split("y_train.npy")
X_val = _load_split("X_val.npy")
y_val = _load_split("y_val.npy")
X_test = _load_split("X_test.npy")
y_test = _load_split("y_test.npy")

# Print every array's shape, with labels padded so the shapes line up.
divider = "=" * 50
print(divider)
print("DATA SHAPES")
print(divider)
for label, arr in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_val:", X_val),
    ("y_val:", y_val),
    ("X_test:", X_test),
    ("y_test:", y_test),
):
    print(f"{label:<8} {arr.shape}")

## Data Validation - Critical Sanity Checks

In [None]:
print("=" * 50)
print("DATA VALIDATION")
print("=" * 50)

# Check for NaN/Inf in the training split.
print(f"\nNaN in X_train: {np.isnan(X_train).sum()}")
print(f"NaN in y_train: {np.isnan(y_train).sum()}")
print(f"Inf in X_train: {np.isinf(X_train).sum()}")
print(f"Inf in y_train: {np.isinf(y_train).sum()}")

# The validation and test splits feed val_loss and the final metrics, so a
# single NaN/Inf there corrupts early stopping and evaluation just as badly.
for split_name, split_arr in (("X_val", X_val), ("y_val", y_val),
                              ("X_test", X_test), ("y_test", y_test)):
    n_bad = int((~np.isfinite(split_arr)).sum())
    print(f"NaN/Inf in {split_name}: {n_bad}")

# Check target distribution
print(f"\ny_train stats:")
print(f"  Min:    {y_train.min():.4f}")
print(f"  Max:    {y_train.max():.4f}")
print(f"  Mean:   {y_train.mean():.4f}")
print(f"  Std:    {y_train.std():.4f}")

# Check feature distributions (last timestep of every window)
print(f"\nX_train feature stats (last timestep):")
feature_names = ["open", "high", "low", "volume", "return_1h", "volatility_24h",
                 "ma_24", "ma_168", "ma_ratio", "vol_change", "missing_flag"]
n_features = X_train.shape[2]
if n_features != len(feature_names):
    # A mismatch means the name list and the stored sequences have drifted
    # apart; report it instead of raising IndexError or skipping columns.
    print(f"  ⚠️ WARNING: expected {len(feature_names)} features but X_train has "
          f"{n_features}; unnamed columns are labelled by index")
for i in range(n_features):
    name = feature_names[i] if i < len(feature_names) else f"feature_{i}"
    vals = X_train[:, -1, i]
    print(f"  {name:15s}: min={vals.min():.4f}, max={vals.max():.4f}, mean={vals.mean():.4f}")

# Check if data is properly scaled [0, 1] (with a 0.1 tolerance on each side)
if X_train.min() < -0.1 or X_train.max() > 1.1:
    print("\n⚠️ WARNING: Features may not be properly scaled to [0,1]!")
if y_train.min() < -0.1 or y_train.max() > 1.1:
    print("⚠️ WARNING: Target may not be properly scaled to [0,1]!")

In [None]:
# Three side-by-side panels: target histograms for train/val, then the raw
# target series at the start of the train and test splits.
fig, (hist_ax, train_ax, test_ax) = plt.subplots(1, 3, figsize=(15, 4))

# Overlaid histograms of the training and validation targets.
for values, split_label, split_color in ((y_train, 'Train', 'blue'),
                                         (y_val, 'Val', 'orange')):
    hist_ax.hist(values, bins=50, alpha=0.7, label=split_label, color=split_color)
hist_ax.set_title('Target Distribution')
hist_ax.legend()
hist_ax.set_xlabel('Scaled Price')

# Leading slice of each target series, drawn in sample order.
for panel, series, panel_title in (
    (train_ax, y_train[:1000], 'y_train (first 1000 samples)'),
    (test_ax, y_test[:500], 'y_test (first 500 samples)'),
):
    panel.plot(series, linewidth=0.5)
    panel.set_title(panel_title)
    panel.set_xlabel('Sample')
    panel.set_ylabel('Scaled Price')

plt.tight_layout()
plt.show()

## Build Improved Model

**Key changes:**
- Smaller architecture (32 → 16 LSTM units instead of 128 → 64)
- L2 regularization to prevent overfitting
- Batch normalization for stability
- Sigmoid output since target is [0, 1]

In [None]:
def build_model(input_shape, l2_reg=0.001, lstm_units=(32, 16), dense_units=8,
                dropout_rate=0.2):
    """Build a stacked, L2-regularized LSTM regressor with a sigmoid output.

    Each LSTM layer is followed by BatchNormalization and Dropout; the stack
    is followed by one ReLU Dense layer and a single sigmoid output unit.
    With the default arguments the architecture is LSTM(32) -> LSTM(16) ->
    Dense(8) -> Dense(1).

    Args:
        input_shape: Per-sample input shape passed to ``tf.keras.Input``.
        l2_reg: L2 factor applied to the LSTM kernel and recurrent weights
            and to the hidden Dense kernel.
        lstm_units: Units of each stacked LSTM layer, in order. Every layer
            except the last returns full sequences so the next LSTM can
            consume them.
        dense_units: Units of the hidden Dense layer.
        dropout_rate: Dropout rate applied after each LSTM block.

    Returns:
        An uncompiled ``Sequential`` model.

    Raises:
        ValueError: If ``lstm_units`` is empty.

    Note:
        The sigmoid output bounds predictions to the open interval (0, 1),
        so targets outside that range can never be matched by the model.
    """
    if len(lstm_units) == 0:
        raise ValueError("lstm_units must contain at least one layer size")

    # Declare the input explicitly instead of passing input_shape to the
    # first layer, which newer Keras releases flag as deprecated.
    layers = [tf.keras.Input(shape=input_shape)]

    last_idx = len(lstm_units) - 1
    for idx, units in enumerate(lstm_units):
        layers.append(
            LSTM(units,
                 return_sequences=idx < last_idx,
                 kernel_regularizer=l2(l2_reg),
                 recurrent_regularizer=l2(l2_reg))
        )
        layers.append(BatchNormalization())
        layers.append(Dropout(dropout_rate))

    # Dense head
    layers.append(Dense(dense_units, activation="relu", kernel_regularizer=l2(l2_reg)))

    # Output layer - sigmoid, matching a target scaled to [0, 1]
    layers.append(Dense(1, activation="sigmoid"))

    return Sequential(layers)

# Take the per-sample input shape from axes 1 and 2 of the training array
# and build the network for it.
n_steps, n_feats = X_train.shape[1], X_train.shape[2]
input_shape = (n_steps, n_feats)
model = build_model(input_shape)

# Adam at learning rate 0.001, optimizing MSE and tracking MAE as a metric.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam_optimizer, loss="mse", metrics=["mae"])

model.summary()

## Setup Callbacks

In [None]:
# All three callbacks watch the validation loss and log their actions.
watch_val_loss = dict(monitor="val_loss", verbose=1)

# Keep on disk only the weights with the lowest validation loss so far.
best_model_file = f"{MODEL_SAVE_PATH}/best_lstm_{COIN_NAME}_{HORIZON}.keras"
checkpoint = ModelCheckpoint(
    best_model_file,
    save_best_only=True,
    mode="min",
    **watch_val_loss,
)

# Stop after 20 epochs without improvement and restore the best weights.
early_stop = EarlyStopping(
    patience=20,
    restore_best_weights=True,
    **watch_val_loss,
)

# Halve the learning rate after 5 stagnant epochs, down to a floor of 1e-6.
reduce_lr = ReduceLROnPlateau(
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    **watch_val_loss,
)

callbacks = [checkpoint, early_stop, reduce_lr]
for summary_line in (
    "Callbacks configured:",
    "  - ModelCheckpoint (save best)",
    "  - EarlyStopping (patience=20)",
    "  - ReduceLROnPlateau (factor=0.5, patience=5)",
):
    print(summary_line)

## Train Model

In [None]:
# Announce which coin/horizon this run trains.
rule = "=" * 50
print(rule)
print(f"TRAINING: {COIN_NAME} - {HORIZON}")
print(rule)

# Fit for at most 100 epochs; the callbacks decide when to stop earlier,
# checkpoint the best weights and lower the learning rate.
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=64,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

## Training Curves

In [None]:
# Plot the per-epoch training history (loss and MAE for train vs. validation),
# save the figure next to the model, then print a coarse overfitting check.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss. NOTE: the compiled Keras loss is the MSE plus the L2 penalties set in
# build_model, so these curves sit slightly above the pure MSE.
axes[0].plot(history.history['loss'], label='Train Loss', linewidth=2)
axes[0].plot(history.history['val_loss'], label='Val Loss', linewidth=2)
axes[0].set_title(f'{COIN_NAME} {HORIZON} - Loss Curve')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('MSE Loss')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# MAE
axes[1].plot(history.history['mae'], label='Train MAE', linewidth=2)
axes[1].plot(history.history['val_mae'], label='Val MAE', linewidth=2)
axes[1].set_title(f'{COIN_NAME} {HORIZON} - MAE Curve')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('MAE')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f"{MODEL_SAVE_PATH}/training_curves.png", dpi=150)
plt.show()

# Check for overfitting using the last recorded epoch.
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
# Guard the division: a zero (or NaN) train loss must not raise or produce a
# misleading verdict below.
if final_train_loss > 0:
    overfit_ratio = final_val_loss / final_train_loss
else:
    overfit_ratio = float("nan")

# EarlyStopping is configured with restore_best_weights=True, so the weights
# evaluated later may come from the best epoch rather than the last one;
# report that epoch as well.
best_epoch = int(np.argmin(history.history['val_loss']))
best_val_loss = history.history['val_loss'][best_epoch]

print(f"\nFinal Train Loss: {final_train_loss:.6f}")
print(f"Final Val Loss:   {final_val_loss:.6f}")
print(f"Overfit Ratio:    {overfit_ratio:.2f}x")
print(f"Best Val Loss:    {best_val_loss:.6f} (epoch {best_epoch + 1})")

if not np.isfinite(overfit_ratio):
    print("⚠️ WARNING: Overfit ratio is not finite - check the training losses")
elif overfit_ratio > 5:
    print("⚠️ WARNING: Model may be overfitting (val_loss >> train_loss)")
elif overfit_ratio < 2:
    print("✅ Good: Model seems well-regularized")
else:
    # Previously ratios between 2x and 5x printed nothing at all.
    print("⚠️ Moderate gap between val_loss and train_loss (ratio between 2x and 5x)")

## Evaluation

In [None]:
# Evaluate on test set. model.evaluate returns the compiled loss and the MAE
# metric; the compiled loss is the MSE *plus* the L2 regularization penalties
# of the layers, so it must not be reported as the MSE itself.
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)

# Pure MSE/RMSE computed directly from predictions (no regularization term).
test_eval_preds = model.predict(X_test, verbose=0).flatten()
test_mse = float(np.mean((np.ravel(y_test) - test_eval_preds) ** 2))

print("=" * 50)
print("TEST SET EVALUATION")
print("=" * 50)
print(f"Test loss (MSE + L2 penalty): {test_loss:.6f}")
print(f"Test MSE:  {test_mse:.6f}")
print(f"Test MAE:  {test_mae:.6f}")
print(f"Test RMSE: {np.sqrt(test_mse):.6f}")

In [None]:
# Make predictions and flatten both sides to 1-D so element-wise arithmetic
# and boolean masking line up even if y_test is stored as a column vector.
preds = model.predict(X_test, verbose=0).flatten()
y_true = np.ravel(y_test)

# Calculate metrics (all on the scaled target)
mae = mean_absolute_error(y_true, preds)
mse = mean_squared_error(y_true, preds)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, preds)

# MAPE: skip near-zero targets to avoid dividing by ~0. If nothing passes the
# threshold the metric is undefined; report that explicitly instead of
# silently printing the NaN that a mean over an empty array would give.
mask = y_true > 0.01
if mask.any():
    mape = np.mean(np.abs((y_true[mask] - preds[mask]) / y_true[mask])) * 100
else:
    mape = float("nan")

print("\n" + "=" * 50)
print("DETAILED METRICS (on scaled data)")
print("=" * 50)
print(f"MAE:   {mae:.4f}")
print(f"RMSE:  {rmse:.4f}")
if np.isnan(mape):
    print("MAPE:  n/a (no targets above 0.01)")
else:
    print(f"MAPE:  {mape:.2f}%")
print(f"R²:    {r2:.4f}")

if r2 < 0:
    print("\n⚠️ NEGATIVE R² - Model is worse than predicting the mean!")
elif r2 > 0.5:
    print("\n✅ Good R² score!")
else:
    print("\n⚠️ Low R² - Model has room for improvement")

In [None]:
# Baseline comparison: persistence model (predict that the target equals the
# target BASELINE_SHIFT samples earlier).
#
# The first BASELINE_SHIFT samples have no earlier target, so they are
# dropped from scoring rather than being compared with themselves, and the
# LSTM is scored on exactly the same samples so the comparison is fair.
#
# NOTE(review): a one-sample shift is a true "last known value" forecast only
# if consecutive test samples are one forecast horizon apart. For
# HORIZON="24h" built from hourly windows the shift should presumably be 24 -
# confirm against the sequence-building code and adjust BASELINE_SHIFT.
BASELINE_SHIFT = 1

y_true = np.ravel(y_test)
model_preds = np.ravel(preds)

print("\n" + "=" * 50)
print("BASELINE COMPARISON")
print("=" * 50)

if len(y_true) > BASELINE_SHIFT:
    baseline_preds = y_true[:-BASELINE_SHIFT]
    scored_true = y_true[BASELINE_SHIFT:]
    scored_model = model_preds[BASELINE_SHIFT:]

    baseline_mae = mean_absolute_error(scored_true, baseline_preds)
    baseline_r2 = r2_score(scored_true, baseline_preds)
    model_mae = mean_absolute_error(scored_true, scored_model)
    model_r2 = r2_score(scored_true, scored_model)

    print(f"Persistence Baseline MAE: {baseline_mae:.4f}")
    print(f"Persistence Baseline R²:  {baseline_r2:.4f}")
    print(f"\nLSTM Model MAE:          {model_mae:.4f}")
    print(f"LSTM Model R²:           {model_r2:.4f}")

    if model_mae < baseline_mae:
        improvement = (baseline_mae - model_mae) / baseline_mae * 100
        print(f"\n✅ Model beats baseline by {improvement:.1f}%!")
    else:
        print("\n⚠️ Model performs worse than simple persistence baseline")
else:
    print("⚠️ Test set is too small to build a persistence baseline")

In [None]:
# Compare predictions with the actual targets in two stacked panels, then
# save the figure next to the model.
n_samples = 500

fig, axes = plt.subplots(2, 1, figsize=(14, 8))
series_ax, scatter_ax = axes

# Top panel: the first n_samples of both series in sample order.
for series, series_label in ((y_test, 'Actual'), (preds, 'Predicted')):
    series_ax.plot(series[:n_samples], label=series_label, linewidth=1.5, alpha=0.8)
series_ax.set_title(f'{COIN_NAME} {HORIZON} - Predictions vs Actual')
series_ax.set_xlabel('Sample')
series_ax.set_ylabel('Scaled Price')
series_ax.legend()
series_ax.grid(True, alpha=0.3)

# Bottom panel: predicted vs. actual scatter with the y = x reference line,
# with both axes limited to [0, 1].
unit_interval = [0, 1]
scatter_ax.scatter(y_test, preds, alpha=0.3, s=10)
scatter_ax.plot(unit_interval, unit_interval, 'r--', linewidth=2, label='Perfect Prediction')
scatter_ax.set_title('Predicted vs Actual (Scatter)')
scatter_ax.set_xlabel('Actual')
scatter_ax.set_ylabel('Predicted')
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)
scatter_ax.set_xlim(0, 1)
scatter_ax.set_ylim(0, 1)

plt.tight_layout()
plt.savefig(f"{MODEL_SAVE_PATH}/predictions.png", dpi=150)
plt.show()

## Save Final Model

In [None]:
# Write the in-memory model to disk, then print where it went together with
# the headline test metrics computed earlier.
final_path = f"{MODEL_SAVE_PATH}/final_lstm_{COIN_NAME}_{HORIZON}.keras"
model.save(final_path)

report_lines = [
    "=" * 50,
    "MODEL SAVED",
    "=" * 50,
    f"Path: {final_path}",
    "\nFinal Metrics:",
    f"  MAE:  {mae:.4f}",
    f"  RMSE: {rmse:.4f}",
    f"  R²:   {r2:.4f}",
]
print("\n".join(report_lines))

## Quick Test: Verify Model Output Range

In [None]:
# Sanity-check the test predictions: value range, spread, and saturation at
# either end of the [0, 1] interval.
rule = "=" * 50
print(rule)
print("MODEL OUTPUT VALIDATION")
print(rule)

lowest, highest = preds.min(), preds.max()
print(f"Prediction range: [{lowest:.4f}, {highest:.4f}]")
print("Expected range:   [0.0000, 1.0000]")

within_unit_interval = lowest >= 0 and highest <= 1
if within_unit_interval:
    print("✅ Model outputs are in valid range")
else:
    print("⚠️ Model outputs are outside [0, 1] range!")

# A near-zero spread means the model emits almost the same value everywhere.
pred_std = preds.std()
print(f"\nPrediction std: {pred_std:.4f}")
if pred_std >= 0.01:
    print("✅ Predictions have good variance")
else:
    print("⚠️ WARNING: Predictions are nearly constant! Model may not have learned.")

# Share of predictions (in percent) sitting at either extreme.
near_zero = np.mean(preds < 0.05) * 100
near_one = np.mean(preds > 0.95) * 100
print(f"\nPredictions near 0 (<0.05): {near_zero:.1f}%")
print(f"Predictions near 1 (>0.95): {near_one:.1f}%")

saturated = near_zero > 50 or near_one > 50
if saturated:
    print("⚠️ WARNING: Model is saturating to extreme values!")

---
## Next Steps

After training one coin, repeat these steps to train the next one:

1. Change `COIN_NAME` (and `HORIZON`, if you also want the other forecast horizon) in the Configuration cell
2. Restart kernel and run all cells
3. Repeat for: `bitcoin`, `ethereum`, `solana`, `cardano`, `binancecoin`