In [1]:
# ===============================================================
#  ADVANCED TIME SERIES FORECASTING WITH LSTM + TUNING + SHAP
#  FULL PROJECT — READY FOR SUBMISSION
# ===============================================================

!pip install keras-tuner shap prophet

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shap
from prophet import Prophet
import tensorflow as tf
from tensorflow.keras import layers, models
import keras_tuner as kt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


# ===============================================================
# 1. SYNTHETIC MULTIVARIATE TIME-SERIES DATA GENERATION (6000 rows)
# ===============================================================

np.random.seed(42)

N = 6000
time = np.arange(N)

# Base components (non-stationary)
trend = 0.001 * time
seasonal = 0.5 * np.sin(time / 50)
noise = np.random.normal(0, 0.3, N)

# Feature 1: Temperature
temperature = 25 + 5*np.sin(time/200) + noise

# Feature 2: Humidity
humidity = 60 + 10*np.sin(time/150) + 0.2*noise

# Feature 3: Soil Moisture
soil_moisture = 40 + 8*np.sin(time/100) + 0.3*noise

# Feature 4: Rainfall
rainfall = np.abs(3*np.sin(time/80) + np.random.normal(0,1,N))

# Feature 5: Fertilizer Level
fertilizer = 50 + 0.5*np.sin(time/300) + 0.3*noise

# Feature 6: Sunlight Hours
sunlight = 6 + 2*np.sin(time/180) + 0.1*noise

# Target: Crop Yield Index
crop_yield = (
    0.3*temperature
    - 0.2*humidity
    + 0.5*soil_moisture
    + 0.8*rainfall
    + trend
    + seasonal
    + noise
)

df = pd.DataFrame({
    "temperature": temperature,
    "humidity": humidity,
    "soil_moisture": soil_moisture,
    "rainfall": rainfall,
    "fertilizer": fertilizer,
    "sunlight": sunlight,
    "crop_yield": crop_yield
})

print(df.head())


# ===============================================================
# 2. PREPROCESSING
# ===============================================================

scaler = MinMaxScaler()
scaled = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled, columns=df.columns)

SEQ_LEN = 24

def create_sequences(data, seq_len=24):
    X, y = [], []
    for i in range(len(data)-seq_len):
        X.append(data.iloc[i:i+seq_len].values)
        y.append(data.iloc[i+seq_len]["crop_yield"])
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_df)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

print("X shape:", X_train.shape)


# ===============================================================
# 3. LSTM MODEL + HYPERPARAMETER TUNING
# ===============================================================

def build_model(hp):
    model = models.Sequential()

    hp_units = hp.Choice("units", [32, 64, 96, 128])
    hp_layers = hp.Int("num_layers", 1, 3)

    model.add(layers.LSTM(hp_units, return_sequences=True, input_shape=X_train.shape[1:]))

    for _ in range(hp_layers):
        model.add(layers.LSTM(hp_units, return_sequences=True))

    model.add(layers.LSTM(hp_units))
    model.add(layers.Dense(32, activation="relu"))
    model.add(layers.Dense(1))

    hp_lr = hp.Choice("learning_rate", [1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp_lr),
        loss="mse"
    )
    return model

tuner = kt.BayesianOptimization(
    build_model,
    objective="val_loss",
    max_trials=5,
    directory="tuning_logs",
    project_name="lstm_tuning"
)

tuner.search(X_train, y_train, epochs=5, validation_split=0.1)

best_hp = tuner.get_best_hyperparameters(1)[0]
model = tuner.hypermodel.build(best_hp)

history = model.fit(
    X_train, y_train,
    epochs=10,
    validation_split=0.1
)

pred_lstm = model.predict(X_test)


# ===============================================================
# 4. EVALUATION METRICS
# ===============================================================

mae_lstm = mean_absolute_error(y_test, pred_lstm)
rmse_lstm = np.sqrt(mean_squared_error(y_test, pred_lstm))
mape_lstm = np.mean(np.abs((y_test - pred_lstm.flatten())/y_test))*100

print("LSTM MAE:", mae_lstm)
print("LSTM RMSE:", rmse_lstm)
print("LSTM MAPE:", mape_lstm)


# ===============================================================
# 5. PROPHET BASELINE MODEL
# ===============================================================

prophet_df = pd.DataFrame({
    "ds": pd.date_range(start="2020-01-01", periods=len(df)),
    "y": df["crop_yield"]
})

train_p = prophet_df.iloc[:-200]
test_p = prophet_df.iloc[-200:]

m = Prophet()
m.fit(train_p)

forecast = m.predict(test_p)

pred_prophet = forecast["yhat"].values
true_prophet = test_p["y"].values

mae_p = mean_absolute_error(true_prophet, pred_prophet)
rmse_p = np.sqrt(mean_squared_error(true_prophet, pred_prophet))
mape_p = np.mean(np.abs((true_prophet - pred_prophet)/true_prophet))*100

print("\nProphet MAE:", mae_p)
print("Prophet RMSE:", rmse_p)
print("Prophet MAPE:", mape_p)


# ===============================================================
# 6. SHAP EXPLAINABILITY FOR LSTM MODEL
# ===============================================================

explainer = shap.DeepExplainer(model, X_train[:200])
shap_values = explainer.shap_values(X_test[:50])

shap.summary_plot(shap_values[0], X_test[:50], feature_names=df.columns[:-1])


# ===============================================================
# 7. PERFORMANCE COMPARISON TABLE
# ===============================================================

comparison = pd.DataFrame({
    "Model": ["LSTM (Tuned)", "Prophet"],
    "MAE": [mae_lstm, mae_p],
    "RMSE": [rmse_lstm, rmse_p],
    "MAPE": [mape_lstm, mape_p]
})

print("\n=== PERFORMANCE COMPARISON ===")
print(comparison)


# ===============================================================
# 8. FORECAST PLOT
# ===============================================================

plt.figure(figsize=(14,5))
plt.plot(y_test[:300], label="Actual")
plt.plot(pred_lstm[:300], label="LSTM Prediction")
plt.title("Time Series Forecasting — LSTM")
plt.legend()
plt.show()


Trial 5 Complete [00h 00m 36s]
val_loss: 0.006153110880404711

Best val_loss So Far: 0.0027008154429495335
Total elapsed time: 00h 05m 44s
Epoch 1/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 156ms/step - loss: 0.0362 - val_loss: 0.0043
Epoch 2/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 164ms/step - loss: 0.0025 - val_loss: 0.0032
Epoch 3/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 172ms/step - loss: 0.0027 - val_loss: 0.0034
Epoch 4/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 152ms/step - loss: 0.0024 - val_loss: 0.0030
Epoch 5/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 164ms/step - loss: 0.0025 - val_loss: 0.0025
Epoch 6/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 159ms/step - loss: 0.0023 - val_loss: 0.0026
Epoch 7/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 155ms/step - loss: 0.0022 - val_loss: 0.0031


AttributeError: 'Prophet' object has no attribute 'stan_backend'