<a href="https://colab.research.google.com/github/Bhuvaneswarij/gitingest/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import sys
import math
import joblib
import warnings
from pathlib import Path
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [2]:
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [4]:
# Try importing required libraries; install only if missing
try:
    import tensorflow as tf
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import yfinance as yf
    import shap
    import optuna
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from tensorflow.keras import layers, models, regularizers
    import statsmodels.api as sm
    from prophet import Prophet
    import warnings
    warnings.filterwarnings("ignore")

except Exception as e:
    print("Some libraries missing. Installing now...")
    !pip install tensorflow yfinance shap optuna pmdarima prophet statsmodels --quiet
    import tensorflow as tf
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import yfinance as yf
    import shap
    import optuna
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from tensorflow.keras import layers, models, regularizers
    import statsmodels.api as sm
    from prophet import Prophet
    import warnings
    warnings.filterwarnings("ignore")


Some libraries missing. Installing now...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m689.1/689.1 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:


TICKER = "AAPL"          # change to any symbol: TSLA, INFY.NS, TCS.NS, etc.
START_DATE = "2010-01-01"
END_DATE = "2024-12-01"

print(f"Downloading data for {TICKER}...")

df = yf.download(TICKER, start=START_DATE, end=END_DATE)

# Check data
print("Shape:", df.shape)
df.head()


Downloading data for AAPL...


[*********************100%***********************]  1 of 1 completed

Shape: (3753, 5)





Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2010-01-04,6.418383,6.433078,6.369497,6.400988,493729600
2010-01-05,6.42948,6.465769,6.39559,6.436078,601904800
2010-01-06,6.327211,6.454973,6.320613,6.42948,552160000
2010-01-07,6.315514,6.358101,6.269627,6.350604,477131200
2010-01-08,6.357499,6.358099,6.269926,6.307114,447610800


In [6]:


df["return"] = df["Close"].pct_change()
df["log_return"] = np.log(df["Close"] / df["Close"].shift(1))

# Moving averages
df["MA7"] = df["Close"].rolling(window=7).mean()
df["MA21"] = df["Close"].rolling(window=21).mean()

# Volatility
df["Volatility"] = df["Close"].rolling(window=21).std()

# RSI (Relative Strength Index)
delta = df["Close"].diff()
gain = delta.clip(lower=0)
loss = -1 * delta.clip(upper=0)
avg_gain = gain.rolling(14).mean()
avg_loss = loss.rolling(14).mean()
rs = avg_gain / avg_loss
df["RSI"] = 100 - (100 / (1 + rs))

# Drop NaNs
df = df.dropna()
df.head()


Price,Close,High,Low,Open,Volume,return,log_return,MA7,MA21,Volatility,RSI
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
2010-02-02,5.874046,5.887842,5.799668,5.875546,698342400,0.005803,0.005786,5.993196,6.194122,0.204656,40.705303
2010-02-03,5.975115,6.004206,5.830858,5.853351,615328000,0.017206,0.01706,5.976744,6.173014,0.203224,41.111444
2010-02-04,5.759779,5.949322,5.745383,5.900136,757652000,-0.036039,-0.036704,5.917233,6.141123,0.213264,37.621057
2010-02-05,5.862048,5.878244,5.723789,5.777174,850306800,0.017756,0.0176,5.86402,6.118972,0.217092,42.53313
2010-02-08,5.821862,5.934628,5.818263,5.868948,478270800,-0.006855,-0.006879,5.84187,6.095465,0.22143,33.221004


In [8]:
# ============================
# STEP 4: TRAIN/VAL/TEST SPLIT + SCALING
# ============================

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Use last 20% as test
TEST_RATIO = 0.2
SEQ_LEN = 60   # lookback period for LSTM

# Select features and target
features = ["Open","High","Low","Close","Volume","MA7","MA21","Volatility","RSI","return","log_return"]
target = "Close"
X = df[features].values
y = df[target].values

# -------------------- Train/Test split --------------------
n_test = int(len(X) * TEST_RATIO)
X_trainval, X_test = X[:-n_test], X[-n_test:]
y_trainval, y_test = y[:-n_test], y[-n_test:]

# Further split train into train + val (90/10)
n_val = int(len(X_trainval)*0.1)
X_train, X_val = X_trainval[:-n_val], X_trainval[-n_val:]
y_train, y_val = y_trainval[:-n_val], y_trainval[-n_val:]

# -------------------- Scaling --------------------
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_s = scaler_X.fit_transform(X_train)
X_val_s = scaler_X.transform(X_val)
X_test_s = scaler_X.transform(X_test)

y_train_s = scaler_y.fit_transform(y_train.reshape(-1,1)).flatten()
y_val_s = scaler_y.transform(y_val.reshape(-1,1)).flatten()
y_test_s = scaler_y.transform(y_test.reshape(-1,1)).flatten()

print("Scaled shapes:", X_train_s.shape, X_val_s.shape, X_test_s.shape)





Scaled shapes: (2689, 11) (298, 11) (746, 11)


In [9]:
def create_sequences(X, y, seq_len=SEQ_LEN):
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_len):
        X_seq.append(X[i:i+seq_len])
        y_seq.append(y[i+seq_len])
    return np.array(X_seq), np.array(y_seq)


In [10]:
X_train_seq, y_train_seq = create_sequences(X_train_s, y_train_s)
X_val_seq, y_val_seq = create_sequences(X_val_s, y_val_s)
X_test_seq, y_test_seq = create_sequences(X_test_s, y_test_s)



In [11]:

print("Sequence shapes:", X_train_seq.shape, y_train_seq.shape)

Sequence shapes: (2629, 60, 11) (2629,)


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam


In [13]:
def build_lstm(seq_len, n_features, units=64, dropout=0.2, l1=0.0, l2=0.0, lr=1e-3):
    from tensorflow.keras.regularizers import l1_l2
    reg = l1_l2(l1=l1, l2=l2) if (l1>0 or l2>0) else None
    model = Sequential()
    model.add(LSTM(units, input_shape=(seq_len, n_features), kernel_regularizer=reg))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
    return model

In [14]:
n_features = X_train_seq.shape[2]
model = build_lstm(SEQ_LEN, n_features, units=64, dropout=0.2)

In [15]:
ckpt = ModelCheckpoint("point_lstm.h5", save_best_only=True, monitor='val_loss')
es = EarlyStopping(patience=10, restore_best_weights=True, monitor='val_loss')

In [16]:
history = model.fit(
    X_train_seq, y_train_seq,
    validation_data=(X_val_seq, y_val_seq),
    epochs=50,
    batch_size=64,
    callbacks=[ckpt, es],
    verbose=1
)

Epoch 1/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.5988



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 39ms/step - loss: 0.5836 - val_loss: 3.8565
Epoch 2/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.0418



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0414 - val_loss: 1.5510
Epoch 3/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.0230



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0229 - val_loss: 1.3132
Epoch 4/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.0191



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0191 - val_loss: 1.3096
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 0.0185 - val_loss: 1.3632
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0145



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - loss: 0.0145 - val_loss: 1.2139
Epoch 7/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 0.0138 - val_loss: 1.4040
Epoch 8/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 0.0132



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0132 - val_loss: 1.1306
Epoch 9/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 45ms/step - loss: 0.0127



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - loss: 0.0127 - val_loss: 0.8942
Epoch 10/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 52ms/step - loss: 0.0155



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 58ms/step - loss: 0.0154 - val_loss: 0.8170
Epoch 11/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 0.0119 - val_loss: 0.8766
Epoch 12/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 0.0110 - val_loss: 0.8919
Epoch 13/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - loss: 0.0137 - val_loss: 0.8269
Epoch 14/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.0104



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0104 - val_loss: 0.7286
Epoch 15/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.0123



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - loss: 0.0123 - val_loss: 0.6779
Epoch 16/50
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - loss: 0.0092



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0092 - val_loss: 0.4293
Epoch 17/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0095 - val_loss: 0.8620
Epoch 18/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0112 - val_loss: 0.7801
Epoch 19/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0081 - val_loss: 0.7610
Epoch 20/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 0.0095 - val_loss: 0.9446
Epoch 21/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 44ms/step - loss: 0.0083 - val_loss: 0.7559
Epoch 22/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 0.0082 - val_loss: 0.6820
Epoch 23/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0073 - val_loss: 0.64

In [17]:
model.load_weights("point_lstm.h5")

In [18]:
y_pred_s = model.predict(X_test_seq).flatten()
y_pred = scaler_y.inverse_transform(y_pred_s.reshape(-1,1)).flatten()
y_true = y_test[SEQ_LEN:]

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step


In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
def rmse(y_true, y_pred):
    return mean_squared_error(y_true, y_pred, squared=False)

In [21]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

def rmse(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    return np.sqrt(np.mean((y_true - y_pred)**2))


In [22]:
print("Test MAE:", mean_absolute_error(y_true, y_pred))
print("Test RMSE:", rmse(y_true, y_pred))


Test MAE: 37.4136698308561
Test RMSE: 47.06418246401148


In [23]:
# ------------------- MC Dropout model -------------------
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

def build_mc_lstm(seq_len, n_features, units=64, dropout=0.2, lr=1e-3):
    inp = Input(shape=(seq_len, n_features))
    x = LSTM(units)(inp)
    d = Dropout(dropout)
    x = d(x, training=True)  # ensure dropout active at inference
    out = Dense(1)(x)
    model = Model(inp, out)
    model.compile(optimizer=Adam(lr), loss='mse')
    return model

mc_model = build_mc_lstm(SEQ_LEN, n_features, units=64, dropout=0.2)
es = EarlyStopping(patience=10, restore_best_weights=True)
mc_model.fit(X_train_seq, y_train_seq, validation_data=(X_val_seq, y_val_seq), epochs=50, batch_size=64, callbacks=[es], verbose=1)

# ------------------- MC prediction intervals -------------------
N_MONTECARLO = 200
ALPHA = 0.05

preds = []
for _ in range(N_MONTECARLO):
    p = mc_model.predict(X_test_seq, verbose=0).flatten()
    preds.append(p)
preds = np.vstack(preds)
median = np.median(preds, axis=0)
lower = np.percentile(preds, 100*ALPHA/2, axis=0)
upper = np.percentile(preds, 100*(1-ALPHA/2), axis=0)
# inverse scale
median = scaler_y.inverse_transform(median.reshape(-1,1)).flatten()
lower = scaler_y.inverse_transform(lower.reshape(-1,1)).flatten()
upper = scaler_y.inverse_transform(upper.reshape(-1,1)).flatten()

# interval metrics
covered = ((y_true >= lower) & (y_true <= upper)).astype(int)
ICP = covered.mean()
MIW = np.mean(upper - lower)
print("MC Dropout Interval metrics → ICP:", ICP, "MIW:", MIW)


Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 40ms/step - loss: 0.4933 - val_loss: 2.7122
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0292 - val_loss: 1.7418
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0186 - val_loss: 1.3805
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0159 - val_loss: 1.2100
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0144 - val_loss: 1.5169
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0126 - val_loss: 0.9641
Epoch 7/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 0.0121 - val_loss: 1.0206
Epoch 8/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - loss: 0.0123 - val_loss: 0.8459
Epoch 9/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━

In [24]:
def pinball_loss(q):
    def loss(y_true, y_pred):
        e = y_true - y_pred
        return tf.reduce_mean(tf.maximum(q*e, (q-1)*e))
    return loss

qs = [0.1, 0.5, 0.9]
q_preds = {}

for q in qs:
    print(f"Training Quantile LSTM q={q}...")
    model_q = Sequential([
        LSTM(64, input_shape=(SEQ_LEN, n_features)),
        Dropout(0.2),
        Dense(1)
    ])
    model_q.compile(optimizer=Adam(1e-3), loss=pinball_loss(q))
    es = EarlyStopping(patience=8, restore_best_weights=True)
    model_q.fit(X_train_seq, y_train_seq, validation_data=(X_val_seq, y_val_seq), epochs=50, batch_size=64, callbacks=[es], verbose=0)

    pred_s = model_q.predict(X_test_seq).flatten()
    pred = scaler_y.inverse_transform(pred_s.reshape(-1,1)).flatten()
    q_preds[q] = pred

# Quantile interval metrics
q_lower = q_preds[0.1]
q_upper = q_preds[0.9]
covered = ((y_true >= q_lower) & (y_true <= q_upper)).astype(int)
ICP_q = covered.mean()
MIW_q = np.mean(q_upper - q_lower)
print("Quantile LSTM metrics → ICP:", ICP_q, "MIW:", MIW_q)


Training Quantile LSTM q=0.1...
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step
Training Quantile LSTM q=0.5...
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Training Quantile LSTM q=0.9...
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Quantile LSTM metrics → ICP: 0.25542716045185254 MIW: 27.998468


In [25]:
# SARIMA
try:
    import pmdarima as pmd
    sarima_model = pmd.auto_arima(y_train, seasonal=False, stepwise=True, suppress_warnings=True)
    fc, conf_int = sarima_model.predict(n_periods=len(y_test), return_conf_int=True)
    sarima_lower = conf_int[:,0]
    sarima_upper = conf_int[:,1]
    covered = ((y_true >= sarima_lower) & (y_true <= sarima_upper)).astype(int)
    ICP_s = covered.mean()
    MIW_s = np.mean(sarima_upper - sarima_lower)
    print("SARIMA → ICP:", ICP_s, "MIW:", MIW_s)
except Exception as e:
    print("SARIMA failed:", e)

# Prophet
try:
    from prophet import Prophet
    pf_df = df[['date','Close']].rename(columns={'date':'ds','Close':'y'})
    m = Prophet(daily_seasonality=False, yearly_seasonality=True)
    m.fit(pf_df[:-len(y_test)])
    future = m.make_future_dataframe(periods=len(y_test))
    fcst = m.predict(future)
    pf_mean = fcst['yhat'].values[-len(y_test):]
    pf_lower = fcst['yhat_lower'].values[-len(y_test):]
    pf_upper = fcst['yhat_upper'].values[-len(y_test):]
    covered = ((y_true >= pf_lower) & (y_true <= pf_upper)).astype(int)
    ICP_p = covered.mean()
    MIW_p = np.mean(pf_upper - pf_lower)
    print("Prophet → ICP:", ICP_p, "MIW:", MIW_p)
except Exception as e:
    print("Prophet failed:", e)


SARIMA → ICP: 0.942249822180883 MIW: 4287.471345514768
Prophet failed: "['date'] not in index"


In [26]:
# SARIMA
try:
    import pmdarima as pmd
    sarima_model = pmd.auto_arima(y_train, seasonal=False, stepwise=True, suppress_warnings=True)
    fc, conf_int = sarima_model.predict(n_periods=len(y_test), return_conf_int=True)
    sarima_lower = conf_int[:,0]
    sarima_upper = conf_int[:,1]
    covered = ((y_true >= sarima_lower) & (y_true <= sarima_upper)).astype(int)
    ICP_s = covered.mean()
    MIW_s = np.mean(sarima_upper - sarima_lower)
    print("SARIMA → ICP:", ICP_s, "MIW:", MIW_s)
except Exception as e:
    print("SARIMA failed:", e)

# Prophet
try:
    from prophet import Prophet
    pf_df = df[['date','Close']].rename(columns={'date':'ds','Close':'y'})
    m = Prophet(daily_seasonality=False, yearly_seasonality=True)
    m.fit(pf_df[:-len(y_test)])
    future = m.make_future_dataframe(periods=len(y_test))
    fcst = m.predict(future)
    pf_mean = fcst['yhat'].values[-len(y_test):]
    pf_lower = fcst['yhat_lower'].values[-len(y_test):]
    pf_upper = fcst['yhat_upper'].values[-len(y_test):]
    covered = ((y_true >= pf_lower) & (y_true <= pf_upper)).astype(int)
    ICP_p = covered.mean()
    MIW_p = np.mean(pf_upper - pf_lower)
    print("Prophet → ICP:", ICP_p, "MIW:", MIW_p)
except Exception as e:
    print("Prophet failed:", e)


SARIMA → ICP: 0.942249822180883 MIW: 4287.471345514768
Prophet failed: "['date'] not in index"


In [31]:
# ensure everything is numpy array and 1D
y_true_arr = np.array(y_true).flatten()
y_pred_arr = np.array(y_pred).flatten()
mc_lower_arr = np.array(lower).flatten()
mc_upper_arr = np.array(upper).flatten()
q_lower_arr = np.array(q_lower).flatten()
q_upper_arr = np.array(q_upper).flatten()

results = pd.DataFrame({
    "y_true": y_true_arr,
    "point_pred": y_pred_arr,
    "mc_lower": mc_lower_arr,
    "mc_upper": mc_upper_arr,
    "q_lower": q_lower_arr,
    "q_upper": q_upper_arr
})
results.to_csv("forecast_results.csv", index=False)
print("Saved all results to forecast_results.csv ✅")


Saved all results to forecast_results.csv ✅
