<a href="https://colab.research.google.com/github/MWANIKID/PhD-Research-files/blob/main/5b.%20LTMS_ARIMA_32.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from google.colab import files
uploaded = files.upload()


Saving Tesla_stock.csv to Tesla_stock (1).csv
Saving Bitcoin_data.csv to Bitcoin_data (1).csv


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [10]:
# 📦 Required Libraries
import pandas as pd

# ✅ Load data with correct date format
btc = pd.read_csv("Bitcoin_data.csv", parse_dates=["Date"], date_format="%d/%m/%Y")
tesla = pd.read_csv("Tesla_stock.csv", parse_dates=["Date"], date_format="%d/%m/%Y")

# ✅ Set Date as index
btc.set_index("Date", inplace=True)
tesla.set_index("Date", inplace=True)

# ✅ Filter to aligned analysis range
btc = btc.loc["2014-10-01":"2024-12-30"]
tesla = tesla.loc["2014-10-01":"2024-12-30"]


In [14]:
import pandas as pd

# ✅ Read with correct format
btc = pd.read_csv("Bitcoin_data.csv", dayfirst=True, parse_dates=["Date"])
tesla = pd.read_csv("Tesla_stock.csv", dayfirst=True, parse_dates=["Date"])

# ✅ Set index to Date
btc.set_index("Date", inplace=True)
tesla.set_index("Date", inplace=True)

# ✅ Confirm datetime index and apply global filter
btc.index = pd.to_datetime(btc.index, dayfirst=True)
tesla.index = pd.to_datetime(tesla.index, dayfirst=True)

# ✅ Global date filter
btc = btc.loc["2014-10-01":"2024-12-30"]
tesla = tesla.loc["2014-10-01":"2024-12-30"]


In [15]:
# Ensure 'Date' is the index only if it's still a column
if "Date" in btc.columns:
    btc.set_index("Date", inplace=True)
if "Date" in tesla.columns:
    tesla.set_index("Date", inplace=True)

# Confirm index is datetime
btc.index = pd.to_datetime(btc.index, dayfirst=True)
tesla.index = pd.to_datetime(tesla.index, dayfirst=True)

# Optional: sort and print ranges
btc.sort_index(inplace=True)
tesla.sort_index(inplace=True)

print("✅ BTC Range:", btc.index.min().date(), "to", btc.index.max().date())
print("✅ Tesla Range:", tesla.index.min().date(), "to", tesla.index.max().date())


✅ BTC Range: 2014-10-01 to 2024-12-30
✅ Tesla Range: 2014-10-01 to 2024-12-30


In [None]:
# 📂 Step 3: Read data & format
import pandas as pd

btc = pd.read_csv("Bitcoin_data.csv")
tesla = pd.read_csv("Tesla_stock.csv")

btc['Date'] = pd.to_datetime(btc['Date'], dayfirst=True)
tesla['Date'] = pd.to_datetime(tesla['Date'], dayfirst=True)

btc.set_index("Date", inplace=True)
tesla.set_index("Date", inplace=True)

# Optional: check ranges
print("BTC:", btc.index.min(), "→", btc.index.max())
print("Tesla:", tesla.index.min(), "→", tesla.index.max())


BTC: 2014-09-17 00:00:00 → 2024-12-30 00:00:00
Tesla: 2010-07-01 00:00:00 → 2024-12-30 00:00:00


In [16]:
# 📅 Step 4: Define periods (aligned with global range: 2014-10-01 to 2024-12-30)
periods = {
    "Period 1": ("2014-10-01", "2019-08-31"),
    "Period 2": ("2019-09-01", "2024-12-30"),
    "Period 3": ("2014-10-01", "2024-12-30")
}


In [17]:
# 🧠 Step 5: Hybrid ARIMA + LSTM Function
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os

os.makedirs("plots", exist_ok=True)

def run_hybrid_arima_lstm(asset_name, df, period_label):
    close_series = df["Close"].dropna().values
    if len(close_series) < 100:
        print(f"⚠️ Skipping {asset_name} - {period_label} (insufficient data)")
        return None

    scaler = MinMaxScaler()
    scaled_series = scaler.fit_transform(close_series.reshape(-1, 1)).flatten()

    arima_model = ARIMA(scaled_series, order=(5,1,0)).fit()
    arima_pred = arima_model.predict(start=1, end=len(scaled_series)-1)
    residuals = scaled_series[1:] - arima_pred

    # Create sequences
    def create_sequences(data, window=30):
        X, y = [], []
        for i in range(window, len(data)):
            X.append(data[i-window:i])
            y.append(data[i])
        return np.array(X), np.array(y)

    X_resid, y_resid = create_sequences(residuals)
    split = int(0.8 * len(X_resid))
    X_train, X_test = X_resid[:split], X_resid[split:]
    y_train, y_test = y_resid[:split], y_resid[split:]

    model = Sequential([LSTM(64, input_shape=(X_train.shape[1], 1)), Dense(1)])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=30, batch_size=32, verbose=0)

    lstm_pred = model.predict(X_test).flatten()
    arima_pred_shifted = arima_pred[-len(lstm_pred):]
    hybrid_pred_scaled = arima_pred_shifted + lstm_pred

    y_true_scaled = scaled_series[-len(lstm_pred):]
    y_true = scaler.inverse_transform(y_true_scaled.reshape(-1, 1))
    hybrid_pred = scaler.inverse_transform(hybrid_pred_scaled.reshape(-1, 1))

    # 🔢 Metrics
    rmse = np.sqrt(mean_squared_error(y_true, hybrid_pred))
    mse = mean_squared_error(y_true, hybrid_pred)
    mae = mean_absolute_error(y_true, hybrid_pred)
    mape = np.mean(np.abs((y_true - hybrid_pred) / np.maximum(np.abs(y_true), 1e-10))) * 100
    r2 = r2_score(y_true, hybrid_pred)

    # 📊 Save plot
    plt.figure(figsize=(12, 5))
    plt.plot(y_true, label='Actual', color='blue')
    plt.plot(hybrid_pred, label='Hybrid Prediction', color='green')
    plt.title(f"{asset_name} - Hybrid ARIMA + LSTM – {period_label}")
    plt.xlabel("Time"); plt.ylabel("Price")
    plt.legend(); plt.grid(True); plt.tight_layout()

    filename = f"plots/Hybrid_ARIMA_LSTM_{asset_name}_{period_label}.png".replace(" ", "_")
    plt.savefig(filename, dpi=300)
    plt.close()

    return {
        "Asset": asset_name,
        "Period": period_label,
        "RMSE": round(rmse, 2),
        "MSE": round(mse, 2),
        "MAE": round(mae, 2),
        "MAPE": f"{mape:.2f}%",
        "R2": round(r2, 2),
        "Plot": filename
    }


In [18]:
from google.colab import files
import os

# ✅ Ensure plot output directory exists
os.makedirs("plots", exist_ok=True)

# ✅ Updated and aligned date periods
periods = {
    "Period 1": ("2014-10-01", "2019-08-31"),
    "Period 2": ("2019-09-01", "2024-12-30"),
    "Period 3": ("2014-10-01", "2024-12-30")
}

# 🧠 Collect results
btc_results = []
tesla_results = []

# 🔁 Iterate through each aligned period
for period_label, (start, end) in periods.items():
    print(f"\n📅 Processing: {period_label} ({start} to {end})")

    # ⛏️ Slice data for the current period
    btc_period_df = btc.loc[start:end].copy()
    tesla_period_df = tesla.loc[start:end].copy()

    # 📈 Run Hybrid ARIMA+LSTM for Bitcoin
    print(f"🔷 Running Hybrid Model for Bitcoin - {period_label}")
    btc_result = run_hybrid_arima_lstm("Bitcoin", btc_period_df, period_label)
    if btc_result:
        btc_results.append(btc_result)
        print(f"✅ Saved: {btc_result['Plot']}")
        files.download(btc_result["Plot"])

    # 📉 Run Hybrid ARIMA+LSTM for Tesla
    print(f"🔶 Running Hybrid Model for Tesla - {period_label}")
    tesla_result = run_hybrid_arima_lstm("Tesla", tesla_period_df, period_label)
    if tesla_result:
        tesla_results.append(tesla_result)
        print(f"✅ Saved: {tesla_result['Plot']}")
        files.download(tesla_result["Plot"])



📅 Processing: Period 1 (2014-10-01 to 2019-08-31)
🔷 Running Hybrid Model for Bitcoin - Period 1


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_1.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔶 Running Hybrid Model for Tesla - Period 1


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_1.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


📅 Processing: Period 2 (2019-09-01 to 2024-12-30)
🔷 Running Hybrid Model for Bitcoin - Period 2


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_2.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔶 Running Hybrid Model for Tesla - Period 2


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 70ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_2.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


📅 Processing: Period 3 (2014-10-01 to 2024-12-30)
🔷 Running Hybrid Model for Bitcoin - Period 3


  super().__init__(**kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_3.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔶 Running Hybrid Model for Tesla - Period 3


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_3.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# 🔁 Step 6: Run Hybrid ARIMA+LSTM per Period and Download Graphs for Each Asset
from google.colab import files
import os

# Ensure folder exists
os.makedirs("plots", exist_ok=True)

# Periods already defined as:
# periods = {
#     "Period 1": ('2016-01-01', '2019-08-31'),
#     "Period 2": ('2019-09-01', '2023-03-31'),
#     "Period 3": ('2016-01-01', '2023-03-31'),
# }

btc_results = []
tesla_results = []

for period_label, (start, end) in periods.items():
    # Slice data
    btc_period_df = btc.loc[start:end]
    tesla_period_df = tesla.loc[start:end]

    # Run model for Bitcoin
    print(f"📈 Running Hybrid for Bitcoin - {period_label}")
    btc_result = run_hybrid_arima_lstm("Bitcoin", btc_period_df, period_label)
    if btc_result:
        btc_results.append(btc_result)
        print(f"✅ Saved: {btc_result['Plot']}")
        files.download(btc_result["Plot"])

    # Run model for Tesla
    print(f"📉 Running Hybrid for Tesla - {period_label}")
    tesla_result = run_hybrid_arima_lstm("Tesla", tesla_period_df, period_label)
    if tesla_result:
        tesla_results.append(tesla_result)
        print(f"✅ Saved: {tesla_result['Plot']}")
        files.download(tesla_result["Plot"])


📈 Running Hybrid for Bitcoin - Period 1


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_1.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📉 Running Hybrid for Tesla - Period 1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_1.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📈 Running Hybrid for Bitcoin - Period 2


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  
✅ Saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_2.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📉 Running Hybrid for Tesla - Period 2


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_2.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📈 Running Hybrid for Bitcoin - Period 3


  super().__init__(**kwargs)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_3.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📉 Running Hybrid for Tesla - Period 3


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
✅ Saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_3.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [19]:
# 🔁 Step 6: Run Hybrid ARIMA+LSTM per Period and Download Graphs for Each Asset
from google.colab import files
import os

# ✅ Ensure output folder exists
os.makedirs("plots", exist_ok=True)

# ✅ Globally aligned periods
periods = {
    "Period 1": ("2014-10-01", "2019-08-31"),
    "Period 2": ("2019-09-01", "2024-12-30"),
    "Period 3": ("2014-10-01", "2024-12-30")
}

# ✅ Store model output summaries
btc_results = []
tesla_results = []

# 🔁 Loop through each defined period
for period_label, (start, end) in periods.items():
    print(f"\n📅 Period: {period_label} | Range: {start} to {end}")

    # Slice datasets for each period
    btc_period_df = btc.loc[start:end].copy()
    tesla_period_df = tesla.loc[start:end].copy()

    # 📈 Run hybrid model for Bitcoin
    print(f"🔷 Running Hybrid ARIMA+LSTM for Bitcoin – {period_label}")
    btc_result = run_hybrid_arima_lstm("Bitcoin", btc_period_df, period_label)
    if btc_result:
        btc_results.append(btc_result)
        print(f"✅ Bitcoin plot saved: {btc_result['Plot']}")
        files.download(btc_result["Plot"])

    # 📉 Run hybrid model for Tesla
    print(f"🔶 Running Hybrid ARIMA+LSTM for Tesla – {period_label}")
    tesla_result = run_hybrid_arima_lstm("Tesla", tesla_period_df, period_label)
    if tesla_result:
        tesla_results.append(tesla_result)
        print(f"✅ Tesla plot saved: {tesla_result['Plot']}")
        files.download(tesla_result["Plot"])



📅 Period: Period 1 | Range: 2014-10-01 to 2019-08-31
🔷 Running Hybrid ARIMA+LSTM for Bitcoin – Period 1


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
✅ Bitcoin plot saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_1.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔶 Running Hybrid ARIMA+LSTM for Tesla – Period 1


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
✅ Tesla plot saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_1.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


📅 Period: Period 2 | Range: 2019-09-01 to 2024-12-30
🔷 Running Hybrid ARIMA+LSTM for Bitcoin – Period 2


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
✅ Bitcoin plot saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_2.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔶 Running Hybrid ARIMA+LSTM for Tesla – Period 2


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
✅ Tesla plot saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_2.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


📅 Period: Period 3 | Range: 2014-10-01 to 2024-12-30
🔷 Running Hybrid ARIMA+LSTM for Bitcoin – Period 3


  super().__init__(**kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
✅ Bitcoin plot saved: plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_3.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔶 Running Hybrid ARIMA+LSTM for Tesla – Period 3


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
✅ Tesla plot saved: plots/Hybrid_ARIMA_LSTM_Tesla_Period_3.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [21]:
import pandas as pd

# Save hybrid results if available
if btc_results:
    btc_df = pd.DataFrame(btc_results)
    btc_df.to_csv("Hybrid_Bitcoin_Results.csv", index=False)
    print("✅ Saved: Hybrid_Bitcoin_Results.csv")

if tesla_results:
    tesla_df = pd.DataFrame(tesla_results)
    tesla_df.to_csv("Hybrid_Tesla_Results.csv", index=False)
    print("✅ Saved: Hybrid_Tesla_Results.csv")


✅ Saved: Hybrid_Bitcoin_Results.csv
✅ Saved: Hybrid_Tesla_Results.csv


In [22]:
# 📘 Step 8: Create Word Report with Plots and Metrics
!pip install python-docx --quiet

from docx import Document
from docx.shared import Inches
import os

# Load results
btc_df = pd.read_csv("Hybrid_Bitcoin_Results.csv")
tesla_df = pd.read_csv("Hybrid_Tesla_Results.csv")

# Create Word doc
doc = Document()
doc.add_heading("Hybrid ARIMA + LSTM Forecast Summary", level=1)

# 🔹 Bitcoin Section
doc.add_heading("Bitcoin – Hybrid Forecast Results", level=2)

# Table
table = doc.add_table(rows=1, cols=len(btc_df.columns))
table.style = 'Light Grid'
hdr_cells = table.rows[0].cells
for i, col in enumerate(btc_df.columns):
    hdr_cells[i].text = col
for _, row in btc_df.iterrows():
    row_cells = table.add_row().cells
    for i, col in enumerate(btc_df.columns):
        row_cells[i].text = str(row[col])

# Plots
for i in range(1, 4):
    fig_path = f"plots/Hybrid_ARIMA_LSTM_Bitcoin_Period_{i}.png"
    if os.path.exists(fig_path):
        doc.add_paragraph(f"Figure: Bitcoin Forecast – Period {i}")
        doc.add_picture(fig_path, width=Inches(6))

# 🔹 Tesla Section
doc.add_heading("Tesla – Hybrid Forecast Results", level=2)

# Table
table = doc.add_table(rows=1, cols=len(tesla_df.columns))
table.style = 'Light Grid'
hdr_cells = table.rows[0].cells
for i, col in enumerate(tesla_df.columns):
    hdr_cells[i].text = col
for _, row in tesla_df.iterrows():
    row_cells = table.add_row().cells
    for i, col in enumerate(tesla_df.columns):
        row_cells[i].text = str(row[col])

# Plots
for i in range(1, 4):
    fig_path = f"plots/Hybrid_ARIMA_LSTM_Tesla_Period_{i}.png"
    if os.path.exists(fig_path):
        doc.add_paragraph(f"Figure: Tesla Forecast – Period {i}")
        doc.add_picture(fig_path, width=Inches(6))

# Save
final_doc_path = "Hybrid_ARIMA_LSTM_Report.docx"
doc.save(final_doc_path)

# Download in browser
files.download(final_doc_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from statsmodels.tsa.arima.model import ARIMA

# Upload files manually in Colab if needed
# from google.colab import files
# uploaded = files.upload()

# Load datasets
btc = pd.read_csv("Bitcoin_data.csv", parse_dates=["Date"], dayfirst=True)
tesla = pd.read_csv("Tesla_stock.csv", parse_dates=["Date"], dayfirst=True)

btc.set_index("Date", inplace=True)
tesla.set_index("Date", inplace=True)


In [24]:
# ✅ Global date analysis range: 2014-10-01 to 2024-12-30

# 📅 Periods used in modeling (aligned with global range)
periods = {
    "Period 1": {
        "label": "Pre-COVID Bull/Bear Cycles",
        "start": "2014-10-01",
        "end": "2019-08-31"
    },
    "Period 2": {
        "label": "Post-COVID & Modern Market",
        "start": "2019-09-01",
        "end": "2024-12-30"
    },
    "Period 3": {
        "label": "Full Range Analysis",
        "start": "2014-10-01",
        "end": "2024-12-30"
    }
}


In [25]:
def add_features(df, mode):
    df = df.copy()
    if mode == "bollinger":
        df["SMA20"] = df["Close"].rolling(window=20).mean()
        df["STD20"] = df["Close"].rolling(window=20).std()
        df["Upper"] = df["SMA20"] + (df["STD20"] * 2)
        df["Lower"] = df["SMA20"] - (df["STD20"] * 2)
        df.drop(columns=["SMA20", "STD20"], inplace=True)
    elif mode == "returns":
        df["Returns"] = df["Close"].pct_change().fillna(0)
    return df.dropna()


In [26]:
def run_hybrid_arima_lstm(df, feature_cols, asset, period, mode):
    df = df[feature_cols].dropna()
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df.values)

    close_scaled = scaled[:, 0]  # Close is always the first

    arima_model = ARIMA(close_scaled, order=(5, 1, 0)).fit()
    arima_pred = arima_model.predict(start=1, end=len(close_scaled)-1)
    residuals = close_scaled[1:] - arima_pred

    def create_seq(data, window=30):
        X, y = [], []
        for i in range(window, len(data)):
            X.append(data[i-window:i])
            y.append(data[i])
        return np.array(X), np.array(y)

    X, y = create_seq(residuals)
    split = int(0.8 * len(X))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    model = Sequential([LSTM(64, input_shape=(X.shape[1], 1)), Dense(1)])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=30, batch_size=32, verbose=0)

    lstm_pred = model.predict(X_test)
    arima_shifted = arima_pred[-len(lstm_pred):]
    hybrid_scaled = arima_shifted + lstm_pred.flatten()

    y_true = close_scaled[-len(lstm_pred):].reshape(-1, 1)
    y_pred = hybrid_scaled.reshape(-1, 1)

    y_true_inv = scaler.inverse_transform(np.hstack([y_true, np.zeros((len(y_true), scaled.shape[1]-1))]))[:, 0]
    y_pred_inv = scaler.inverse_transform(np.hstack([y_pred, np.zeros((len(y_pred), scaled.shape[1]-1))]))[:, 0]

    # Metrics
    rmse = np.sqrt(mean_squared_error(y_true_inv, y_pred_inv))
    mse = mean_squared_error(y_true_inv, y_pred_inv)
    mae = mean_absolute_error(y_true_inv, y_pred_inv)
    mape = np.mean(np.abs((y_true_inv - y_pred_inv) / np.maximum(np.abs(y_true_inv), 1e-10))) * 100
    r2 = r2_score(y_true_inv, y_pred_inv)

    return {
        "Asset": asset,
        "Feature Set": mode,
        "Period": period,
        "RMSE": round(rmse, 2),
        "MSE": round(mse, 2),
        "MAE": round(mae, 2),
        "MAPE": f"{mape:.2f}%",
        "R2": round(r2, 2)
    }


In [28]:
results_all = []

for asset_name, data in [("Bitcoin", btc), ("Tesla", tesla)]:
    for period_label, meta in periods.items():  # ✅ meta is the inner dictionary
        start = meta["start"]
        end = meta["end"]

        df_period = data.loc[start:end]

        # Close + Upper + Lower (Bollinger)
        df_boll = add_features(df_period, "bollinger")
        results_all.append(run_hybrid_arima_lstm(df_boll, ["Close", "Upper", "Lower"], asset_name, period_label, "Close+Bands"))

        # Close + Returns
        df_ret = add_features(df_period, "returns")
        results_all.append(run_hybrid_arima_lstm(df_ret, ["Close", "Returns"], asset_name, period_label, "Close+Returns"))

        # Close Only
        df_close = df_period[["Close"]]
        results_all.append(run_hybrid_arima_lstm(df_close, ["Close"], asset_name, period_label, "Close"))


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(**kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(**kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


In [29]:
results_df = pd.DataFrame(results_all)

# Separate and save
for feature in ["Close", "Close+Returns", "Close+Bands"]:
    df_f = results_df[results_df["Feature Set"] == feature]
    filename = f"Hybrid_LSTM_ARIMA_{feature.replace('+', '_')}.csv"
    df_f.to_csv(filename, index=False)

    # Download
    from google.colab import files
    files.download(filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>