In [6]:
# Dependencies
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras import Input
from tensorflow.keras.callbacks import EarlyStopping

# Load data (FRED-MD)
df = pd.read_csv('../current.csv')

# Remove the first row (transformation codes)
transformation_codes = df.iloc[0]  # Transformation codes can be applied if needed
df = df.iloc[1:]

# Set the first column as the index and datetime
df.set_index(df.columns[0], inplace=True)
df.index = pd.to_datetime(df.index)

# Dropna
data = df.dropna()

# Create train data and target
target = (data['CPIAUCSL'].diff(12) / data['CPIAUCSL'].shift(12)) * 100
target = target.shift(-12).dropna()
data = data.loc[target.index]
train = data.dropna()

In [7]:
import os
import tensorflow as tf

# Set deterministic variables
SEED = 42

# Set seeds for reproducibility
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

np.random.seed(SEED)
tf.random.set_seed(SEED)

# Disable GPU for determinism
tf.config.experimental.set_visible_devices([], 'GPU')


In [8]:

series = target.values  # your univariate series

for forecast_horizon in range(4, 12*5 + 4, 12):
    for context_window in [1, 2, 4, 6, 12, 24]:
        for dropout_rate in [0.0, 0.1, 0.2, 0.3]:
            
            # Check if there's enough data for this combination
            total_required = context_window + forecast_horizon

            # Cutoff to ensure test data is not included in training
            cutoff = len(series) - forecast_horizon
            series_trainval = series[:cutoff]
            series_test = series[cutoff:]

            # Build training/val data
            X, y = [], []
            for i in range(len(series_trainval) - context_window):
                X.append(series_trainval[i : i + context_window])
                y.append(series_trainval[i + context_window])
            X = np.array(X).reshape(-1, context_window, 1)
            y = np.array(y).reshape(-1, 1)

            # Split train/val
            n = X.shape[0]
            val_size = int(n * 0.1)
            X_train = X[: n - val_size]
            y_train = y[: n - val_size]
            X_val   = X[n - val_size :]
            y_val   = y[n - val_size :]

            # Fit scalers ONLY on training data
            scaler_X = MinMaxScaler()
            X_train_scaled = scaler_X.fit_transform(
                X_train.reshape(-1, 1)
            ).reshape(X_train.shape)
            X_val_scaled = scaler_X.transform(
                X_val.reshape(-1, 1)
            ).reshape(X_val.shape)

            scaler_y = MinMaxScaler()
            y_train_scaled = scaler_y.fit_transform(y_train)
            y_val_scaled = scaler_y.transform(y_val)

            # Build model
            model = Sequential([
                Input((context_window, 1)),
                LSTM(64, return_sequences=True),
                Dropout(dropout_rate),
                LSTM(64),
                Dropout(dropout_rate),
                Dense(1)
            ])
            model.compile(optimizer='adam', loss='mse')

            # Train
            es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
            model.fit(
                X_train_scaled, y_train_scaled,
                epochs=100,
                batch_size=32,
                shuffle=False,
                validation_data=(X_val_scaled, y_val_scaled),
                callbacks=[es],
                verbose=0
            )

            # Prepare input history ending BEFORE test data
            history = series[cutoff - context_window : cutoff].copy()
            forecast = []
            for _ in range(forecast_horizon):
                x_in = scaler_X.transform(history.reshape(-1, 1)).reshape(1, context_window, 1)
                yhat_s = model.predict(x_in, verbose=0)
                yhat = scaler_y.inverse_transform(yhat_s)[0, 0]
                forecast.append(yhat)
                history = np.append(history[1:], yhat)

            # Evaluate
            y_true = series_test
            rmse = np.sqrt(mean_squared_error(y_true, forecast))
            mae = mean_absolute_error(y_true, forecast)

            # — log to CSV  —
            fp = "uni_ar_lstm.csv"
            df_log = pd.read_csv(fp)
            new_row = {
                'forecast_horizon': forecast_horizon,
                'context_window':   context_window,
                'dropout_rate':     dropout_rate,
                'rmse':             rmse,
                'mae':              mae,
                'forecast':         list(forecast),
                'actual':        list(y_true.tolist())
            }
            df_log = pd.concat([df_log, pd.DataFrame([new_row])], ignore_index=True)
            df_log.to_csv(fp, index=False)


  df_log = pd.concat([df_log, pd.DataFrame([new_row])], ignore_index=True)
