In [46]:
import numpy as np
import pandas as pd
from typing import List, Tuple
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from scipy.io import loadmat
import plotly.graph_objects as go
from timeseries_split import load_time_series, create_lagged_features, train_test_split_lagged, plot_lagged_chunks

# Hyperparameters
n_lags = 15  # forwarded to create_lagged_features (presumably the number of lag columns; confirm in timeseries_split)
train_size = 0.8  # forwarded to train_test_split_lagged (presumably the training fraction; confirm in timeseries_split)
total_size = 200  # in this cell, only referenced by the commented-out split call below
stride = 200  # in this cell, only referenced by the commented-out split call below
mat_file_path = 'Xtrain.mat'  # <- Replace with your .mat file path

# Load and preprocess the data
# load_time_series and create_lagged_features come from the project-local
# timeseries_split module; their exact return types are not visible here.
series = load_time_series(mat_file_path)
lagged_df = create_lagged_features(series, n_lags)
# Alternative call that also passes total_size and stride (currently disabled):
# splits = train_test_split_lagged(df=lagged_df, train_size=train_size, total_size=total_size, stride=stride)
# The following cells index the result as splits[0][0] (train) and splits[0][1] (test).
splits = train_test_split_lagged(lagged_df, train_size)

In [None]:
# Row counts of the first (train, test) pair; they size the synthetic date ranges.
n_rows_train = len(splits[0][0])
n_rows_test = len(splits[0][1])

if n_rows_train == 0:
    # Without at least one training row there is no "last training date"
    # from which the test dates could continue.
    raise ValueError("Training split is empty; cannot build the 'ds' column.")

# Create synthetic 'ds' columns for Prophet: consecutive calendar days starting
# at an arbitrary origin, one per training row.
train_dates = pd.date_range(start='2000-01-01', periods=n_rows_train, freq='D')

# Get the last date from training set
last_train_date = train_dates[-1]

# Create 'ds' for test set starting from the next day, so the two ranges are
# contiguous and never overlap.
test_dates = pd.date_range(start=last_train_date + pd.Timedelta(days=1),
                           periods=n_rows_test, freq='D')

# .assign returns a new DataFrame, so the frames held in `splits` are left
# untouched. Adding the column in place would mutate the split result and can
# trigger SettingWithCopyWarning if the split frames are slices of lagged_df.
lagged_df_train = splits[0][0].assign(ds=train_dates)
lagged_df_test = splits[0][1].assign(ds=test_dates)

print(lagged_df_train.columns)
print(lagged_df_test.columns)

In [None]:
from prophet import Prophet

# Every column whose name begins with 'lag_' is used as an extra regressor.
lag_columns = [name for name in lagged_df_train.columns if name.startswith('lag_')]

# Default-configured Prophet model. Regressors have to be registered on the
# model before it is fitted.
# NOTE(review): the 'ds' values are synthetic daily dates, so any calendar
# seasonality Prophet fits by default is not tied to real time - confirm that
# the default seasonality settings are intended.
model = Prophet()
for regressor_name in lag_columns:
    model.add_regressor(regressor_name)

# Fit on the timestamp, the target 'y' and the registered regressors.
train_columns = ['ds', 'y'] + lag_columns
model.fit(lagged_df_train[train_columns])

# Predict on the test rows; the target column is deliberately left out.
predict_columns = ['ds'] + lag_columns
forecast = model.predict(lagged_df_test[predict_columns])

In [None]:
import matplotlib.pyplot as plt

# Attach the observed test targets to the forecast frame. `.values` strips the
# index so the assignment is positional rather than index-aligned.
observed = lagged_df_test['y'].values
forecast['y_actual'] = observed

# Error metrics of the Prophet point forecast against the observed values.
mse = mean_squared_error(y_true=forecast['y_actual'], y_pred=forecast['yhat'])
mae = mean_absolute_error(y_true=forecast['y_actual'], y_pred=forecast['yhat'])

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

# Optional: overlay predictions and actual values, with Prophet's lower/upper
# bounds shaded.
# NOTE(review): Prophet documents yhat_lower/yhat_upper as an uncertainty
# interval; the legend label is kept unchanged.
fig, ax = plt.subplots()
time_axis = forecast['ds']

ax.plot(time_axis, forecast['y_actual'], label='Actual', linewidth=2)
ax.plot(time_axis, forecast['yhat'], label='Prophet', linewidth=2)
ax.fill_between(time_axis, forecast['yhat_lower'], forecast['yhat_upper'],
                alpha=0.2, label='Confidence Interval')

ax.set_xlabel('Time (sequential dates)')
ax.set_ylabel('Value')
ax.legend()
ax.grid()
fig.tight_layout()
plt.show()
