In [None]:
import numpy as np
import pandas as pd
from typing import List, Tuple
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from scipy.io import loadmat
import plotly.graph_objects as go
from timeseries_split import load_time_series, create_lagged_features, train_test_split_lagged, plot_lagged_chunks

# Hyperparameters
n_lags = 15
train_size = 0.8
total_size = 200
stride = 200
mat_file_path = 'Xtrain.mat'  # <- Replace with your .mat file path

# Load and preprocess the data
series = load_time_series(mat_file_path)
lagged_df = create_lagged_features(series, n_lags)
# splits = train_test_split_lagged(df=lagged_df, train_size=train_size, total_size=total_size, stride=stride)
splits = train_test_split_lagged(lagged_df, train_size)

In [None]:
n_rows_train = len(splits[0][0])
n_rows_test = len(splits[0][1])

lagged_df_train = splits[0][0]
lagged_df_test = splits[0][1]

# Create synthethic 'ds' columns for Prophet
lagged_df_train['ds'] = pd.date_range(start='2000-01-01', periods = n_rows_train, 
                                      freq = 'D')

# Get the last date from training set
last_train_date = lagged_df_train['ds'].iloc[-1]

# Create 'ds' for test set starting from the next day
lagged_df_test['ds'] = pd.date_range(start=last_train_date + pd.Timedelta(days=1),
                                     periods=n_rows_test, freq='D')

print(lagged_df_train.columns)
print(lagged_df_test.columns)

In [None]:
from prophet import Prophet

# Identify lag columns (anything that starts with 'lag_')
lag_columns = [col for col in lagged_df_train.columns if col.startswith('lag_')]

# Initialize Prophet
model = Prophet()

# Add lag features as regressors
for lag in lag_columns:
    model.add_regressor(lag)

# Fit the model
model.fit(lagged_df_train[['ds', 'y'] + lag_columns])

# Predict
forecast = model.predict(lagged_df_test[['ds'] + lag_columns])

In [None]:
# Add actual values to the forecast DataFrame for comparison
forecast['y_actual'] = lagged_df_test['y'].values

# Compute metrics
mse = mean_squared_error(forecast['y_actual'], forecast['yhat'])
mae = mean_absolute_error(forecast['y_actual'], forecast['yhat'])

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

# Optional: visualize predictions vs actual values
import matplotlib.pyplot as plt

plt.plot(forecast['ds'], forecast['y_actual'], label='Actual', linewidth=2)
plt.plot(forecast['ds'], forecast['yhat'], label='Prophet', linewidth=2)
plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'],
                 alpha=0.2, label='Confidence Interval')
plt.xlabel('Time (sequential dates)')
plt.ylabel('Value')
plt.legend()
plt.grid()
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from prophet import Prophet
import matplotlib.pyplot as plt
from timeseries_split import load_time_series, create_lagged_features, train_test_split_lagged, plot_lagged_chunks
from sklearn.metrics import mean_squared_error

# Parameters
candidate_lags = list(range(1, 41))
val_ratio = 0.2
n_folds = 5

# Load your time series
series = load_time_series('Xtrain.mat')  # your own loader

results = []

for n_lags in candidate_lags:
    print(f"\nEvaluating n_lags = {n_lags}")

    lagged_df = create_lagged_features(series, n_lags).copy()
    lagged_df['ds'] = pd.date_range(start = '2000-01-01', periods = len(lagged_df), freq = 'D')
    lag_columns = [col for col in lagged_df.columns if col.startswith('lag_')]

    fold_size = len(lagged_df) // n_folds
    prophet_maes, linear_maes, relative_scores, weights = [], [], [], []

    train_folds = pd.DataFrame()

    for fold in range(n_folds):
        print(f"\n Evaluating fold = {fold}")
        train_end = (fold + 1) * fold_size
        new_train_data = lagged_df.iloc[:train_end]

        train_folds = pd.concat([train_folds, new_train_data])

        val_split = int(len(new_train_data) * (1 - val_ratio))
        val_fold = new_train_data.iloc[val_split:]

        # Identify lag columns (anything that starts with 'lag_')
        lag_columns = [col for col in train_folds.columns if col.startswith('lag_')]

        # Initialize the prophet
        model = Prophet()

        # Add lag features as regressors
        for lag in lag_columns:
            model.add_regressor(lag)

        # Fit the model
        model.fit(train_folds[['ds', 'y'] + lag_columns])
        forecast = model.predict(val_fold[['ds'] + lag_columns])
        
        # Add the actual values from the validation fold and obtain metrics
        forecast['y_actual'] = val_fold['y'].values
        mse = mean_squared_error(forecast['y_actual'], forecast['yhat'])
        mae = mean_absolute_error(forecast['y_actual'], forecast['yhat'])

        print("Mean Squared Error:", mse)
        print("Mean Absolute Error:", mae)

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from prophet import Prophet
from timeseries_split import load_time_series, create_lagged_features

# Parameters
candidate_lags = list(range(1, 41))
val_ratio = 0.2
n_folds = 5

series = load_time_series('Xtrain.mat')
results = []

for n_lags in candidate_lags:
    print(f"\nEvaluating n_lags = {n_lags}")
    lagged_df = create_lagged_features(series, n_lags).copy()
    lagged_df['ds'] = pd.date_range(start='2000-01-01', periods=len(lagged_df), freq='D')
    lag_columns = [col for col in lagged_df.columns if col.startswith('lag_')]

    fold_size = len(lagged_df) // n_folds
    train_folds = pd.DataFrame()

    prophet_maes, linear_maes, relative_scores, weights = [], [], [], []

    for fold in range(n_folds):
        print(f" Fold {fold + 1}/{n_folds}")
        train_end = (fold + 1) * fold_size
        new_train_data = lagged_df.iloc[:train_end]
        train_folds = pd.concat([train_folds, new_train_data])
        val_split = int(len(new_train_data) * (1 - val_ratio))
        val_fold = new_train_data.iloc[val_split:]

        # Prophet
        prophet = Prophet()
        for lag in lag_columns:
            prophet.add_regressor(lag)
        prophet.fit(train_folds[['ds', 'y'] + lag_columns])
        forecast = prophet.predict(val_fold[['ds'] + lag_columns])
        forecast['y_actual'] = val_fold['y'].values
        prophet_mae = mean_absolute_error(forecast['y_actual'], forecast['yhat'])

        # Linear regression
        lr = LinearRegression()
        lr.fit(train_folds[lag_columns], train_folds['y'])
        y_pred_lr = lr.predict(val_fold[lag_columns])
        linear_mae = mean_absolute_error(val_fold['y'], y_pred_lr)

        # Relative score
        relative_score = prophet_mae / linear_mae
        weight = fold + 1  # increasing weight for later folds

        # Save metrics
        prophet_maes.append(prophet_mae)
        linear_maes.append(linear_mae)
        relative_scores.append(relative_score)
        weights.append(weight)

        print(f" Prophet MAE: {prophet_mae:.4f}, Linear MAE: {linear_mae:.4f}, Relative: {relative_score:.4f}")

    weighted_score = np.average(relative_scores, weights=weights)
    results.append({'n_lags': n_lags, 'weighted_relative_score': weighted_score})

# Find best n_lags
best_result = min(results, key=lambda x: x['weighted_relative_score'])
best_lags = best_result['n_lags']
print(f"\nBest n_lags: {best_lags} with weighted relative score: {best_result['weighted_relative_score']:.4f}")

# === Train final model on full data ===
final_df = create_lagged_features(series, best_lags).copy()
final_df['ds'] = pd.date_range(start='2000-01-01', periods=len(final_df), freq='D')
final_lag_cols = [col for col in final_df.columns if col.startswith('lag_')]

final_model = Prophet()
for lag in final_lag_cols:
    final_model.add_regressor(lag)

final_model.fit(final_df[['ds', 'y'] + final_lag_cols])
print("✅ Final Prophet model trained on full dataset.")


In [9]:
series = load_time_series('Xtrain.mat')
# Normalize the series then train
normalized_series = (series - np.mean(series)) / np.std(series)
original_series = (normalized_series * np.std(series)) + np.mean(series)


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from prophet import Prophet
from timeseries_split import load_time_series, create_lagged_features

# Parameters
candidate_lags = list(range(1, 41))
val_ratio = 0.2
n_folds = 5

# Load the raw time series
series = load_time_series('Xtrain.mat')
# Normalize the series then train
series = (series - np.mean(series)) / np.std(series)

results = []

for n_lags in candidate_lags:
    print(f"\nEvaluating n_lags = {n_lags}")
    
    # Create lagged DataFrame
    lagged_df = create_lagged_features(series, n_lags).copy()
    lagged_df['ds'] = pd.date_range(start='2000-01-01', periods=len(lagged_df), freq='D')
    lag_columns = [col for col in lagged_df.columns if col.startswith('lag_')]

    fold_size = len(lagged_df) // n_folds
    train_folds = pd.DataFrame()
    prophet_maes = []
    weights = []

    for fold in range(n_folds):
        print(f" Fold {fold + 1}/{n_folds}")

        # Accumulate training data up to current fold
        train_end = (fold + 1) * fold_size
        new_train_data = lagged_df.iloc[:train_end]
        train_folds = pd.concat([train_folds, new_train_data])

        # Validation set from last part of new_train_data
        val_split = int(len(new_train_data) * (1 - val_ratio))
        val_fold = new_train_data.iloc[val_split:]

        # Initialize and train Prophet
        prophet = Prophet()
        for lag in lag_columns:
            prophet.add_regressor(lag)
        prophet.fit(train_folds[['ds', 'y'] + lag_columns])

        # Predict on validation fold
        forecast = prophet.predict(val_fold[['ds'] + lag_columns])
        forecast['y_actual'] = val_fold['y'].values
        prophet_mae = mean_absolute_error(forecast['y_actual'], forecast['yhat'])

        # Store MAE and weight
        prophet_maes.append(prophet_mae)
        weights.append(fold + 1)  # Give more weight to later folds

        print(f"  Prophet MAE: {prophet_mae:.4f}")

    # Weighted average of Prophet MAEs
    weighted_mae = np.average(prophet_maes, weights=weights)
    results.append({'n_lags': n_lags, 'weighted_mae': weighted_mae})

# Select best n_lags based on lowest weighted Prophet MAE
best_result = min(results, key=lambda x: x['weighted_mae'])
best_lags = best_result['n_lags']
print(f"\nBest n_lags: {best_lags} with weighted Prophet MAE: {best_result['weighted_mae']:.4f}")

# === Train final model on full dataset ===
final_df = create_lagged_features(series, best_lags).copy()
final_df['ds'] = pd.date_range(start='2000-01-01', periods=len(final_df), freq='D')
final_lag_cols = [col for col in final_df.columns if col.startswith('lag_')]

final_model = Prophet()
for lag in final_lag_cols:
    final_model.add_regressor(lag)

final_model.fit(final_df[['ds', 'y'] + final_lag_cols])

In [4]:

# Python
future = final_model.make_future_dataframe(periods=200)
future.tail
forecast = prophet.predict(future)
prophet.plot(forecast)

ValueError: Regressor 'lag_1' missing from dataframe

In [None]:
import plotly.graph_objects as go

# Predict on full dataset
forecast_full = final_model.predict(final_df[['ds'] + final_lag_cols])
forecast_full['y_actual'] = final_df['y'].values
forecast_full['y_actual_scaled_back'] = (forecast_full['y_actual'] * np.std(series)) + np.mean(series)
forecast_full['yhat_scaled_back'] = (forecast_full['yhat'] * np.std(series)) + np.mean(series)


# Create plotly figure
fig = go.Figure()

# Actual values
fig.add_trace(go.Scatter(
    x=forecast_full['ds'],
    y=forecast_full['y_actual_scaled_back'],
    mode='lines',
    name='Actual',
    line=dict(color='black')
))

# Forecast
fig.add_trace(go.Scatter(
    x=forecast_full['ds'],
    y=forecast_full['yhat_scaled_back'],
    mode='lines',
    name='Forecast (yhat)',
    line=dict(color='blue')
))

# Layout settings
fig.update_layout(
    title='Prophet Forecast on Full Dataset',
    xaxis_title='Date',
    yaxis_title='Value',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    height=600
)

fig.show()
