# Attempting Non-Linear Modelling for Time-Series Forecasting

In [1]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

In [2]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sml
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
import datetime
import shap
from pmdarima import auto_arima

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# get detrended gt data
detrended_trends_df = pd.read_csv('./detrended_google_trends_data.csv')
display(detrended_trends_df)

Unnamed: 0,Month,cluster_0,cluster_1,cluster_2,cluster_3,cluster_4,cluster_5,cluster_6,cluster_7,cluster_8,cluster_9,cluster_10,cluster_11,cluster_12
0,2011-01-01,58.0,984.2,-34.674853,12.265352,-38.543168,1813.0,-37.247719,27.393146,-26.353890,88.0,71.972304,53.0,12.209687
1,2011-02-01,56.0,953.6,-38.634425,60.135251,38.551132,1971.6,-38.928577,25.214203,32.212519,138.0,43.370200,53.0,6.516475
2,2011-03-01,58.0,965.6,15.189084,58.995884,-0.113440,2112.5,27.362487,12.089573,8.703165,133.0,15.639949,89.0,13.240377
3,2011-04-01,57.0,935.8,-22.201174,63.845241,15.459047,1846.8,-34.776741,23.018628,-15.881686,104.0,-25.325198,57.0,6.277186
4,2011-05-01,57.0,927.1,-10.802047,27.681307,66.264518,2029.8,-26.148478,6.000740,50.458231,119.0,-9.831991,53.0,-1.477304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,2024-11-01,55.0,716.4,150.990991,-708.490871,-498.038005,743.0,-118.844748,-108.091417,-148.082623,102.0,-684.072739,96.0,-273.809202
167,2024-12-01,53.0,718.3,155.187379,-881.323792,-563.679889,604.4,-125.233229,-90.862949,-233.455920,68.0,-779.178754,101.0,-297.274710
168,2025-01-01,61.0,763.2,165.690055,-671.400097,-530.756567,800.0,-123.817629,-126.684515,-140.860896,98.0,-746.933297,102.0,-302.121445
169,2025-02-01,59.0,699.4,197.502170,-750.221800,-579.272110,823.4,-131.600162,-97.556743,-126.297285,97.0,-737.443119,89.0,-314.053616


In [4]:
# inflation (2011 - Mar 2025)
inflation_data = pd.read_csv('./yoy_inflation_till_March_2025.csv')
inflation_data.head()

Unnamed: 0,Month,Year-on-Year Change (%)
0,Jan '11,12.079665
1,Feb '11,11.099837
2,Mar '11,12.779128
3,Apr '11,11.291282
4,May '11,12.352128


In [5]:
# Convert string to datetime and then format
def format_date(date_str):
    date_obj = datetime.datetime.strptime(date_str, '%Y-%m')
    return date_obj.strftime('%b \'%y')

In [6]:
inflation_data = inflation_data.rename(columns={'Year-on-Year Change (%)': 'Inflation rate'})
inflation_data['Month'] = pd.to_datetime(inflation_data['Month'], format='%b \'%y')
inflation_data.head()

Unnamed: 0,Month,Inflation rate
0,2011-01-01,12.079665
1,2011-02-01,11.099837
2,2011-03-01,12.779128
3,2011-04-01,11.291282
4,2011-05-01,12.352128


In [7]:
# # merge the two dataframes on the Month column
# merged_data = pd.merge(detrended_trends_df, inflation_data, on='Month', how='left')
# # display the first few rows of the merged DataFrame
# merged_data.tail()

# First, let's check the data types
print("detrended_trends_df Month dtype:", detrended_trends_df['Month'].dtype)
print("inflation_data Month dtype:", inflation_data['Month'].dtype)

# Convert both to datetime
detrended_trends_df['Month'] = pd.to_datetime(detrended_trends_df['Month'])
inflation_data['Month'] = pd.to_datetime(inflation_data['Month'])

# Now merge the dataframes
merged_data = pd.merge(detrended_trends_df, inflation_data, on='Month', how='left')

# Verify the merge worked
print("\nMerged data shape:", merged_data.shape)
merged_data.tail()

detrended_trends_df Month dtype: object
inflation_data Month dtype: datetime64[ns]

Merged data shape: (171, 15)


Unnamed: 0,Month,cluster_0,cluster_1,cluster_2,cluster_3,cluster_4,cluster_5,cluster_6,cluster_7,cluster_8,cluster_9,cluster_10,cluster_11,cluster_12,Inflation rate
166,2024-11-01,55.0,716.4,150.990991,-708.490871,-498.038005,743.0,-118.844748,-108.091417,-148.082623,102.0,-684.072739,96.0,-273.809202,34.598334
167,2024-12-01,53.0,718.3,155.187379,-881.323792,-563.679889,604.4,-125.233229,-90.862949,-233.45592,68.0,-779.178754,101.0,-297.27471,34.796057
168,2025-01-01,61.0,763.2,165.690055,-671.400097,-530.756567,800.0,-123.817629,-126.684515,-140.860896,98.0,-746.933297,102.0,-302.121445,28.871419
169,2025-02-01,59.0,699.4,197.50217,-750.2218,-579.27211,823.4,-131.600162,-97.556743,-126.297285,97.0,-737.443119,89.0,-314.053616,27.941947
170,2025-03-01,134.0,743.1,216.626875,-780.590913,-586.23059,745.5,-121.583045,-104.480263,-164.764823,90.0,-783.614968,95.0,-324.475429,27.018329


In [8]:
# get train and test temporal split

# TRAIN: Jan 2011 - Mar 2017; TEST: Apr 2017 - Mar 2025

# split_date = '2018-07-01'
split_date = '2017-04-01'

# Create train-test split
train_data = merged_data[merged_data['Month'] < split_date]
test_data = merged_data[merged_data['Month'] >= split_date]

# Get features (X) and target (y)
target_col = 'Inflation rate'
feature_cols = [col for col in merged_data.columns if col not in ['Month', target_col]]

X_train = train_data[feature_cols]
y_train = train_data[target_col]
X_test = test_data[feature_cols]
y_test = test_data[target_col]

# Print shapes to verify split
print("Training set shapes:")
print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")
print("\nTest set shapes:")
print(f"X_test: {X_test.shape}")
print(f"y_test: {y_test.shape}")

# Display split point information
print("\nTrain period:", train_data['Month'].min().strftime('%Y-%m'), "to", train_data['Month'].max().strftime('%Y-%m'))
print("Test period:", test_data['Month'].min().strftime('%Y-%m'), "to", test_data['Month'].max().strftime('%Y-%m'))

Training set shapes:
X_train: (75, 13)
y_train: (75,)

Test set shapes:
X_test: (96, 13)
y_test: (96,)

Train period: 2011-01 to 2017-03
Test period: 2017-04 to 2025-03


In [9]:
# calculate eval metrics
def calculate_metrics(y_true, y_pred):

    # Convert inputs to numpy arrays
    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()

    # from sklearn.utils.validation import check_array
    # # Convert inputs to numpy arrays with new parameter name
    # y_true = check_array(y_true.reshape(-1, 1), ensure_all_finite=True).flatten()
    # y_pred = check_array(y_pred.reshape(-1, 1), ensure_all_finite=True).flatten()

    r_squared = 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2))
    mse = np.mean((y_true - y_pred) ** 2)
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    rmspe = np.sqrt(np.mean(((y_true - y_pred) / y_true) ** 2)) * 100
    mae = np.mean(np.abs(y_true - y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    smape = 100 * np.mean(np.abs(y_pred - y_true) / ((np.abs(y_true) + np.abs(y_pred)) / 2) )
    
    return r_squared, mse, rmse, rmspe, mae, mape, smape



## TFT

In [10]:
import torch
import torchvision
print(torch.__version__)  # Check PyTorch version
print(torchvision.__version__)  # Check torchvision version
print(torch.cuda.is_available())  # Should return True if using GPU


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/martinsawojide/miniforge3/envs/inflation_nowcasting/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/martinsawojide/miniforge3/envs/inflation_nowcasting/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/martinsawojide/miniforge3/envs/inflation_nowcasting/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/martinsawojide/miniforge3/envs

2.0.1+cu117
0.15.2+cu117
True


In [11]:
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import SMAPE, MAE, RMSE
from pytorch_forecasting.data import GroupNormalizer
from pytorch_lightning.callbacks import EarlyStopping
import pytorch_lightning as pl
import torch
import pandas as pd
import matplotlib.pyplot as plt

# Set random seeds for reproducibility
pl.seed_everything(42)
torch.backends.cudnn.deterministic = True

# Prepare data for PyTorch Forecasting
def prepare_data_for_pytorch(df, time_idx_column='time_idx'):
    """Prepare data for PyTorch Forecasting"""
    df = df.sort_values('Month')
    df[time_idx_column] = range(len(df))
    df['series'] = 0  # Single series
    return df

# Create training and validation datasets
def create_datasets(df, max_prediction_length=12, max_encoder_length=24):
    """Create training and validation datasets"""
    training = TimeSeriesDataSet(
        data=df[lambda x: x.time_idx <= x.time_idx.max() - max_prediction_length],
        time_idx="time_idx",
        target="Inflation rate",
        group_ids=["series"],
        time_varying_known_reals=[col for col in df.columns if col not in ['Month', 'time_idx', 'series', 'Inflation rate']],
        time_varying_unknown_reals=["Inflation rate"],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        target_normalizer=GroupNormalizer(groups=["series"]),
    )
    validation = TimeSeriesDataSet.from_dataset(
        training,
        df,
        min_prediction_idx=df[lambda x: x.time_idx <= x.time_idx.max() - max_prediction_length].time_idx.max() + 1,
        stop_randomization=True
    )
    return training, validation

# Prepare data
prepared_data = prepare_data_for_pytorch(merged_data)

# Create datasets
training, validation = create_datasets(prepared_data)

# Create data loaders
batch_size = 32
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size)

# Initialize Temporal Fusion Transformer model
model = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.01,
    hidden_size=32,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=SMAPE(),
    optimizer="ranger"
)

# Initialize PyTorch Lightning Trainer
trainer = pl.Trainer(
    max_epochs=100,
    accelerator="auto",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    callbacks=[EarlyStopping(monitor="val_loss", patience=10)],
)

# Train the model
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

# Make predictions
predictions = model.predict(val_dataloader)

# Evaluate model performance
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions = predictions.detach()

# Calculate metrics
mae = MAE()(predictions, actuals)
rmse = RMSE()(predictions, actuals)
smape = SMAPE()(predictions, actuals)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"SMAPE: {smape:.2f}")

# Plot predictions
plt.figure(figsize=(10, 6))
plt.plot(actuals.numpy(), label='Actual')
plt.plot(predictions.numpy(), label='Predicted')
plt.title('Inflation Rate: Actual vs Predicted')
plt.xlabel('Time Steps')
plt.ylabel('Inflation Rate')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Save model
torch.save(model.state_dict(), 'tft_inflation_model.pth')

AttributeError: module 'torch' has no attribute 'compiler'

# N-HiTS

In [None]:
# Import required libraries
from pytorch_forecasting import TimeSeriesDataSet, NHiTS
from pytorch_forecasting.metrics import SMAPE, MAE, RMSE
from pytorch_forecasting.data import GroupNormalizer
from pytorch_lightning.callbacks import EarlyStopping
import pytorch_lightning as pl
import torch

# Set random seeds for reproducibility
pl.seed_everything(42)
torch.backends.cudnn.deterministic = True

# Prepare data for PyTorch Forecasting
def prepare_data_for_pytorch(df, time_idx_column='time_idx'):
    """Prepare data for PyTorch Forecasting"""
    # Create time index
    df = df.sort_values('Month')
    df[time_idx_column] = range(len(df))
    
    # Add required group column (dummy as we have single series)
    df['series'] = 0
    
    return df

# Create training and validation datasets
def create_datasets(df, max_prediction_length=12, max_encoder_length=24):
    """Create training and validation datasets"""
    
    training = TimeSeriesDataSet(
        data=df[lambda x: x.time_idx <= x.time_idx.max() - max_prediction_length],
        time_idx="time_idx",
        target="Inflation rate",
        group_ids=["series"],
        static_categoricals=[],
        time_varying_known_reals=[col for col in df.columns if col not in 
                                ['Month', 'time_idx', 'series', 'Inflation rate']],
        time_varying_unknown_reals=["Inflation rate"],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        target_normalizer=GroupNormalizer(groups=["series"]),
    )
    
    validation = TimeSeriesDataSet.from_dataset(
        training,
        df,
        min_prediction_idx=df[lambda x: x.time_idx <= x.time_idx.max() 
                            - max_prediction_length].time_idx.max() + 1,
        stop_randomization=True
    )
    
    return training, validation

# Prepare data
prepared_data = prepare_data_for_pytorch(merged_data)

# Create datasets
training, validation = create_datasets(prepared_data)

# Create data loaders
batch_size = 32
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size)

# Initialize trainer and model
trainer = pl.Trainer(
    max_epochs=100,
    accelerator="auto",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    callbacks=[EarlyStopping(monitor="val_loss", patience=10)],
)

# Initialize N-HiTS model
# nhits = NHiTS.from_dataset(
#     training,
#     learning_rate=0.001,
#     hidden_size=32,
#     dropout=0.1,
#     weight_decay=1e-2,
#     backcast_loss_ratio=0.1,
# )
nhits = NHiTS.from_dataset(
    training,
    hidden_size=64,
    loss=torch.nn.MSELoss(),
    learning_rate=1e-3,
    log_interval=10,
    log_val_interval=1,
    weight_decay=1e-2,
    dropout=0.1,
)

# Train the model
trainer.fit(
    nhits,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

# Make predictions
predictions = nhits.predict(val_dataloader)

# Evaluate model performance
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions = predictions.detach()

# Calculate metrics
mae = MAE()(predictions, actuals)
rmse = RMSE()(predictions, actuals)
smape = SMAPE()(predictions, actuals)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"SMAPE: {smape:.2f}")

# Plot predictions
plt.figure(figsize=(10, 6))
plt.plot(actuals.numpy(), label='Actual')
plt.plot(predictions.numpy(), label='Predicted')
plt.title('Inflation Rate: Actual vs Predicted')
plt.xlabel('Time Steps')
plt.ylabel('Inflation Rate')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Save model
torch.save(model.state_dict(), 'nhits_inflation_model.pth')

Seed set to 42
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


TypeError: `model` must be a `LightningModule` or `torch._dynamo.OptimizedModule`, got `NHiTS`