In [45]:
import numpy as np
import pandas as pd
import torch
from gluonts.dataset.common import ListDataset
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
from uni2ts.model.moirai_moe import MoiraiMoEForecast, MoiraiMoEModule
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os

In [46]:
# Parameters and settings

# Parameters for data split
WINDOW = 5    # rolling window size to use as predictors
DATE_COL = 'DlyCalDt'
ID_COL = 'PERMNO'
TARGET_COL = 'ExcessReturn'

# Estimation (in sample) period dates
in_sample_start_date = pd.to_datetime("2000-01-01")
in_sample_end_date = pd.to_datetime("2015-12-31")

# Out-of-sample period dates
out_sample_start_date = pd.to_datetime("2016-01-01")
out_sample_end_date = pd.to_datetime("2024-12-31")

# Use GPU if available, else default to using CPU
device_map = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [47]:
# Load the cleaned and filtered data files for in sample and out of sample periods into a pandas DataFrames
in_sample_df = pd.read_csv("../Cleaned Datasets/in_sample_cleaned.csv")
out_sample_df = pd.read_csv("../Cleaned Datasets/out_sample_cleaned.csv")


# Ensure the date columns are in datetime format
in_sample_df[DATE_COL] = pd.to_datetime(in_sample_df[DATE_COL])
out_sample_df[DATE_COL] = pd.to_datetime(out_sample_df[DATE_COL])

in_sample_df = in_sample_df[[ID_COL, DATE_COL, TARGET_COL]]
out_sample_df = out_sample_df[[ID_COL, DATE_COL, TARGET_COL]]

In [48]:
in_sample_df.info()
out_sample_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199550 entries, 0 to 199549
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   PERMNO        199550 non-null  int64         
 1   DlyCalDt      199550 non-null  datetime64[ns]
 2   ExcessReturn  199550 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 4.6 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112400 entries, 0 to 112399
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   PERMNO        112400 non-null  int64         
 1   DlyCalDt      112400 non-null  datetime64[ns]
 2   ExcessReturn  112400 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 2.6 MB


In [49]:
stocks_permno = in_sample_df["PERMNO"].unique().tolist()
print(f"Number of unique stocks: {len(stocks_permno)}")

Number of unique stocks: 50


In [50]:
combined_df = pd.concat([in_sample_df, out_sample_df])
combined_df = combined_df.sort_values([ID_COL, DATE_COL]).reset_index(drop=True)
combined_df[DATE_COL] = pd.to_datetime(combined_df[DATE_COL])

# Create rolling window for predictors
for lag in range(1, WINDOW+1):
    combined_df[f'lag_{lag}'] = combined_df.groupby(ID_COL)[TARGET_COL].shift(lag)
combined_df = combined_df.dropna(subset=[f'lag_{lag}' for lag in range(1, WINDOW+1)]).reset_index(drop=True)
combined_df.sort_values([ID_COL, DATE_COL], inplace=True)
combined_df.reset_index(drop=True, inplace=True)

In [51]:
records = []


for _, row in combined_df.iterrows():
    if row[DATE_COL] >= out_sample_start_date:
        context = [row[f'lag_{i}'] for i in range(WINDOW, 0, -1)]
        start_ts = row[DATE_COL] - pd.Timedelta(days=WINDOW)
        records.append({
            "start":  start_ts,
            "target": context
        })

test_ds = ListDataset(records, freq="D")

In [52]:
y_test = pd.Series(out_sample_df[TARGET_COL].values)

results = out_sample_df[[ID_COL, DATE_COL, TARGET_COL]]

In [53]:
# Creating a Function to Calculate Predictive-R2 Used in the Finance Literature
def r2(y_true, y_pred):
    return 1-(((y_true-y_pred)**2).sum()/(y_true**2).sum())

In [54]:
# Zero Shot Uni2ts-Moirai Small
moirai_s = MoiraiForecast(
    module = MoiraiModule.from_pretrained(f"Salesforce/moirai-1.1-R-small"),
    prediction_length = 1,
    context_length = WINDOW,
    patch_size = "auto",
    num_samples = 100,
    target_dim = 1,
    feat_dynamic_real_dim = 0,
    past_feat_dynamic_real_dim = 0
)
predictor = moirai_s.create_predictor(batch_size=32)
predictor.to(device)

preds = [forecasts.mean[0] for forecasts in predictor.predict(test_ds)]
y_moirai_s = pd.Series(preds)

results['y_moirai_s'] = y_moirai_s
r2_moirai_s  = r2(y_test, y_moirai_s)
mse_moirai_s = mean_squared_error(y_test, y_moirai_s)
mae_moirai_s = mean_absolute_error(y_test, y_moirai_s)
da_moirai_s = (np.sign(y_test) == np.sign(y_moirai_s)).mean()

  out = F.scaled_dot_product_attention(


KeyboardInterrupt: 

In [None]:
# Zero Shot Uni2ts-Moirai-MoE Small
moirai_moe_s = MoiraiMoEForecast(
    module = MoiraiMoEModule.from_pretrained(f"Salesforce/moirai-moe-1.0-R-small"),
    prediction_length = 1,
    context_length = WINDOW,
    patch_size = 16,
    num_samples = 100,
    target_dim = 1,
    feat_dynamic_real_dim = 0,
    past_feat_dynamic_real_dim = 0
)
predictor = moirai_moe_s.create_predictor(batch_size=32)
predictor.to(device)

preds = [forecasts.mean[0] for forecasts in predictor.predict(test_ds)]
y_moirai_moe_s = pd.Series(preds)

results['y_moirai_moe_s'] = y_moirai_moe_s
r2_moirai_moe_s  = r2(y_test, y_moirai_moe_s)
mse_moirai_moe_s = mean_squared_error(y_test, y_moirai_moe_s)
mae_moirai_moe_s = mean_absolute_error(y_test, y_moirai_moe_s)
da_moirai_moe_s = (np.sign(y_test) == np.sign(y_moirai_moe_s)).mean()

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/469M [00:00<?, ?B/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['y_moirai_moe_s'] = y_moirai_moe_s


In [None]:
# Zero Shot Uni2ts-Moirai-MoE Base
moirai_moe_b = MoiraiMoEForecast(
    module = MoiraiMoEModule.from_pretrained(f"Salesforce/moirai-moe-1.0-R-base"),
    prediction_length = 1,
    context_length = WINDOW,
    patch_size = 16,
    num_samples = 100,
    target_dim = 1,
    feat_dynamic_real_dim = 0,
    past_feat_dynamic_real_dim = 0
)
predictor = moirai_moe_b.create_predictor(batch_size=32)
predictor.to(device)

preds = [forecasts.mean[0] for forecasts in predictor.predict(test_ds)]
y_moirai_moe_b = pd.Series(preds)

results['y_moirai_moe_b'] = y_moirai_moe_b
r2_moirai_moe_b  = r2(y_test, y_moirai_moe_b)
mse_moirai_moe_b = mean_squared_error(y_test, y_moirai_moe_b)
mae_moirai_moe_b = mean_absolute_error(y_test, y_moirai_moe_b)
da_moirai_moe_b = (np.sign(y_test) == np.sign(y_moirai_moe_b)).mean()

config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/3.74G [00:00<?, ?B/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['y_moirai_moe_b'] = y_moirai_moe_b


In [None]:
# Collating Results

results_matrix = [{
        "Model": "Uni2ts-Moirai Small",
        "R-squared": r2_moirai_s,
        "MSE": mse_moirai_s,
        "MAE": mae_moirai_s,
        "Direction Accuracy": da_moirai_s
    },
    {
        "Model": "Uni2ts-Moirai-MoE Small",
        "R-squared": r2_moirai_moe_s,
        "MSE": mse_moirai_moe_s,
        "MAE": mae_moirai_moe_s,
        "Direction Accuracy": da_moirai_moe_s
    },
    {
        "Model": "Uni2ts-Moirai-MoE Base",
        "R-squared": r2_moirai_moe_b,
        "MSE": mse_moirai_moe_b,
        "MAE": mae_moirai_moe_b,
        "Direction Accuracy": da_moirai_moe_b
    }]

results_matrix_df = pd.DataFrame(results_matrix)
results_matrix_df

Unnamed: 0,Model,R-squared,MSE,MAE,Direction Accuracy
0,Uni2ts-Moirai Small,-41.217275,0.014539,0.020589,0.502532
1,Uni2ts-Moirai-MoE Small,-0.355351,0.000467,0.013946,0.502274
2,Uni2ts-Moirai-MoE Base,-0.673191,0.000576,0.014487,0.500704


In [None]:
# Save Prediction Results
results.to_csv("uni2ts(5-day)window.csv", index=False)