In [1]:
import numpy as np
import pandas as pd
import torch
from timesfm import TimesFm, TimesFmHparams, TimesFmCheckpoint
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os

TimesFM v1.2.0. See https://github.com/google-research/timesfm/blob/master/README.md for updated APIs.
Loaded Jax TimesFM.


  from .autonotebook import tqdm as notebook_tqdm


Loaded PyTorch TimesFM.


In [2]:
# Notebook configuration: everything a reader might want to tune lives here.

# Number of trailing observations handed to the model as context for each
# one-step-ahead forecast.
# NOTE(review): the original remark said the TimesFM window should be a multiple
# of 32 (the input patch length), yet WINDOW is 5 and the models below are built
# with context_len = 32 - confirm that the short context is intended.
WINDOW = 5

# Column names of the cleaned panel data.
DATE_COL, ID_COL, TARGET_COL = 'DlyCalDt', 'PERMNO', 'ExcessReturn'

# Estimation (in-sample) period boundaries.
in_sample_start_date, in_sample_end_date = (
    pd.to_datetime(day) for day in ("2000-01-01", "2015-12-31")
)

# Out-of-sample period boundaries.
out_sample_start_date, out_sample_end_date = (
    pd.to_datetime(day) for day in ("2016-01-01", "2024-12-31")
)

# Compute device. Both values are pinned to the CPU (no GPU auto-detection is
# performed): `device_map` is passed to TimesFM as its backend, `device` is the
# torch device on which the context tensors are created.
device_map = "cpu"
device = torch.device("cpu")

In [3]:
# Load the cleaned in-sample and out-of-sample panels from CSV.
def _load_panel(csv_path):
    """Read one cleaned CSV and return it as a tidy (id, date, target) panel.

    The date column is parsed to datetime, only the ID/date/target columns are
    kept, rows with any missing value among them are dropped, and the result is
    sorted by (ID, date) with a fresh 0..n-1 index.
    """
    panel = pd.read_csv(csv_path)
    panel[DATE_COL] = pd.to_datetime(panel[DATE_COL])
    panel = panel[[ID_COL, DATE_COL, TARGET_COL]].dropna()
    return panel.sort_values([ID_COL, DATE_COL]).reset_index(drop=True)


in_sample_df = _load_panel("../Cleaned Datasets/in_sample_cleaned.csv")
out_sample_df = _load_panel("../Cleaned Datasets/out_sample_cleaned.csv")

In [4]:
# Sanity check after cleaning: row counts, dtypes and memory use of both panels
# (in-sample first, then out-of-sample).
for panel in (in_sample_df, out_sample_df):
    panel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199550 entries, 0 to 199549
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   PERMNO        199550 non-null  int64         
 1   DlyCalDt      199550 non-null  datetime64[ns]
 2   ExcessReturn  199550 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 4.6 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112400 entries, 0 to 112399
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   PERMNO        112400 non-null  int64         
 1   DlyCalDt      112400 non-null  datetime64[ns]
 2   ExcessReturn  112400 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 2.6 MB


In [5]:
# Distinct stock identifiers present in the in-sample panel. The column is
# addressed through ID_COL (as everywhere else in the notebook) rather than a
# hard-coded "PERMNO" literal, so renaming the ID column only requires a config change.
stocks_permno = in_sample_df[ID_COL].unique().tolist()
print(f"Number of unique stocks: {len(stocks_permno)}")

Number of unique stocks: 50


In [6]:
# Split data into estimation (in-sample) and out-of-sample sets.
# Independent copies are taken so later edits to df_train / df_test cannot
# alter the loaded panels.
df_train, df_test = in_sample_df.copy(), out_sample_df.copy()

In [7]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line after
# each summary.
df_train.info()
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199550 entries, 0 to 199549
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   PERMNO        199550 non-null  int64         
 1   DlyCalDt      199550 non-null  datetime64[ns]
 2   ExcessReturn  199550 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 4.6 MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112400 entries, 0 to 112399
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   PERMNO        112400 non-null  int64         
 1   DlyCalDt      112400 non-null  datetime64[ns]
 2   ExcessReturn  112400 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 2.6 MB
None


In [8]:
# Create rolling-window predictors for the TimesFM models.
#
# For every stock, each run of WINDOW consecutive observations becomes one
# context and the observation immediately after it becomes the target. Only
# targets dated on or after `out_sample_start_date` are kept; the in-sample
# panel is concatenated in so that the earliest out-of-sample targets still
# have a full WINDOW of history behind them.
#
# Outputs (all aligned by position):
#   contexts - list of 1-D float32 torch tensors of length WINDOW
#   targets  - list of realised target values
#   records  - list of dicts holding ID, realised target and prediction date

combined_df = pd.concat([in_sample_df, out_sample_df])
combined_df = combined_df.sort_values([ID_COL, DATE_COL]).reset_index(drop=True)
combined_df[DATE_COL] = pd.to_datetime(combined_df[DATE_COL])

contexts = []
targets = []
records = []

# `stock_id` instead of `id`, so the Python builtin is not shadowed.
for stock_id, grp in combined_df.groupby(ID_COL):
    values = grp[TARGET_COL].values
    dates = grp[DATE_COL].values

    for i in range(len(values) - WINDOW):
        pred_date = dates[i + WINDOW]
        # The cutoff comes from the config cell rather than a literal that was
        # re-parsed on every iteration.
        if pred_date >= out_sample_start_date:
            contexts.append(torch.tensor(values[i:i+WINDOW], dtype=torch.float32, device=device))
            targets.append(values[i+WINDOW])
            records.append({
                ID_COL: stock_id,
                TARGET_COL: values[i+WINDOW],
                DATE_COL: pred_date
            })

In [9]:
# Realised out-of-sample targets as a Series; it is positionally aligned with
# `contexts`, so it lines up with the model forecasts row by row.
y_test = pd.Series(data=targets)

# One row per forecast (ID, realised target, prediction date); each model later
# appends its predictions to this frame as a new column.
results = pd.DataFrame(data=records)

In [10]:
# Predictive R-squared as used in the finance literature
def r2(y_true, y_pred):
    """Return the predictive (out-of-sample) R-squared of a forecast.

    Computed as 1 - sum((y_true - y_pred)^2) / sum(y_true^2): the denominator
    is the raw sum of squared realised values, not the variance around their
    mean.

    Parameters
    ----------
    y_true, y_pred : array-like objects supporting elementwise arithmetic and
        ``.sum()`` (e.g. numpy arrays or pandas Series).
    """
    squared_errors = (y_true - y_pred) ** 2
    squared_actuals = y_true ** 2
    return 1 - (squared_errors.sum() / squared_actuals.sum())

In [11]:
# Directional Accuracy Split
def directional_accuracy(y_true, y_pred):
    """Return directional accuracy separately for up and down moves.

    Parameters
    ----------
    y_true : array-like of realised values.
    y_pred : array-like of forecasts, positionally aligned with ``y_true``.

    Returns
    -------
    (up_da, down_da) : tuple
        ``up_da`` is the share of observations with ``y_true > 0`` whose
        forecast has the same sign; ``down_da`` is the same for ``y_true < 0``.
        Either value is ``np.nan`` when there are no observations on that
        side. Observations with ``y_true == 0`` count towards neither.
    """
    # Coerce to numpy so plain Python lists work too (a list cannot be compared
    # with ``> 0``) and the comparison is always positional.
    actual = np.asarray(y_true)
    forecast = np.asarray(y_pred)

    sign_match = np.sign(actual) == np.sign(forecast)
    up_mask = actual > 0
    down_mask = actual < 0

    up_da = sign_match[up_mask].mean() if up_mask.any() else np.nan
    down_da = sign_match[down_mask].mean() if down_mask.any() else np.nan
    return up_da, down_da

In [12]:
# Zero-shot forecasting with TimesFM-1.0-200M

# Hyperparameters: the architecture values are fixed for the 200m checkpoint;
# horizon_len = 1 requests a single one-step-ahead forecast per context.
tfm1_hparams = TimesFmHparams(
    context_len=32,
    horizon_len=1,
    input_patch_len=32,      # fixed for 200m model
    output_patch_len=128,    # fixed for 200m model
    num_layers=20,           # fixed for 200m model
    model_dims=1280,         # fixed for 200m model
    backend=device_map,
)
tfm1_checkpoint = TimesFmCheckpoint(huggingface_repo_id="google/timesfm-1.0-200m-pytorch")
tfm1 = TimesFm(hparams=tfm1_hparams, checkpoint=tfm1_checkpoint)

# One frequency flag per context.
# NOTE(review): 0 is presumably the high-frequency (daily) category - confirm
# against the TimesFM README.
freqs = [0 for _ in contexts]
preds, _ = tfm1.forecast(contexts, freq=freqs)

# Flatten the forecasts into a 1-D Series, positionally aligned with y_test.
y_tfm1 = pd.Series(preds.reshape(-1))
results['y_tfm1'] = y_tfm1

# Evaluation metrics against the realised out-of-sample targets.
r2_tfm1 = r2(y_test, y_tfm1)
mse_tfm1 = mean_squared_error(y_test, y_tfm1)
mae_tfm1 = mean_absolute_error(y_test, y_tfm1)
same_sign_tfm1 = np.sign(y_test) == np.sign(y_tfm1)
da_tfm1 = same_sign_tfm1.mean()
up_da_tfm1, down_da_tfm1 = directional_accuracy(y_test, y_tfm1)

Fetching 3 files: 100%|██████████| 3/3 [00:00<?, ?it/s]


In [13]:
# Zero Shot TimesFM-2.0-500M
#
# Builds the 500m checkpoint, produces one-step-ahead forecasts for every
# rolling-window context, stores them in `results['y_tfm2']` and computes the
# evaluation metrics against `y_test`.
tfm2 = TimesFm(
    hparams = TimesFmHparams(
        context_len = 32,
        horizon_len = 1,
        input_patch_len = 32,      # fixed for 500m model
        output_patch_len = 128,    # fixed for 500m model
        num_layers = 50,           # fixed for 500m model
        model_dims = 1280,         # fixed for 500m model
        # The TimesFM README lists this as a fixed setting for the 2.0-500m
        # checkpoint; the library default (True) is meant for the 1.0 models
        # and was previously applied here by omission.
        # NOTE(review): requires a timesfm release that exposes this field.
        use_positional_embedding = False,
        backend = device_map
        ),
    checkpoint = TimesFmCheckpoint(huggingface_repo_id="google/timesfm-2.0-500m-pytorch")
    )

# One frequency flag per context (same flag as used for the 1.0 model above).
freqs = [0] * len(contexts)
preds, _ = tfm2.forecast(contexts, freq=freqs)

# Flatten the forecasts into a 1-D Series, positionally aligned with y_test.
y_tfm2 = pd.Series(preds.reshape([-1,]))

results['y_tfm2'] = y_tfm2
r2_tfm2  = r2(y_test, y_tfm2)
mse_tfm2 = mean_squared_error(y_test, y_tfm2)
mae_tfm2 = mean_absolute_error(y_test, y_tfm2)
da_tfm2 = (np.sign(y_test) == np.sign(y_tfm2)).mean()
up_da_tfm2, down_da_tfm2 = directional_accuracy(y_test, y_tfm2)

Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 2491.27it/s]


In [14]:
# Collating Results: one row of evaluation metrics per model.

_metric_names = (
    "Model",
    "R-squared",
    "MSE",
    "MAE",
    "Direction Accuracy",
    "Up Directional Accuracy",
    "Down Directional Accuracy",
)

# Values are listed in the same order as _metric_names.
_metric_rows = (
    ("TimesFM-1.0-200M", r2_tfm1, mse_tfm1, mae_tfm1, da_tfm1, up_da_tfm1, down_da_tfm1),
    ("TimesFM-2.0-500M", r2_tfm2, mse_tfm2, mae_tfm2, da_tfm2, up_da_tfm2, down_da_tfm2),
)

results_matrix = [dict(zip(_metric_names, row)) for row in _metric_rows]

# Tabulate and persist the summary next to the notebook.
results_matrix_df = pd.DataFrame(results_matrix)
results_matrix_df.to_csv("timesfm(5-day)_results.csv", index=False)

In [15]:
# Save Prediction Results: per-observation targets and model forecasts, written
# as CSV text without the index column.
# NOTE(review): the output path has no ".csv" extension, unlike the metrics file
# "timesfm(5-day)_results.csv" - confirm whether downstream readers rely on this name.
predictions_path = "timesfm(5-day)"
results.to_csv(predictions_path, index=False)