In [None]:
import pandas as pd
import numpy as np

In [None]:
import os
# Expose only GPUs 0-4 to this process. This is set before `import timesfm`
# in the next cell; presumably the backend reads the variable when it first
# initializes CUDA, so the order matters — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3,4"

In [None]:
import timesfm

## Load model

In [None]:
# Build the TimesFM wrapper; the weights are loaded by the
# load_from_checkpoint call in the next cell.
# NOTE(review): the architecture values (patch lengths, num_layers, model_dims)
# presumably have to match the timesfm-1.0-200m checkpoint — confirm against
# the model card before changing any of them.
tfm = timesfm.TimesFm(
    context_len=512,
    horizon_len=1,  # presumably one forecast step (one quarter) per series — confirm
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend="gpu",
)

In [None]:
# Load the pretrained weights from a local checkpoint directory.
# "~" is shell shorthand: a path passed as a plain string is not guaranteed to
# be expanded by the loader, so resolve it to the user's home directory here.
tfm.load_from_checkpoint(
    checkpoint_path=os.path.expanduser("~/timesfm-1.0-200m/checkpoints")
)

## Load data

In [None]:
# Read the quarterly panel; the first CSV column is used as the index.
df_weo = pd.read_csv('../../quarterly_data/df_res_gdp_more.csv', index_col=0)
# Keep only rows that have a GDP value and renumber the rows 0..n-1.
df_weo = df_weo.dropna(subset=['GDP']).reset_index(drop=True)
# Drop rows with a NaN in *any* column, then keep the first three columns.
# NOTE(review): the first three columns are presumably date, series id and the
# GDP target, in that order — confirm against the CSV header. Rows are also
# discarded for NaNs in columns that are not kept; confirm that is intended.
# .copy() gives df_filter its own data, so the column edits below do not act
# on a slice of df_weo (which triggers SettingWithCopyWarning).
df_filter = df_weo.dropna().iloc[:, :3].copy()
df_filter.columns = ['ds', 'unique_id', 'y']
df_filter['ds'] = pd.to_datetime(df_filter['ds'])

In [None]:
# Order rows by series id, then chronologically within each series.
df_filter = df_filter.sort_values(['unique_id', 'ds'])

In [None]:
# Earliest timestamp in the panel (checked before the date filter below).
df_filter['ds'].min()

In [None]:
# Latest timestamp in the panel (checked before the date filter below).
df_filter['ds'].max()

In [None]:
# Restrict the panel to observations dated 1995-01-01 through 2019-12-31 (inclusive).
# .copy() detaches the result from the unfiltered frame, so assigning to its
# columns in later cells does not raise SettingWithCopyWarning.
df_filter = df_filter.loc[(df_filter['ds'] >= '1995-01-01') & (df_filter['ds'] <= '2019-12-31')].copy()
df_filter.tail()

In [None]:
# Minimum of the target before scaling.
# NOTE(review): presumably the source of the hard-coded min_value in
# reverse_norm — confirm the two agree whenever the data changes.
df_filter['y'].min()

In [None]:
# Maximum of the target before scaling.
# NOTE(review): presumably the source of the hard-coded max_value in
# reverse_norm — confirm the two agree whenever the data changes.
df_filter['y'].max()

## Min-max normalization

In [None]:
def norm(col):
    """Min-max scale ``col`` so its smallest value maps to 0 and its largest to 1.

    Args:
        col: Object exposing ``min()`` / ``max()`` and arithmetic with their
            results (it is called with a pandas Series in this notebook).

    Returns:
        ``(col - min) / (max - min)``. When max equals min the denominator is
        zero, so a constant float Series comes back as NaN.
    """
    lowest = col.min()
    value_range = col.max() - lowest
    return (col - lowest) / value_range

In [None]:
# Min-max scale each listed column of df_filter (only 'y' here), overwriting
# the raw values with the scaled ones.
for v in ['y']:
    # Double brackets select a one-column frame, so apply(axis=0) hands norm
    # that column as a Series.
    df_filter[v] = df_filter[[v]].apply(norm, axis=0)

In [None]:
# Inspect the last rows after the 'y' column has been rescaled.
df_filter.tail()

In [None]:
def reverse_norm(row, min_value=-12.9, max_value=72.32):
    """Undo min-max scaling, mapping scaled values back to the original range.

    Computes ``row * (max_value - min_value) + min_value``, the algebraic
    inverse of ``norm``.

    Args:
        row: A scaled value, or any object supporting ``*`` and ``+`` with
            floats (a scalar in this notebook).
        min_value: Minimum of the data before scaling.
        max_value: Maximum of the data before scaling.

    Returns:
        The value(s) on the original scale.

    NOTE(review): the defaults are hard-coded; presumably they are the min and
    max of ``y`` on the filtered 1995-2019 panel. Pass the actual values (or
    update the defaults) whenever the data or the date window changes.
    """
    return row * (max_value - min_value) + min_value

## Run

In [None]:
import pandas as pd

# Split a panel of time series into train and test windows.
def split_train_test_by_year_and_quarter(df, unique_id_col, date_col, train_start_year, train_start_quarter, train_end_year, train_end_quarter, test_start_year, test_start_quarter, test_end_year, test_end_quarter):
    """Split ``df`` into train and test frames using inclusive (year, quarter) windows.

    Args:
        df: Input frame; it is not modified.
        unique_id_col: Name of the column identifying each series.
        date_col: Name of a datetime column (accessed through ``.dt``).
        train_start_year, train_start_quarter: First quarter of the train window.
        train_end_year, train_end_quarter: Last quarter of the train window.
        test_start_year, test_start_quarter: First quarter of the test window.
        test_end_year, test_end_quarter: Last quarter of the test window.

    Returns:
        ``(train_df, test_df)``. Both carry the columns of ``df`` plus ``year``
        and ``quarter``, keep the original index labels, and are ordered
        series by series in order of first appearance of each id. An input
        with no ids yields two empty frames.
    """
    dates = df[date_col]
    # assign() builds a new frame once for all series: the caller's df stays
    # untouched and no column is written onto a filtered slice, which is what
    # raises SettingWithCopyWarning.
    tagged = df.assign(year=dates.dt.year, quarter=dates.dt.quarter)

    def _window(start_year, start_quarter, end_year, end_quarter):
        """Return the rows whose (year, quarter) lies in the inclusive window."""
        year = tagged['year']
        quarter = tagged['quarter']
        on_or_after_start = (year > start_year) | ((year == start_year) & (quarter >= start_quarter))
        on_or_before_end = (year < end_year) | ((year == end_year) & (quarter <= end_quarter))
        return tagged[on_or_after_start & on_or_before_end]

    train_all = _window(train_start_year, train_start_quarter, train_end_year, train_end_quarter)
    test_all = _window(test_start_year, test_start_quarter, test_end_year, test_end_quarter)

    train_list = []
    test_list = []
    # Collect the rows id by id so the output stays grouped per series.
    for unique_id in df[unique_id_col].unique():
        train_list.append(train_all[train_all[unique_id_col] == unique_id])
        test_list.append(test_all[test_all[unique_id_col] == unique_id])

    # pd.concat raises on an empty list; return empty frames with the same columns.
    if not train_list:
        return tagged.iloc[0:0], tagged.iloc[0:0]

    train_df = pd.concat(train_list)
    test_df = pd.concat(test_list)

    return train_df, test_df


In [None]:
def get_rolling_res(df_filter, model=None, train_start_year=1995, train_start_quarter=1,
                    split_time=2018, test_end_year=2019, test_end_quarter=4):
    """Run a rolling-origin forecast, moving the train/test split one quarter at a time.

    For every quarter of every year from ``split_time`` through
    ``test_end_year``, the training window runs from the train start up to the
    quarter just before the test start, the model forecasts from it, and the
    forecast is joined onto the test rows.

    Args:
        df_filter: Panel with ``unique_id``, ``ds`` (datetime) and ``y`` columns.
        model: Object exposing ``forecast_on_df``; defaults to the
            notebook-level ``tfm`` instance.
        train_start_year, train_start_quarter: First quarter of every training window.
        split_time: First year whose quarters are used as test starts.
        test_end_year, test_end_quarter: Last quarter of every test window.

    Returns:
        The concatenation of the per-split results: test rows that found a
        matching forecast on ``ds`` and ``unique_id``, with the forecast in the
        ``timesfm`` column. Rows containing any NaN after the merge are dropped.
    """
    if model is None:
        # Fall back to the model built earlier in the notebook.
        model = tfm

    df_res_list = []
    # Bound the loop by test_end_year rather than repeating the literal 2019.
    for year in range(split_time, test_end_year + 1):
        for quarter in range(1, 4 + 1):
            test_start_year = year
            test_start_quarter = quarter

            # The training window ends in the quarter just before the test
            # start; Q1 rolls back to Q4 of the previous year.
            if quarter == 1:
                train_end_year = test_start_year - 1
                train_end_quarter = 4
            else:
                train_end_year = test_start_year
                train_end_quarter = test_start_quarter - 1

            print("train_end_year, train_end_quarter, test_start_year, test_start_quarter:")
            print(train_end_year, train_end_quarter, test_start_year, test_start_quarter)

            # Split into train and test sets for this origin.
            train_df, test_df = split_train_test_by_year_and_quarter(
                df_filter, 'unique_id', 'ds',
                train_start_year, train_start_quarter, train_end_year, train_end_quarter,
                test_start_year, test_start_quarter, test_end_year, test_end_quarter)
            print('test_start: ', year, ' year ', quarter, ' quarter')
            print(len(train_df), len(test_df))

            forecast_df = model.forecast_on_df(
                inputs=train_df,
                freq="Q",
                value_name="y",
                num_jobs=-1,
            )

            # Left-join so only test rows are kept, then drop the rows the
            # forecast did not cover (their 'timesfm' value is NaN).
            df_res = pd.merge(test_df, forecast_df[['ds', 'unique_id', 'timesfm']],
                              how='left', on=['ds', 'unique_id']).dropna()
            print(len(df_res))
            df_res_list.append(df_res)
    return pd.concat(df_res_list)

In [None]:
# Run the rolling forecast over the normalized panel; values in df_res are
# still on the scaled 'y' range at this point.
df_res = get_rolling_res(df_filter)

In [None]:
# Display the merged actuals and forecasts (still on the normalized scale).
df_res

## Reverse the normalization

In [None]:
# Map both the actuals and the forecasts back to the original scale.
# Caution: this overwrites df_res in place, so running the cell a second time
# applies reverse_norm twice; re-create df_res before re-running.
df_res['y'] = df_res['y'].apply(reverse_norm)
df_res['timesfm'] = df_res['timesfm'].apply(reverse_norm)

In [None]:
# Display the results again, now on the original (de-normalized) scale.
df_res

## Eval

In [None]:
from utils.metrics import metric
import torch

In [None]:
# Score the forecasts against the actuals. Per the original note, the result
# is: mae, mse, rmse, mape, mspe, rse, corr
# NOTE(review): the actuals ('y') are passed first and the forecasts
# ('timesfm') second. If utils.metrics.metric follows a (pred, true) signature,
# the arguments are swapped, which changes asymmetric metrics such as
# MAPE/MSPE/RSE — confirm against the helper's definition.
metric(torch.Tensor(df_res['y'].values),
      torch.Tensor(df_res['timesfm'].values))