In [None]:
import pandas as pd
import numpy as np

In [None]:
import os
# Restrict this process to GPUs 0-4. The variable is assigned before `timesfm`
# and `torch` are imported further down in the notebook.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0,1,2,3,4"})

In [None]:
import timesfm

## Load model

In [None]:
# Build the TimesFM wrapper that every forecast in this notebook goes through.
# NOTE(review): the architecture values presumably have to match the checkpoint
# that is loaded afterwards (timesfm-1.0-200m) - confirm against the TimesFM docs.
tfm = timesfm.TimesFm(
    backend="gpu",
    # Model architecture.
    num_layers=20,
    model_dims=1280,
    input_patch_len=32,
    output_patch_len=128,
    # Forecasting window: up to 512 context points in, 1 step out.
    context_len=512,
    horizon_len=1,
)

In [None]:
# Python never expands "~" inside a plain path string; expand it explicitly so
# the checkpoint directory resolves to the user's home directory instead of a
# literal "~" folder relative to the working directory.
tfm.load_from_checkpoint(
    checkpoint_path=os.path.expanduser("~/timesfm-1.0-200m/checkpoints")
)

## Load data

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import random
import os

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    CUDA), then switches cuDNN to deterministic mode with benchmarking off.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, the current CUDA device, then every CUDA device
    # (relevant when several GPUs are in use).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


set_seed(1)

In [None]:
# Compute per-feature minima and maxima and apply min-max normalisation.
def norm_lstm_tensor(data, labels, freq='quarter'):
    """Min-max normalise the leading feature columns of ``data`` and ``labels``.

    The trailing columns (the last 3 when ``freq == 'quarter'``, otherwise the
    last 2) are left untouched and re-attached after normalisation. Bounds are
    computed per feature column, jointly over ``data`` and ``labels``, so both
    tensors share the same scale.

    A feature whose minimum equals its maximum is mapped to 0 instead of
    producing NaN from a 0/0 division.

    NOTE(review): the bounds are taken over every sample passed in; if the
    caller normalises before the train/test split, test rows influence them.

    Args:
        data: Tensor indexed as ``data[:, :, feature]`` (assumed to be
            (samples, time steps, features) - TODO confirm).
        labels: Tensor indexed as ``labels[:, feature]`` (assumed to be
            (samples, features) with the same feature layout - TODO confirm).
        freq: ``'quarter'`` keeps the last 3 columns unnormalised; any other
            value keeps the last 2.

    Returns:
        Tuple ``(normalized_data, normalized_label, min_value, max_value)``
        where ``min_value`` / ``max_value`` are the bounds of the last
        normalised feature column (0-dim tensors).
    """
    drop_index = -3 if freq == 'quarter' else -2

    # Split off the columns that take part in the normalisation.
    data_to_norm = data[:, :, :drop_index]
    labels_to_norm = labels[:, :drop_index]

    # Per-feature extrema of `data`: reduce the first axis, then the next one.
    min_vals_data = data_to_norm.min(dim=0).values.min(dim=0).values
    max_vals_data = data_to_norm.max(dim=0).values.max(dim=0).values

    # Per-feature extrema of `labels` over the first axis.
    min_vals_label = labels_to_norm.min(dim=0).values
    max_vals_label = labels_to_norm.max(dim=0).values

    # Shared bounds across data and labels.
    min_value_all = torch.min(min_vals_data, min_vals_label)
    max_value_all = torch.max(max_vals_data, max_vals_label)

    # Bounds of the last normalised column, returned for de-normalisation.
    min_value = min_value_all[-1]
    max_value = max_value_all[-1]

    # Guard constant columns: a zero range would otherwise yield 0/0 = NaN.
    value_range = max_value_all - min_value_all
    safe_range = torch.where(
        value_range == 0, torch.ones_like(value_range), value_range
    )

    normalized_data_to_norm = (data_to_norm - min_value_all) / safe_range
    normalized_labels_to_norm = (labels_to_norm - min_value_all) / safe_range

    # Re-attach the untouched trailing columns.
    normalized_data = torch.cat(
        [normalized_data_to_norm, data[:, :, drop_index:]], dim=2
    )
    normalized_label = torch.cat(
        [normalized_labels_to_norm, labels[:, drop_index:]], dim=1
    )

    return normalized_data, normalized_label, min_value, max_value


def split_lstm_dataset_by_year(data, labels, year, freq='quarter'):
    """Split samples into train/test by the year stored in ``labels``.

    Rows whose year column is ``>= year`` form the test set; all remaining
    rows form the training set. The year column is ``labels[:, -2]`` when
    ``freq == 'quarter'`` and ``labels[:, -1]`` otherwise. The year column,
    the column before it and every column after it are removed from the
    returned tensors.

    The selection uses one vectorised boolean mask instead of a Python loop
    over rows; row order within each partition is preserved.

    Args:
        data: Tensor indexed as ``data[sample, :, feature]``.
        labels: Tensor indexed as ``labels[sample, feature]``.
        year: Threshold; rows with a year column ``>= year`` go to the test set.
        freq: ``'quarter'`` or any other value (see above).

    Returns:
        Tuple ``(train_data, test_data, train_targets, test_targets)``.
    """
    dim_index = -2 if freq == 'quarter' else -1
    # Exclusive end of the feature columns that are kept.
    feature_end = dim_index - 1

    is_test = labels[:, dim_index] >= year
    # Negate the test mask (rather than testing `< year`) so that rows whose
    # comparison is False for any reason, e.g. NaN, land in the training set.
    is_train = ~is_test

    train_data = data[is_train, :, :feature_end]
    train_targets = labels[is_train, :feature_end]

    test_data = data[is_test, :, :feature_end]
    test_targets = labels[is_test, :feature_end]
    return train_data, test_data, train_targets, test_targets


def reverse_norm(row, min_val=None, max_val=None):
    """Undo min-max normalisation: ``value * (max - min) + min``.

    Args:
        row: Normalised values. If ``row`` is 2-D only its last column
            (``row[:, -1]``) is de-normalised and returned; any other rank is
            de-normalised element-wise.
        min_val: Lower bound used during normalisation. When ``None`` the
            module-level ``min_value`` is used (original behaviour).
        max_val: Upper bound used during normalisation. When ``None`` the
            module-level ``max_value`` is used (original behaviour).

    Returns:
        The de-normalised values, same container type as the selected input.
    """
    # Fall back to the notebook-level globals so existing one-argument calls
    # keep working; explicit bounds avoid that hidden state.
    lower = min_value if min_val is None else min_val
    upper = max_value if max_val is None else max_val

    # Accept tensors / NumPy scalars (via .item()) as well as plain numbers.
    lower = lower.item() if hasattr(lower, 'item') else lower
    upper = upper.item() if hasattr(upper, 'item') else upper
    gap = upper - lower

    values = row[:, -1] if len(row.shape) == 2 else row
    return values * gap + lower


In [None]:
# Collect the GDP data files (names containing 'LSTM_data_gdp_' but not
# 'light'). os.listdir returns entries in arbitrary order, so sort them to make
# the processing order - and the row order of the results table - reproducible.
file_item_list = sorted(
    file_item
    for file_item in os.listdir('../dataset/')
    if 'LSTM_data_gdp_' in file_item and 'light' not in file_item
)

print('file item list length: ', len(file_item_list))

In [None]:
# Show the matched dataset file names as this cell's output.
file_item_list

In [None]:
from utils.metrics import metric
import torch

In [None]:
import time
# Evaluate TimesFM on the held-out years of every dataset file and collect the
# metrics per file in `dict_temp` (file name -> list of 7 metric values).
dict_temp = {}
for file_item in file_item_list:
    print(file_item)
    start_time = time.time()
    data_path = '../dataset/' + file_item
    label_path = '../dataset/' + file_item.replace('LSTM_data', 'LSTM_label')

    set_seed(1)
    # NOTE: torch.load can unpickle arbitrary objects - only load trusted files.
    data = torch.load(data_path)
    labels = torch.load(label_path)

    # min_value / max_value are module-level names that reverse_norm() reads,
    # so they must be rebound for each file before de-normalising.
    data, labels, min_value, max_value = norm_lstm_tensor(data, labels, 'quarter')

    # The first held-out year depends on the period encoded in the file name.
    if '95-19' in file_item:
        year = 2018
    elif '13-19' in file_item:
        year = 2019
    else:
        raise ValueError('Wrong')
    train_data, test_data, train_targets, test_targets = split_lstm_dataset_by_year(
        data, labels, year, freq='quarter'
    )

    # Only the last remaining feature column is fed to the model, one context
    # series per test sample.
    forecast_input = test_data[:, :, -1]
    # NOTE(review): per the TimesFM docs freq=2 presumably denotes
    # low-frequency (quarterly/yearly) series - confirm.
    frequency_input = [2] * forecast_input.shape[0]

    point_forecast, experimental_quantile_forecast = tfm.forecast(
        forecast_input,
        freq=frequency_input,
    )

    # Bring targets and forecasts to the same 1-D shape before scoring.
    # Building the forecast tensor from a list of length-1 rows would give
    # shape (N, 1) while the targets are (N,), and any element-wise difference
    # would then silently broadcast to (N, N).
    first_step_forecast = np.asarray(point_forecast).reshape(len(frequency_input), -1)[:, 0]
    target_values = torch.as_tensor(
        reverse_norm(test_targets[:, -1]), dtype=torch.float32
    ).reshape(-1)
    forecast_values = torch.as_tensor(
        reverse_norm(first_step_forecast), dtype=torch.float32
    ).reshape(-1)

    # mae, mse, rmse, mape, mspe, rse, corr
    # NOTE(review): targets are passed first and forecasts second; if `metric`
    # is defined as metric(pred, true) the asymmetric metrics (mape, mspe,
    # rse) are computed with the roles swapped - verify against utils.metrics.
    mae, mse, rmse, mape, mspe, rse, corr = metric(target_values, forecast_values)

    print('mae, mse, rmse, mape, mspe, rse, corr', mae, mse, rmse, mape, mspe, rse, corr)
    dict_temp[file_item] = [mae, mse, rmse, mape, mspe, rse, corr]

In [None]:
# Build the results table: `dict_temp` maps each dataset file to its list of
# metrics, so after transposing there is one row per file and one column per
# metric.
df_res = pd.DataFrame(dict_temp).transpose()
df_res.columns = [
    'mae', 'mse', 'rmse', 'mape', 'mspe', 'rse', 'corr',
]
df_res

In [None]:
# Unwrap tensor / NumPy scalar cells into plain Python numbers. Values without
# an `.item()` method (e.g. floats left by an earlier run of this cell) are
# kept as they are, so re-running the cell does not raise AttributeError.
for col in ['mae', 'mse', 'rmse', 'mape', 'mspe', 'rse', 'corr']:
    df_res[col] = df_res[col].apply(lambda x: x.item() if hasattr(x, 'item') else x)
df_res

In [None]:
# Persist the metrics table; the index (dataset file names) is written as the
# first CSV column.
df_res.to_csv('timesfm_lstm_res.csv', index=True)