In [1]:
# Notebook inputs (absolute paths on the author's machine).
# Raw WTI series; the columns include close_future and daily_return.
DATA_PATH = (
    "/home/rguan/project/ML-Capstone-Team-7/FinTSBridge_models/TSLib_baseline"
    "/dataset/FBD/WTI-log.csv"
)
# Prediction table written by the recommended TimeMixer run. The same run
# directory also holds pred.npy, which a later cell loads.
TimeMixer_PATH = (
    "/home/rguan/project/ML-Capstone-Team-7/FinTSBridge_models/TSLib_baseline"
    "/results_WTI_trading_20251128_042447"
    "/long_term_forecast_WTI-log_512_6_TimeMixer_TimeMixer_custom_ftMS_sl512_ll0_pl6"
    "_dm16_nh8_el2_dl1_df32_expand2_dc4_fc1_ebtimeF_dtTrue_Exp_0"
    "/data_table.csv"
)

In [2]:
import pandas as pd
import numpy as np

def load_data(raw_data_path, predictions_path):
    """Read the raw-data CSV and the predictions CSV and print a short summary.

    Both files must contain a ``date`` column, which is parsed as datetimes.

    Args:
        raw_data_path: Path to the raw data CSV.
        predictions_path: Path to the predictions CSV.

    Returns:
        A ``(raw_df, pred_df)`` tuple of DataFrames, in that order.
    """
    def _read_with_dates(csv_path):
        # Same reader settings for both inputs: parse the 'date' column.
        return pd.read_csv(csv_path, parse_dates=['date'])

    raw_df = _read_with_dates(raw_data_path)
    pred_df = _read_with_dates(predictions_path)

    # Shapes first, then (after a blank line) the column lists.
    report = (
        f"Raw data shape: {raw_df.shape}",
        f"Predictions shape: {pred_df.shape}",
        f"\nRaw data columns: {raw_df.columns.tolist()}",
        f"Predictions columns: {pred_df.columns.tolist()}",
    )
    for line in report:
        print(line)

    return raw_df, pred_df

# Load the raw WTI series and the TimeMixer prediction table into notebook-level
# frames; load_data also prints their shapes and column names.
raw_df, pred_df = load_data(DATA_PATH, TimeMixer_PATH)

Raw data shape: (10685, 7)
Predictions shape: (2131, 13)

Raw data columns: ['date', 'open_future', 'high_future', 'low_future', 'volume_future', 'close_future', 'daily_return']
Predictions columns: ['date', 'true_0', 'true_1', 'true_2', 'true_3', 'true_4', 'true_5', 'pred_0', 'pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5']


In [3]:
import os

# Load the raw prediction array stored next to the prediction table.
# The run directory comes from os.path.dirname rather than slicing a fixed
# number of characters off the path, so this still works if the table file
# is renamed.
np.load(os.path.join(os.path.dirname(TimeMixer_PATH), 'pred.npy'))

array([[[-0.01957919],
        [-0.00869468],
        [-0.00339313],
        [-0.01044839],
        [ 0.00015005],
        [ 0.00925906]],

       [[-0.0195546 ],
        [-0.01048752],
        [-0.00580663],
        [ 0.00014705],
        [ 0.00558238],
        [ 0.00373431]],

       [[-0.01540366],
        [-0.00359255],
        [ 0.00886927],
        [ 0.0031053 ],
        [ 0.00101346],
        [-0.00800688]],

       ...,

       [[ 0.00200339],
        [ 0.00119752],
        [ 0.00324858],
        [ 0.00103549],
        [ 0.00280001],
        [ 0.00225395]],

       [[ 0.00165694],
        [-0.00594709],
        [-0.00311656],
        [ 0.00260899],
        [ 0.00064926],
        [-0.00257325]],

       [[-0.01158336],
        [-0.01644211],
        [-0.00262405],
        [ 0.00359043],
        [-0.00090337],
        [ 0.00176061]]], shape=(2131, 6, 1))

In [4]:
# Preview the first rows of the predictions table (date plus true_*/pred_* columns).
pred_df.head()

Unnamed: 0,date,true_0,true_1,true_2,true_3,true_4,true_5,pred_0,pred_1,pred_2,pred_3,pred_4,pred_5
0,2017-05-02,-0.003376,-0.013014,-0.007891,0.006681,0.00121,-0.013186,-0.019579,-0.008695,-0.003393,-0.010448,0.00015,0.009259
1,2017-05-03,-0.013014,-0.007891,0.006681,0.00121,-0.013186,0.007325,-0.019555,-0.010488,-0.005807,0.000147,0.005582,0.003734
2,2017-05-04,-0.007891,0.006681,0.00121,-0.013186,0.007325,-0.009983,-0.015404,-0.003593,0.008869,0.003105,0.001013,-0.008007
3,2017-05-05,0.006681,0.00121,-0.013186,0.007325,-0.009983,-0.024457,-0.008634,0.001812,0.005713,0.001064,-0.011816,-0.000849
4,2017-05-08,0.00121,-0.013186,0.007325,-0.009983,-0.024457,0.003352,-0.009619,0.000662,-0.002217,-0.008829,-0.005902,-0.002078


In [5]:
import datetime
from zoneinfo import ZoneInfo

# Report when the prediction table was last written and last read, in US Central time.
file_path = TimeMixer_PATH

mtime = os.path.getmtime(file_path)   # last modification time (seconds)
atime = os.path.getatime(file_path)   # last access time

_chicago = ZoneInfo("America/Chicago")
for _label, _stamp in (("Last modified:", mtime), ("Last accessed:", atime)):
    print(_label, datetime.datetime.fromtimestamp(_stamp, tz=_chicago))

Last modified: 2025-11-27 22:48:02.088403-06:00
Last accessed: 2025-11-27 22:48:02.060403-06:00


In [6]:
df = pd.read_csv(DATA_PATH)

# Build two candidate log returns from the close price and check which one
# the stored daily_return column corresponds to.
_close = df['close_future']
log_return_past = np.log(_close / _close.shift(1))      # close[t] / close[t-1]: marked "wrong" by the author
log_return_future = np.log(_close.shift(-1) / _close)   # close[t+1] / close[t]: marked "correct" by the author

# Side-by-side comparison of the first five values.
for _label, _series in (
    ("你的 daily_return[0:5]:", df['daily_return']),
    ("过去 log return[0:5]:", log_return_past),
    ("未来 log return[0:5]:", log_return_future),
):
    print(_label, _series.iloc[0:5].tolist())

# Correlation of daily_return with each candidate.
_corr_past = df['daily_return'].corr(log_return_past)
_corr_future = df['daily_return'].corr(log_return_future)
print(f"\n与过去log return相关性: {df['daily_return'].corr(log_return_past):.4f}")
print(f"与未来log return相关性: {df['daily_return'].corr(log_return_future):.4f}")

你的 daily_return[0:5]: [0.0057911271948978, 0.009129533952977, 0.0063748595228505, 0.008989740297736, 0.0069361479781886]
过去 log return[0:5]: [nan, 0.005791127194897802, 0.009129533952977022, 0.006374859522850533, 0.008989740297736056]
未来 log return[0:5]: [0.005791127194897802, 0.009129533952977022, 0.006374859522850533, 0.008989740297736056, 0.006936147978188625]

与过去log return相关性: 0.0207
与未来log return相关性: 0.9467


In [8]:
df = pd.read_csv(DATA_PATH)

# Derive both return definitions from close_future.
_close = df['close_future']
log_return_past = np.log(_close / _close.shift(1))
# The forward ratio has no value for the last row; drop it before taking the log.
_forward_ratio = (_close.shift(-1) / _close).dropna()
log_return_future = np.log(_forward_ratio)

# Compare rows 1..5 of the stored column against both derived series.
_rows = slice(1, 6)

print("你的 daily_return[1:6]:")
print(df['daily_return'].iloc[_rows].values)

print("\n从close_future计算的过去return[1:6]:")
print(log_return_past.iloc[_rows].values)

print("\n从close_future计算的未来return[1:6]:")
print(log_return_future.iloc[_rows].values)

# Correlations at six decimals; Series.corr aligns the two series on their index.
print(f"\n与过去return相关性: {df['daily_return'].corr(log_return_past):.6f}")
print(f"与未来return相关性: {df['daily_return'].corr(log_return_future):.6f}")

你的 daily_return[1:6]:
[ 0.00912953  0.00637486  0.00898974  0.00693615 -0.00428798]

从close_future计算的过去return[1:6]:
[0.00579113 0.00912953 0.00637486 0.00898974 0.00693615]

从close_future计算的未来return[1:6]:
[ 0.00912953  0.00637486  0.00898974  0.00693615 -0.00428798]

与过去return相关性: 0.020696
与未来return相关性: 0.946690
