In [1]:
!git clone https://github.com/yuezhihan/ts2vec.git
%cd ts2vec


Cloning into 'ts2vec'...
remote: Enumerating objects: 133, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 133 (delta 23), reused 21 (delta 21), pack-reused 94 (from 1)[K
Receiving objects: 100% (133/133), 37.01 KiB | 4.63 MiB/s, done.
Resolving deltas: 100% (67/67), done.
/kaggle/working/ts2vec


## ts2vec: encoder + train thêm head

In [2]:
import numpy as np
import pandas as pd
from ts2vec import TS2Vec

# Load lại FPT
df = pd.read_csv("/kaggle/input/aio-2025-linear-forecasting-challenge/FPT_train.csv", parse_dates=["time"])
df = df.sort_values("time").reset_index(drop=True)

feat_cols = ["open", "high", "low", "close", "volume"]
data = df[feat_cols].values.astype("float32")  # [T, D]
data_ts2vec = data[None, :, :]                 # [1, T, D]

model = TS2Vec(
    input_dims=data.shape[1],
    output_dims=320,
    device=0  # GPU
)

loss_log = model.fit(
    data_ts2vec,
    n_epochs=50,
    verbose=True
)


Epoch #0: loss=17410134376448.0
Epoch #1: loss=21310692392960.0
Epoch #2: loss=13655895703552.0
Epoch #3: loss=8281344442368.0
Epoch #4: loss=2060799967232.0
Epoch #5: loss=4094909480960.0
Epoch #6: loss=566131490816.0
Epoch #7: loss=826602094592.0
Epoch #8: loss=1523922108416.0
Epoch #9: loss=579298852864.0
Epoch #10: loss=1677892386816.0
Epoch #11: loss=1072175054848.0
Epoch #12: loss=342377594880.0
Epoch #13: loss=873109913600.0
Epoch #14: loss=43413307392.0
Epoch #15: loss=1008100638720.0
Epoch #16: loss=668451405824.0
Epoch #17: loss=189717921792.0
Epoch #18: loss=47991742464.0
Epoch #19: loss=411574566912.0
Epoch #20: loss=108322766848.0
Epoch #21: loss=307816988672.0
Epoch #22: loss=111271919616.0
Epoch #23: loss=292984160256.0
Epoch #24: loss=207890382848.0
Epoch #25: loss=276793098240.0
Epoch #26: loss=231918665728.0
Epoch #27: loss=242614468608.0
Epoch #28: loss=229175508992.0
Epoch #29: loss=224070975488.0
Epoch #30: loss=241251041280.0
Epoch #31: loss=114475761664.0
Epoch #

In [4]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Causal representation theo thời gian
repr_ts = model.encode(
    data_ts2vec,
    causal=True,
    sliding_length=1,
    sliding_padding=50
)[0]  # [T, repr_dims]

close = df["close"].values.astype("float32")
T = len(df)

horizon = 100
window = 200

Xs, Ys = [], []

for t in range(window, T - horizon):
    window_repr = repr_ts[t - window:t]            # [window, repr_dims]
    feat_vec = window_repr.mean(axis=0)            # [repr_dims]
    target_vec = close[t:t + horizon]              # [100]

    Xs.append(feat_vec)
    Ys.append(target_vec)

X = np.stack(Xs)
Y = np.stack(Ys)

head = MultiOutputRegressor(Ridge(alpha=1.0))
head.fit(X, Y)

pred_train = head.predict(X)
print("Train MSE:", mean_squared_error(Y, pred_train))


Train MSE: 1.8855009


In [5]:
last_window_repr = repr_ts[-window:]
last_feat = last_window_repr.mean(axis=0).reshape(1, -1)

future_close_100 = head.predict(last_feat)[0]  # [100]
future_close_100[:10]


array([123.84622 , 103.64784 ,  75.23776 ,  88.3262  , 100.27549 ,
       107.73988 , 108.873055, 111.39273 ,  88.101974,  88.68649 ],
      dtype=float32)

In [6]:
sub = pd.DataFrame({
    "id": range(1, 101),
    "close": future_close_100
})
sub.to_csv("submission_ts2vec_fpt.csv", index=False)
sub.head()
# 1867.0211 

Unnamed: 0,id,close
0,1,123.846222
1,2,103.647842
2,3,75.237762
3,4,88.326202
4,5,100.27549


## Scaling 

In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from ts2vec import TS2Vec

# 1. Load FPT
df = pd.read_csv(
    "/kaggle/input/aio-2025-linear-forecasting-challenge/FPT_train.csv",
    parse_dates=["time"]
)
df = df.sort_values("time").reset_index(drop=True)

# 2. Chọn feature cho encoder
feat_cols = ["open", "high", "low", "close", "volume"]
X_raw = df[feat_cols].values.astype("float32")  # [T, D]

# 3. Scale feature cho TS2Vec
scaler = StandardScaler()
X = scaler.fit_transform(X_raw).astype("float32")  # [T, D]

# 4. Đưa về dạng [n_instances, T, D]
data_ts2vec = X[None, :, :]  # [1, T, D]

# 5. Khởi tạo và train TS2Vec
model = TS2Vec(
    input_dims=X.shape[1],
    output_dims=320,
    device=0  # GPU Kaggle
)

loss_log = model.fit(
    data_ts2vec,
    n_epochs=50,
    verbose=True
)


Epoch #0: loss=11.333951950073242
Epoch #1: loss=4.960279941558838
Epoch #2: loss=3.0735974311828613
Epoch #3: loss=2.5296692848205566
Epoch #4: loss=2.7682502269744873
Epoch #5: loss=2.6619296073913574
Epoch #6: loss=2.2049336433410645
Epoch #7: loss=2.018357515335083
Epoch #8: loss=1.8670700788497925
Epoch #9: loss=1.698573112487793
Epoch #10: loss=1.8986903429031372
Epoch #11: loss=1.9048064947128296
Epoch #12: loss=1.8499113321304321
Epoch #13: loss=1.9432696104049683
Epoch #14: loss=1.6912339925765991
Epoch #15: loss=1.7708336114883423
Epoch #16: loss=1.806976556777954
Epoch #17: loss=1.8812509775161743
Epoch #18: loss=1.5465178489685059
Epoch #19: loss=1.7699650526046753
Epoch #20: loss=1.4833776950836182
Epoch #21: loss=1.8434568643569946
Epoch #22: loss=1.6145527362823486
Epoch #23: loss=1.4556267261505127
Epoch #24: loss=1.7177469730377197
Epoch #25: loss=1.685755968093872
Epoch #26: loss=1.4788905382156372
Epoch #27: loss=1.8027113676071167
Epoch #28: loss=1.6636238098144531


In [8]:
# 1. Lấy cửa sổ cuối cùng để dự đoán tương lai
last_window_repr = repr_ts[-window:]              # [window, repr_dims]
last_feat = last_window_repr.mean(axis=0)         # [repr_dims]
last_feat = last_feat.reshape(1, -1)              # [1, repr_dims]

# 2. Dự đoán 100 ngày close tiếp theo
future_close_100 = head.predict(last_feat)[0]     # [100]

print("First 10 predicted closes:", future_close_100[:10])
print("Pred shape:", future_close_100.shape)


First 10 predicted closes: [123.84622  103.64784   75.23776   88.3262   100.27549  107.73988
 108.873055 111.39273   88.101974  88.68649 ]
Pred shape: (100,)


In [10]:
sub = pd.DataFrame({
    "id": np.arange(1, 101),
    "close": future_close_100
})
sub.to_csv("submission_ts2vec_head.csv", index=False)
sub.head()

# 656.1287 

Unnamed: 0,id,close
0,1,123.846222
1,2,103.647842
2,3,75.237762
3,4,88.326202
4,5,100.27549
