<a href="https://colab.research.google.com/github/Hvitzerk/Skripsi/blob/main/TFT_Wisman_Training_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TFT Wisman (IDN) — Training & Forecast 2025

Notebook ini melatih **Temporal Fusion Transformer (TFT)** untuk memprediksi **kunjungan wisman Indonesia**.

### Fitur yang dipakai (di `master_dataset_core3_ready_with_y_holidays.csv`):
- Target: `y`
- Unknown (past): `cpi_2022base`, `fx_usd`
- Known-future: `holiday_flag`, `holidays_count`, `evoa_available`, `month`
- ID: `series_id`, waktu: `date`, `time_idx`

Horizon = **12 bulan (2025)**, encoder length = **24 bulan**.


In [1]:
# ✅ Install untuk Python 3.12 Colab (kompatibel dgn Torch bawaan)
!pip -q install --upgrade "numpy>=2.0,<2.3" pandas==2.2.2 \
    pytorch-forecasting==1.4.0 "lightning>=2.4,<2.6" torchmetrics==1.4.0


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m260.9/260.9 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m35.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.5/16.5 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m825.2/825.2 kB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m829.2/829.2 kB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.19.0 requires numpy<2.2.0,>=1.26.0, but you have numpy 2.2.6 which is incompat

In [None]:
import os, numpy as np
print("NumPy after install:", np.__version__)
os.kill(os.getpid(), 9)  # ⬅️ otomatis restart


In [1]:
import pandas as pd, numpy as np, torch
import lightning.pytorch as pl
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss, SMAPE, MAPE

print("Versions ->",
      "PF:", __import__("pytorch_forecasting").__version__,
      "| Lightning:", __import__("lightning").__version__,
      "| Torch:", torch.__version__,
      "| NumPy:", np.__version__)

pl.seed_everything(42)


Versions -> PF: 1.4.0 | Lightning: 2.5.4 | Torch: 2.8.0+cu126 | NumPy: 2.2.6


INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42


42

In [2]:
#@title Load data (upload or from Drive)
from google.colab import files
print("Silakan upload file master: master_dataset_core3_ready_with_y_holidays.csv")
uploaded = files.upload()
fname = list(uploaded.keys())[0]
df = pd.read_csv(fname, parse_dates=["date"])
df.sort_values(["series_id","date"], inplace=True)
df["month"] = df["date"].dt.month.astype(int)  # known-future helper
df.head()


Silakan upload file master: master_dataset_core3_ready_with_y_holidays.csv


Saving master_dataset_core3_ready_with_y_full.csv to master_dataset_core3_ready_with_y_full.csv


Unnamed: 0,series_id,date,time_idx,year,month,y,holiday_flag,visa_free,evoa_available,intl_passengers_total,fx_usd,cpi_2022base
0,IDN,2020-01-01,1,2020,1,1290411.0,,,0,,13732.228409,91.93488
1,IDN,2020-02-01,2,2020,2,872765.0,,,0,,13776.15275,92.190426
2,IDN,2020-03-01,3,2020,3,486155.0,,,0,,15194.57381,92.278545
3,IDN,2020-04-01,4,2020,4,158066.0,,,0,,15867.431905,92.349041
4,IDN,2020-05-01,5,2020,5,161842.0,,,0,,14906.190937,92.410724


Patch *Data*

In [3]:
import pandas as pd
import numpy as np

# --- 1) Pastikan kolom tanggal turunan ada ---
df["month"] = df["date"].dt.month.astype(int)

# --- 2) Tambah e-VOA kalau belum ada ---
if "evoa_available" not in df.columns:
    df["evoa_available"] = (df["date"] >= pd.Timestamp("2022-11-01")).astype(int)

# --- 3) Tambah holiday_flag & holidays_count kalau belum ada ---
eid_month_by_year = {2020:5, 2021:5, 2022:5, 2023:4, 2024:4, 2025:3}

def build_holiday_cols(df):
    mo = df["date"].dt.month
    yr = df["date"].dt.year
    eid_mo = yr.map(eid_month_by_year)

    # flag 1 jika Des/Jan atau bulan Idulfitri
    holiday_flag = (mo.isin([12, 1]) | (mo == eid_mo)).astype(int)

    # count: 2 untuk Des/Jan, 1 untuk Eid (kecuali kalau Eid jatuh Des/Jan, tetap 2)
    holidays_count = pd.Series(0, index=df.index)
    holidays_count.loc[mo.isin([12, 1])] = 2
    # set 1 hanya jika bukan Des/Jan
    mask_eid_only = (mo == eid_mo) & (~mo.isin([12, 1]))
    holidays_count.loc[mask_eid_only] = 1

    return holiday_flag, holidays_count.astype(int)

if "holiday_flag" not in df.columns or "holidays_count" not in df.columns:
    df["holiday_flag"], df["holidays_count"] = build_holiday_cols(df)

# --- 4) Pastikan kolom numerik benar2 numeric (NaN kalau gagal parse) ---
num_cols = ["y", "cpi_2022base", "fx_usd", "holiday_flag", "holidays_count", "evoa_available", "month", "time_idx"]
for c in num_cols:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# --- 5) Cek tidak ada y yang kosong di periode training (<= 2024-12) ---
cutoff_time = pd.Timestamp("2024-12-01")
missing_y_train = df[(df["date"] <= cutoff_time) & (df["y"].isna())]
print("Missing y in train period:", len(missing_y_train))
if len(missing_y_train) > 0:
    display(missing_y_train[["date", "y"]].head())

# --- 6) Cek fitur unknown reals (cpi, fx) tidak ada NaN di train ---
print("NaN in train (cpi, fx):")
print(df[df["date"] <= cutoff_time][["cpi_2022base", "fx_usd"]].isna().sum())


Missing y in train period: 0
NaN in train (cpi, fx):
cpi_2022base    0
fx_usd          0
dtype: int64


Split train/val (≤ 2024) & siapkan loader

In [4]:
from pytorch_forecasting import TimeSeriesDataSet

cutoff_time = pd.Timestamp("2024-12-01")
df_trainval = df[df["date"] <= cutoff_time].copy()   # tanpa 2025
df_future   = df.copy()                               # termasuk 2025 (y NaN)

# safety: pastikan train/val bersih
assert df_trainval["y"].isna().sum() == 0
for c in ["cpi_2022base","fx_usd"]:
    assert df_trainval[c].isna().sum() == 0

max_encoder_length = 24
max_prediction_length = 12

training = TimeSeriesDataSet(
    df_trainval,
    time_idx="time_idx",
    target="y",
    group_ids=["series_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=["month","holiday_flag","holidays_count","evoa_available"],
    time_varying_unknown_reals=["y","cpi_2022base","fx_usd"],
)
validation = TimeSeriesDataSet.from_dataset(training, df_trainval, predict=True, stop_randomization=True)

batch_size = 64
train_loader = training.to_dataloader(train=True,  batch_size=batch_size, num_workers=0)
val_loader   = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)


In [5]:
#@title Build datasets (encoder=24, decoder=12)
max_encoder_length = 24
max_prediction_length = 12
cutoff_time = pd.Timestamp("2024-12-01")  # terakhir 2024 untuk latih/validasi

df_trainval = df[df.date <= cutoff_time].copy()

training = TimeSeriesDataSet(
    df_trainval,
    time_idx="time_idx",
    target="y",
    group_ids=["series_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=["month", "holiday_flag", "holidays_count", "evoa_available"],
    time_varying_unknown_reals=["y", "cpi_2022base", "fx_usd"],
)
validation = TimeSeriesDataSet.from_dataset(training, df_trainval, predict=True, stop_randomization=True)
batch_size = 64
train_loader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_loader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [8]:
#@title Sanity check & dataloader
print("n_samples (training windows):", len(training))

batch_size = 16   # kalau 'train batches' masih 1, turunkan ke 8 atau 4
train_loader = training.to_dataloader(train=True,  batch_size=batch_size, num_workers=0)
val_loader   = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

print("train batches:", len(train_loader), "| val batches:", len(val_loader))


n_samples (training windows): 25
train batches: 1 | val batches: 1


In [9]:
#@title Train TFT (EarlyStopping + Best Checkpoint)
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

pl.seed_everything(42)

es  = EarlyStopping(monitor="val_loss", patience=5, mode="min")
ckp = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1, filename="tft-best")

trainer = pl.Trainer(
    max_epochs=200,
    accelerator="auto", devices=1,
    gradient_clip_val=0.1,
    callbacks=[es, ckp],
    log_every_n_steps=1,
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=1e-3,
    hidden_size=16,
    attention_head_size=4,
    dropout=0.1,
    loss=QuantileLoss(),
)

trainer.fit(tft, train_loader, val_loader)

# load model terbaik
from pytorch_forecasting import TemporalFusionTransformer as TFT
best_tft = TFT.load_from_checkpoint(ckp.best_model_path)
print("Loaded best checkpoint:", ckp.best_model_path)


INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: 
   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 0      | train
3  | prescalers                         | ModuleDict                      | 112    | train

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Loaded best checkpoint: /content/lightning_logs/version_2/checkpoints/tft-best.ckpt


In [11]:
#@title Validation metrics (SMAPE & MAPE) — versi aman untuk PF 1.x
import numpy as np
import torch

# 1) Prediksi median (tanpa raw)
pred_median = best_tft.predict(val_loader).detach().cpu().numpy()

# 2) Ambil target asli dari val_loader
y_true_batches = []
for batch in val_loader:
    yb = batch[1]               # biasanya (y) atau (y, weight)
    if isinstance(yb, (list, tuple)):
        yb = yb[0]
    y_true_batches.append(yb.detach().cpu().numpy())
y_true = np.concatenate(y_true_batches, axis=0)

# 3) Samakan shape (flatten)
y_true = y_true.reshape(-1)
y_pred = pred_median.reshape(-1)

# 4) Hitung metrik
def smape(y_true, y_pred):
    denom = np.abs(y_true) + np.abs(y_pred)
    return 100 * np.mean(np.where(denom==0, 0, np.abs(y_true - y_pred) / denom))

def mape(y_true, y_pred):
    return 100 * np.mean(np.where(y_true==0, 0, np.abs((y_true - y_pred) / y_true)))

print({"SMAPE": smape(y_true, y_pred), "MAPE": mape(y_true, y_pred)})


INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:lightning.pytorch.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


{'SMAPE': np.float32(5.0084486), 'MAPE': np.float32(10.728851)}


In [25]:
# === TFT metrics khusus 2024 (tanpa mode="raw") ===
import numpy as np

# 1) Prediksi semua window di validation
pred_all = best_tft.predict(val_loader).detach().cpu().numpy()   # shape (N_windows, 12)

# 2) Target asli untuk semua window di validation
y_true_batches = []
for batch in val_loader:
    yb = batch[1]                   # bisa (y) atau (y, weight)
    if isinstance(yb, (list, tuple)):
        yb = yb[0]
    y_true_batches.append(yb.detach().cpu().numpy())
true_all = np.concatenate(y_true_batches, axis=0)                # shape (N_windows, 12)

# 3) Ambil window TERAKHIR = Jan–Dec 2024 (karena df_trainval berakhir 2024-12)
y_pred_2024 = pred_all[-1].reshape(-1)
y_true_2024 = true_all[-1].reshape(-1)
print("Shapes 2024:", y_pred_2024.shape, y_true_2024.shape)      # harus (12,) (12,)

# 4) Metrik
def smape(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true, float), np.asarray(y_pred, float)
    denom = np.abs(y_true) + np.abs(y_pred)
    return 100*np.mean(np.where(denom==0, 0, np.abs(y_true - y_pred) / denom))

def mape(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true, float), np.asarray(y_pred, float)
    return 100*np.mean(np.where(y_true==0, 0, np.abs((y_true - y_pred) / y_true)))

print({
    "TFT_2024_SMAPE": smape(y_true_2024, y_pred_2024),
    "TFT_2024_MAPE":  mape(y_true_2024, y_pred_2024),
})


INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:lightning.pytorch.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


Shapes 2024: (12,) (12,)
{'TFT_2024_SMAPE': np.float64(5.008449185958625), 'TFT_2024_MAPE': np.float64(10.728851129459839)}


In [17]:
# AUDIT: pastikan 2025 di df tidak ada NaN dan bertipe numeric
mask_2025 = (df["date"] >= "2025-01-01") & (df["date"] <= "2025-12-01")
print("Banyak baris 2025:", int(mask_2025.sum()))
print("NaN y 2025:", int(df.loc[mask_2025, "y"].isna().sum()))

# tampilkan jika ada yang kosong / bukan numeric
bad = df.loc[mask_2025 & df["y"].isna(), ["date","y","cpi_2022base","fx_usd"]]
display(bad.head(20))

# cek tipe kolom
print(df[["y","cpi_2022base","fx_usd"]].dtypes)


Banyak baris 2025: 12
NaN y 2025: 12


Unnamed: 0,date,y,cpi_2022base,fx_usd
60,2025-01-01,,,
61,2025-02-01,,,
62,2025-03-01,,,
63,2025-04-01,,,
64,2025-05-01,,,
65,2025-06-01,,,
66,2025-07-01,,,
67,2025-08-01,,,
68,2025-09-01,,,
69,2025-10-01,,,


y               float64
cpi_2022base    float64
fx_usd          float64
dtype: object


Forecast 2025 (12 bulan)

In [20]:
# Forecast 2025 (12 bulan) — map prediksi ke 12 baris 2025 (pakai y dari df_pred_input)
from pytorch_forecasting import TimeSeriesDataSet
import numpy as np

# dataset prediksi (skala/encoding ikut 'training')
predict_set    = TimeSeriesDataSet.from_dataset(training, df_pred_input, predict=True, stop_randomization=True)
predict_loader = predict_set.to_dataloader(train=False, batch_size=1, num_workers=0)

# median predictions -> (12,)
yhat_2025 = np.asarray(best_tft.predict(predict_loader)).reshape(-1).astype(float)
print("Shape yhat_2025:", yhat_2025.shape)

# ambil 12 baris 2025 dari df_pred_input (agar kolom y tidak NaN)
mask_2025 = df_pred_input["date"].dt.year == 2025
df_pred_2025 = df_pred_input.loc[mask_2025, ["date","series_id","y"]].copy()
df_pred_2025 = df_pred_2025.sort_values("date").reset_index(drop=True)

# pastikan jumlah cocok, lalu isi yhat
assert len(yhat_2025) == len(df_pred_2025) == 12, (len(yhat_2025), len(df_pred_2025))
df_pred_2025["yhat"] = yhat_2025

print("rows:", len(df_pred_2025))
display(df_pred_2025)          # tampilkan SEMUA baris (bukan .head())
# kalau tetap cuma 5 baris yg terlihat, itu karena UI; coba print(df_pred_2025.to_string(index=False))


INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:lightning.pytorch.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


Shape yhat_2025: (12,)
rows: 12


Unnamed: 0,date,series_id,y,yhat
0,2025-01-01,IDN,927746.0,1296797.0
1,2025-02-01,IDN,1062149.0,1336336.5
2,2025-03-01,IDN,1041861.0,1337458.25
3,2025-04-01,IDN,1066958.0,1342461.375
4,2025-05-01,IDN,1145499.0,1344352.875
5,2025-06-01,IDN,1197941.0,1345855.25
6,2025-07-01,IDN,1310756.0,1345321.0
7,2025-08-01,IDN,1339946.0,1340722.75
8,2025-09-01,IDN,1279258.0,1332324.75
9,2025-10-01,IDN,1193867.0,1322329.75


Save outputs

In [21]:
df_pred_2025.to_csv("tft_forecast_2025.csv", index=False)
print("Saved:", "tft_forecast_2025.csv")

from google.colab import files
files.download("tft_forecast_2025.csv")


Saved: tft_forecast_2025.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## (Opsional) Baseline Naive & Seasonal-Naive

In [22]:
import numpy as np

def smape(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true, float), np.asarray(y_pred, float)
    denom = np.abs(y_true) + np.abs(y_pred)
    return 100 * np.mean(np.where(denom==0, 0, np.abs(y_true - y_pred) / denom))

def mape(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true, float), np.asarray(y_pred, float)
    return 100 * np.mean(np.where(y_true==0, 0, np.abs((y_true - y_pred) / y_true)))

mask24 = (df["date"] >= "2024-01-01") & (df["date"] <= "2024-12-01")

# Naive-1
df_naive = df.copy()
df_naive["yhat"] = df_naive["y"].shift(1)
valid = mask24 & df_naive["yhat"].notna()
print("Naive:", {"SMAPE": smape(df.loc[valid,"y"], df_naive.loc[valid,"yhat"]),
                 "MAPE":  mape (df.loc[valid,"y"], df_naive.loc[valid,"yhat"])})

# Seasonal-Naive-12
df_snaive = df.copy()
df_snaive["yhat"] = df_snaive["y"].shift(12)
valid = mask24 & df_snaive["yhat"].notna()
print("SNaive:", {"SMAPE": smape(df.loc[valid,"y"], df_snaive.loc[valid,"yhat"]),
                  "MAPE":  mape (df.loc[valid,"y"], df_snaive.loc[valid,"yhat"])})


Naive: {'SMAPE': np.float64(3.2026649638671723), 'MAPE': np.float64(6.271776695853699)}
SNaive: {'SMAPE': np.float64(9.173516470685529), 'MAPE': np.float64(16.399193013135125)}
