In [2]:
import pandas as pd

In [3]:
# import dataset
df = pd.read_csv(r'C:\Users\kettin\Documents\Lomba\GEMASTIKSciPaper\Dataset\ENSO_dataset_balanced.csv')
print(df.head())

     0         1    2    3    4    5         6         7         8         9  \
0  0.0  0.923388  0.0  0.0  0.0  0.0  2.990105  0.017291  1.682632  4.534331   
1  0.0  1.929875  0.0  0.0  0.0  0.0  5.562883  0.777918  0.464271  5.591380   
2  0.0  0.543871  0.0  0.0  0.0  0.0  3.117626  1.129129  0.000000  6.814785   
3  0.0  2.290855  0.0  0.0  0.0  0.0  1.345732  0.800221  0.000000  6.536081   
4  0.0  4.255888  0.0  0.0  0.0  0.0  0.000000  0.707398  0.000000  7.896858   

   ...       119  120  121  122  123  124       125       126  127  NINO3.4  
0  ...  0.155618  0.0  0.0  0.0  0.0  0.0  6.695034  1.521119  0.0    -1.55  
1  ...  1.411000  0.0  0.0  0.0  0.0  0.0  6.529056  2.613518  0.0    -1.78  
2  ...  1.029515  0.0  0.0  0.0  0.0  0.0  5.309260  2.167389  0.0    -1.38  
3  ...  1.345663  0.0  0.0  0.0  0.0  0.0  4.031511  2.411777  0.0    -1.90  
4  ...  2.738996  0.0  0.0  0.0  0.0  0.0  1.775552  4.273332  0.0    -1.74  

[5 rows x 129 columns]


In [4]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Pisahkan fitur dan target dari df
X = df.drop(columns=['NINO3.4']).values  # semua kolom kecuali target
y = df['NINO3.4'].values  # target tetap apa adanya

# Normalisasi ulang fitur
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Gabung kembali jadi DataFrame
df = pd.DataFrame(X_scaled, columns=df.columns[:-1])  # fitur dengan nama kolom aslinya
df['NINO3.4'] = y  # target tetap sama (nggak dinormalisasi)

print("Normalisasi selesai. Fitur sekarang 0–1, target tetap asli.")
print(f"Ukuran dataset: {df.shape[0]} sampel, {df.shape[1]-1} fitur + 1 target (NINO3.4)")


Normalisasi selesai. Fitur sekarang 0–1, target tetap asli.
Ukuran dataset: 1221 sampel, 128 fitur + 1 target (NINO3.4)


In [5]:
print(df.head())

     0         1    2    3    4    5         6         7         8         9  \
0  0.0  0.088295  0.0  0.0  0.0  0.0  0.227698  0.001255  0.197671  0.335540   
1  0.0  0.184535  0.0  0.0  0.0  0.0  0.423616  0.056479  0.054541  0.413761   
2  0.0  0.052005  0.0  0.0  0.0  0.0  0.237408  0.081978  0.000000  0.504293   
3  0.0  0.219052  0.0  0.0  0.0  0.0  0.102478  0.058099  0.000000  0.483669   
4  0.0  0.406950  0.0  0.0  0.0  0.0  0.000000  0.051359  0.000000  0.584366   

   ...       119  120  121  122  123  124       125       126  127  NINO3.4  
0  ...  0.010775  0.0  0.0  0.0  0.0  0.0  0.557715  0.144249  0.0    -1.55  
1  ...  0.097702  0.0  0.0  0.0  0.0  0.0  0.543889  0.247843  0.0    -1.78  
2  ...  0.071287  0.0  0.0  0.0  0.0  0.0  0.442276  0.205536  0.0    -1.38  
3  ...  0.093177  0.0  0.0  0.0  0.0  0.0  0.335836  0.228711  0.0    -1.90  
4  ...  0.189656  0.0  0.0  0.0  0.0  0.0  0.147909  0.405245  0.0    -1.74  

[5 rows x 129 columns]


In [6]:
import torch
import numpy as np
import pandas as pd
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction
from torch.utils.data import Dataset, DataLoader

In [7]:
context_length = 96
prediction_length = 24
stride = 12  # window geser 12 bulan

class ENSODataset(Dataset):
    def __init__(self, df, context_length, prediction_length, stride=1):
        self.features = df.drop(columns=["NINO3.4"]).values.astype(np.float32)
        self.target = df["NINO3.4"].values.astype(np.float32)
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.stride = stride
        self.indices = self.create_indices()

    def create_indices(self):
        total_length = self.context_length + self.prediction_length
        return [
            i for i in range(0, len(self.target) - total_length, self.stride)
        ]

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        i = self.indices[idx]
        past_values = self.target[i : i + self.context_length]
        future_values = self.target[i + self.context_length : i + self.context_length + self.prediction_length]

        past_time_features = self.features[i : i + self.context_length]
        future_time_features = self.features[i + self.context_length : i + self.context_length + self.prediction_length]

        return {
            "past_values": torch.tensor(past_values).unsqueeze(-1),
            "past_time_features": torch.tensor(past_time_features),
            "future_time_features": torch.tensor(future_time_features),
            "future_values": torch.tensor(future_values).unsqueeze(-1),
            "past_observed_mask": torch.ones_like(torch.tensor(past_values)).unsqueeze(-1),
        }


In [8]:
train_size = int(len(df) * 0.8)
df_train = df.iloc[:train_size]
df_test = df.iloc[train_size:]

train_dataset = ENSODataset(df_train, context_length, prediction_length, stride)
test_dataset = ENSODataset(df_test, context_length, prediction_length, stride)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [16]:
config = TimeSeriesTransformerConfig(
    prediction_length=prediction_length,
    context_length=context_length,
    lags_sequence=[1],
    input_size=1,
    num_time_features=128,  # sesuai jumlah kolom fitur kamu
    num_static_categorical_features=0,
    d_model=64,
    encoder_layers=3,
    decoder_layers=3,
)

model = TimeSeriesTransformerForPrediction(config)


In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = torch.nn.MSELoss()

for epoch in range(20):
    model.train()
    total_loss = 0
    for batch in train_loader:
        for k in batch:
            batch[k] = batch[k].to(device)
        output = model(**batch)
        loss = loss_fn(output.loss, torch.zeros_like(output.loss))  # loss sudah dihitung di dalam model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


ValueError: lags cannot go further than history length, found lag 1 while history length is only 120