In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from pytorch_forecasting import (
    TimeSeriesDataSet,
    TemporalFusionTransformer,
    GroupNormalizer
)
from pytorch_forecasting.metrics import QuantileLoss
from lightning import Trainer, LightningModule

ModuleNotFoundError: No module named 'torch'

In [None]:
# LightningModule must come from the same distribution as the Trainer used in
# this notebook (imported from `lightning` in the first cell). `lightning` and
# `pytorch_lightning` ship separate class hierarchies, and a Trainer from one
# rejects modules derived from the other.
from lightning import LightningModule
import torch




In [None]:
class WeatherChunkDataset(Dataset):
    """Expose a large weather CSV as a sequence of pre-processed DataFrame chunks.

    The file is never loaded in full: it is scanned once at construction time
    to count the chunks, and every ``dataset[i]`` re-reads the file up to the
    requested chunk.

    Args:
        filepath: Path of the CSV file to read.
        chunk_size: Number of CSV rows per chunk.
    """

    # Columns kept from every chunk; rows with a missing value in any of them
    # are dropped.
    COLUMNS = ["date", "T2M", "QV2M", "PS", "WS10M", "PRECTOTCORR",
               "CLRSKY_SFC_SW_DWN", "Latitude", "Longitude",
               "hour", "day", "month", "season"]

    def __init__(self, filepath, chunk_size=200000):
        self.filepath = filepath
        self.chunk_size = chunk_size
        self.chunks = self._count_chunks()

    def _count_chunks(self):
        """Return how many chunks of ``chunk_size`` rows the CSV contains."""
        # The context manager closes the underlying file handle once counting ends.
        with pd.read_csv(self.filepath, chunksize=self.chunk_size) as reader:
            total = sum(1 for _ in reader)
        print(f"🧩 Tổng số chunk: {total}")
        return total

    def __len__(self):
        """Number of chunks counted when the dataset was created."""
        return self.chunks

    def _prepare(self, chunk):
        """Select the model columns, drop incomplete rows and add derived columns.

        Adds ``location`` ("<Latitude>_<Longitude>") and ``time_idx`` (whole
        hours elapsed since the earliest ``date`` remaining in this chunk).
        """
        df = chunk[self.COLUMNS].dropna()
        elapsed_hours = (df["date"] - df["date"].min()).dt.total_seconds() // 3600
        return df.assign(
            location=df["Latitude"].astype(str) + "_" + df["Longitude"].astype(str),
            time_idx=elapsed_hours.astype(int),
        )

    def __getitem__(self, idx):
        """Return the pre-processed DataFrame for chunk ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
                Raising (instead of silently returning ``None``) is also what
                lets plain ``for chunk in dataset`` iteration terminate.
        """
        if idx < 0:
            idx += self.chunks
        if not 0 <= idx < self.chunks:
            raise IndexError(f"chunk index {idx} out of range for {self.chunks} chunks")

        # `with` guarantees the reader is closed even though we return early.
        with pd.read_csv(self.filepath, chunksize=self.chunk_size,
                         parse_dates=["date"]) as reader:
            for i, chunk in enumerate(reader):
                if i == idx:
                    return self._prepare(chunk)

        # Only reachable if the file shrank after the chunks were counted.
        raise IndexError(f"chunk index {idx} not found in {self.filepath}")

In [None]:
def create_tft_from_chunk(
    df_chunk,
    max_encoder_length=24 * 30,
    max_prediction_length=24 * 7,
    batch_size=64,
    num_workers=0,
):
    """Build a Temporal Fusion Transformer and its dataloaders from one data chunk.

    The last ``max_prediction_length`` time steps of the chunk are held out:
    the training dataset only sees rows up to that cutoff, and the validation
    dataset is derived from the training dataset over the full chunk with
    ``predict=True``.

    Args:
        df_chunk: DataFrame holding at least the columns ``time_idx``,
            ``location``, ``Latitude``, ``Longitude``, ``hour``, ``day``,
            ``month`` and the six weather variables listed below.
        max_encoder_length: Maximum history length, in ``time_idx`` steps
            (default: 30 days of hourly steps). The minimum encoder length is
            half of this value.
        max_prediction_length: Maximum forecast horizon, in ``time_idx`` steps
            (default: 7 days of hourly steps).
        batch_size: Batch size used for both dataloaders.
        num_workers: Worker processes used by both dataloaders.

    Returns:
        Tuple ``(tft, train_loader, val_loader)``.
    """
    # Hold out the final forecast horizon of the chunk for validation.
    training_cutoff = df_chunk["time_idx"].max() - max_prediction_length
    train_frame = df_chunk[df_chunk["time_idx"] <= training_cutoff]

    training = TimeSeriesDataSet(
        train_frame,
        time_idx="time_idx",
        target="T2M",
        group_ids=["location"],
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=max_prediction_length,
        static_reals=["Latitude", "Longitude"],
        time_varying_known_reals=["hour", "day", "month"],
        time_varying_unknown_reals=[
            "T2M", "QV2M", "PS", "WS10M", "PRECTOTCORR", "CLRSKY_SFC_SW_DWN"
        ],
        target_normalizer=GroupNormalizer(groups=["location"]),
        add_relative_time_idx=True,
        add_target_scales=True,
        allow_missing_timesteps=True,
    )

    # Reuse the training dataset's configuration on the full chunk.
    validation = TimeSeriesDataSet.from_dataset(
        training, df_chunk, predict=True, stop_randomization=True
    )

    train_loader = training.to_dataloader(
        train=True, batch_size=batch_size, num_workers=num_workers
    )
    val_loader = validation.to_dataloader(
        train=False, batch_size=batch_size, num_workers=num_workers
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=1e-3,
        hidden_size=32,
        attention_head_size=4,
        dropout=0.2,
        hidden_continuous_size=16,
        loss=QuantileLoss(),
        log_interval=10,
        reduce_on_plateau_patience=4,
    )

    return tft, train_loader, val_loader


In [None]:
class TFTLightning(LightningModule):
    """LightningModule wrapper that delegates every step to a wrapped model.

    Args:
        tft_model: The model to wrap. It must provide ``training_step``,
            ``validation_step`` and ``configure_optimizers``.
    """

    def __init__(self, tft_model):
        super().__init__()
        self.model = tft_model

    @staticmethod
    def _scalar_loss(step_output):
        """Return the loss value carried by a step output, or ``None``.

        pytorch-forecasting models return a log dict with a ``"loss"`` entry
        from their step methods rather than a bare tensor; ``self.log`` cannot
        log such a dict directly, so the loss has to be pulled out first.
        """
        if isinstance(step_output, dict):
            return step_output.get("loss")
        return step_output

    def forward(self, x):
        """Run the wrapped model on ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Delegate to the wrapped model and log its loss as ``train_loss``."""
        output = self.model.training_step(batch, batch_idx)
        loss = self._scalar_loss(output)
        if loss is not None:
            self.log("train_loss", loss, prog_bar=True)
        # Return the wrapped model's output untouched so callers and the
        # training loop receive exactly what the model produced.
        return output

    def validation_step(self, batch, batch_idx):
        """Delegate to the wrapped model and log its loss as ``val_loss``."""
        output = self.model.validation_step(batch, batch_idx)
        loss = self._scalar_loss(output)
        if loss is not None:
            self.log("val_loss", loss, prog_bar=True)
        return output

    def configure_optimizers(self):
        """Use the optimizer configuration defined by the wrapped model."""
        return self.model.configure_optimizers()

In [None]:
if __name__ == "__main__":
    # Train chunk by chunk: each chunk gets a fresh model/dataloaders, and
    # training state is carried from one chunk to the next via a checkpoint.
    filepath = "datatrainai_5_years_clean.csv"
    epochs_per_chunk = 5
    dataset = WeatherChunkDataset(filepath, chunk_size=200_000)
    os.makedirs("checkpoints", exist_ok=True)
    last_ckpt = None
    # Epoch count recorded in `last_ckpt` (0 while nothing has been trained).
    epochs_done = 0
    # Fall back to CPU instead of crashing on machines without CUDA.
    accelerator = "gpu" if torch.cuda.is_available() else "cpu"

    for i in range(len(dataset)):
        print(f"\n🚀 Huấn luyện chunk {i+1}/{len(dataset)} ...")
        df_chunk = dataset[i]

        tft, train_loader, val_loader = create_tft_from_chunk(df_chunk)
        model = TFTLightning(tft)

        resume = bool(last_ckpt) and os.path.exists(last_ckpt)
        # Resuming with `ckpt_path` also restores the epoch counter. With a
        # constant `max_epochs` the restored run would already be "finished"
        # and the chunk would not be trained at all, so the budget has to
        # grow by `epochs_per_chunk` on top of the epochs already done.
        max_epochs = epochs_done + epochs_per_chunk if resume else epochs_per_chunk

        trainer = Trainer(
            accelerator=accelerator,
            devices=1,
            max_epochs=max_epochs,
            log_every_n_steps=10,
        )

        if resume:
            print(f"🔁 Tiếp tục huấn luyện từ {last_ckpt}")
            trainer.fit(model, ckpt_path=last_ckpt,
                        train_dataloaders=train_loader,
                        val_dataloaders=val_loader)
        else:
            trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

        ckpt_path = f"checkpoints/tft_weather_chunk_{i+1}.ckpt"
        trainer.save_checkpoint(ckpt_path)
        last_ckpt = ckpt_path
        epochs_done = max_epochs
        print(f"✅ Đã lưu checkpoint: {ckpt_path}")

    print("\n🎉 HUẤN LUYỆN HOÀN TẤT (GPU OK, PYTHON 3.12) 🚀")

Số chunk đọc được: 9


ModuleNotFoundError: No module named 'pytorch_forecasting.models.temporal_fusion_transformer.attention'

In [None]:
import torch

# Environment sanity check: report whether PyTorch can see a CUDA device and,
# if so, the name of device 0.
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
else:
    print("GPU name:", "No GPU")


CUDA available: True
GPU name: NVIDIA GeForce GTX 1650
