In [None]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

# ------------------- Data Loading & Preprocessing -------------------
# Load the engineered feature table. Every column that is neither the target
# nor one of the categorical columns below is treated as a numeric feature.
df = pd.read_csv('../artifacts/engineered_data.csv')
target_col = "Weather Type"
cat_cols = ['Season', 'Location']
num_cols = [col for col in df.columns if col not in cat_cols + [target_col]]

# Encode categorical features as integer codes so they can index embedding
# tables; cat_dims records the number of categories of each column.
df[cat_cols] = df[cat_cols].astype('category')
cat_dims = [len(df[col].cat.categories) for col in cat_cols]
for col in cat_cols:
    df[col] = df[col].cat.codes

# pandas encodes a missing category as -1, which is not a valid embedding
# index. Fail here with a clear message instead of deep inside the model.
missing_cat_cols = [col for col in cat_cols if (df[col] < 0).any()]
if missing_cat_cols:
    raise ValueError(
        f"Missing values found in categorical columns: {missing_cat_cols}"
    )

# Label encode target. The class count comes from the fitted encoder so it
# always agrees with le.classes_, which is used for the report labels.
le = LabelEncoder()
df[target_col] = le.fit_transform(df[target_col])
n_classes = len(le.classes_)

# Train/val split (80/20, stratified on the target, fixed seed)
X = df.drop(columns=[target_col])
y = df[target_col]
X_train, X_val, y_train, y_val = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Take explicit copies before assigning columns so the writes below never
# target a view of `df` (avoids SettingWithCopyWarning on some versions).
X_train = X_train.copy()
X_val = X_val.copy()

# Normalize numeric columns; statistics are fitted on the training split only
# and re-used for the validation split to avoid leakage.
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_val[num_cols] = scaler.transform(X_val[num_cols])

# ------------------- Dataset -------------------
class TabularDataset(Dataset):
    """Map-style dataset over a feature frame and its target.

    The categorical columns, the numeric columns and the target are each
    materialised once as NumPy arrays (int64, float32 and int64 respectively),
    so fetching a sample is plain array indexing.
    """

    def __init__(self, X, y, cat_cols, num_cols):
        categorical = X[cat_cols].to_numpy()
        numeric = X[num_cols].to_numpy()
        targets = y.to_numpy()

        self.X_cat = categorical.astype(np.int64)
        self.X_num = numeric.astype(np.float32)
        self.y = targets.astype(np.int64)

    def __len__(self):
        # One sample per target entry.
        return self.y.shape[0]

    def __getitem__(self, idx):
        # (categorical codes, numeric features, label) for the requested row.
        sample = (self.X_cat[idx], self.X_num[idx], self.y[idx])
        return sample

# Wrap each split in a TabularDataset, then batch it. Only the training
# loader reshuffles its samples every epoch.
train_ds, val_ds = (
    TabularDataset(features, labels, cat_cols, num_cols)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)
train_loader = DataLoader(dataset=train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=64, shuffle=False)

# ------------------- FT-Transformer Model -------------------
class FTTransformer(nn.Module):
    """Transformer classifier for tabular data, in the style of FT-Transformer.

    Every categorical column has its own embedding table and contributes one
    token. All continuous features are projected *jointly* by one linear layer
    into a single token (there is no per-feature numeric tokenizer). A learned
    CLS token is prepended, the sequence goes through a Transformer encoder,
    and the encoder output at the CLS position feeds an MLP classifier head.

    Args:
        cat_dims: Number of categories of each categorical column, in the
            same order as the columns of ``x_cat``. May be empty.
        num_cont: Number of continuous features (width of ``x_num``).
        emb_dim: Token / model dimension. Must be divisible by ``n_heads``.
        transformer_layers: Number of stacked encoder layers.
        n_classes: Number of output logits.
        n_heads: Attention heads per encoder layer.
        dim_feedforward: Hidden width of each encoder layer's feed-forward net.
        dropout: Dropout probability used inside the encoder layers.
        head_hidden: Hidden width of the classifier head.
        head_dropout: Dropout probability used inside the classifier head.
    """

    def __init__(self, cat_dims, num_cont, emb_dim, transformer_layers, n_classes,
                 n_heads=4, dim_feedforward=256, dropout=0.1,
                 head_hidden=64, head_dropout=0.2):
        super().__init__()
        self.emb_dim = emb_dim

        # NOTE: submodules are created in a fixed order so that parameter
        # initialisation under a given RNG seed stays reproducible.

        # Categorical embeddings: one table (and one token) per column
        self.cat_embeds = nn.ModuleList([
            nn.Embedding(num_categories, emb_dim) for num_categories in cat_dims
        ])

        # Continuous features: a single joint projection -> one token
        self.num_proj = nn.Linear(num_cont, emb_dim)

        # CLS token
        self.cls_token = nn.Parameter(torch.randn(1, 1, emb_dim))

        # Transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=emb_dim,
            nhead=n_heads,
            batch_first=True,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=transformer_layers)

        # Final classifier head
        self.fc = nn.Sequential(
            nn.LayerNorm(emb_dim),
            nn.Linear(emb_dim, head_hidden),
            nn.ReLU(),
            nn.Dropout(head_dropout),
            nn.Linear(head_hidden, n_classes)
        )

    def forward(self, x_cat, x_num):
        """Return class logits of shape [B, n_classes].

        Args:
            x_cat: Integer category codes of shape [B, len(cat_dims)]. It is
                not read when the model has no categorical columns.
            x_num: Float continuous features of shape [B, num_cont].
        """
        batch_size = x_num.size(0)

        # Sequence layout: CLS, numeric token, then one token per categorical.
        tokens = [
            self.cls_token.expand(batch_size, -1, -1),  # [B, 1, emb_dim]
            self.num_proj(x_num).unsqueeze(1),          # [B, 1, emb_dim]
        ]

        # torch.stack rejects an empty list, so skip it when there are no
        # categorical columns at all.
        if len(self.cat_embeds) > 0:
            cat_tokens = torch.stack(
                [embed(x_cat[:, i]) for i, embed in enumerate(self.cat_embeds)],
                dim=1,
            )  # [B, num_cat, emb_dim]
            tokens.append(cat_tokens)

        x = torch.cat(tokens, dim=1)

        # Pass through transformer
        x = self.transformer(x)

        # Use CLS token output for classification
        x_cls = x[:, 0]
        return self.fc(x_cls)


# ------------------- Training -------------------
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = FTTransformer(
    cat_dims=cat_dims,
    num_cont=len(num_cols),
    emb_dim=64,
    transformer_layers=4,
    n_classes=n_classes,
)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(50):
    model.train()
    # Running sum of the per-batch losses; it is printed as-is (not averaged).
    total_loss = 0
    # NOTE: the loop variable `y` rebinds the module-level target Series `y`.
    for x_cat, x_num, y in tqdm(train_loader):
        x_cat = x_cat.to(device)
        x_num = x_num.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        output = model(x_cat, x_num)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Train Loss = {total_loss:.4f}")

# ------------------- Evaluation -------------------
# Gather arg-max predictions over the validation loader with the model in
# eval mode and autograd switched off, then print the per-class report.
all_preds = []
all_targets = []
model.eval()
with torch.no_grad():
    for x_cat, x_num, y in val_loader:
        x_cat = x_cat.to(device)
        x_num = x_num.to(device)
        # Labels stay on the CPU; only the predictions are moved back.
        preds = model(x_cat, x_num).argmax(dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_targets.extend(y.numpy())

# classification_report expects string labels.
class_names = list(map(str, le.classes_))

print(classification_report(all_targets, all_preds, target_names=class_names))



100%|██████████| 144/144 [00:03<00:00, 40.73it/s]


Epoch 1: Train Loss = 42.9560


100%|██████████| 144/144 [00:03<00:00, 45.18it/s]


Epoch 2: Train Loss = 18.7417


100%|██████████| 144/144 [00:03<00:00, 46.89it/s]


Epoch 3: Train Loss = 16.0459


100%|██████████| 144/144 [00:03<00:00, 46.83it/s]


Epoch 4: Train Loss = 15.5286


100%|██████████| 144/144 [00:03<00:00, 46.84it/s]


Epoch 5: Train Loss = 13.0806


100%|██████████| 144/144 [00:05<00:00, 24.16it/s]


Epoch 6: Train Loss = 12.9276


100%|██████████| 144/144 [00:03<00:00, 46.89it/s]


Epoch 7: Train Loss = 12.8508


100%|██████████| 144/144 [00:03<00:00, 46.86it/s]


Epoch 8: Train Loss = 11.8858


100%|██████████| 144/144 [00:02<00:00, 50.85it/s]


Epoch 9: Train Loss = 11.3474


100%|██████████| 144/144 [00:02<00:00, 49.00it/s]


Epoch 10: Train Loss = 11.3543


100%|██████████| 144/144 [00:03<00:00, 45.74it/s]


Epoch 11: Train Loss = 10.4537


100%|██████████| 144/144 [00:03<00:00, 45.73it/s]


Epoch 12: Train Loss = 10.7202


100%|██████████| 144/144 [00:03<00:00, 47.74it/s]


Epoch 13: Train Loss = 9.8152


100%|██████████| 144/144 [00:03<00:00, 47.44it/s]


Epoch 14: Train Loss = 10.2435


100%|██████████| 144/144 [00:03<00:00, 42.61it/s]


Epoch 15: Train Loss = 9.8582


100%|██████████| 144/144 [00:03<00:00, 39.84it/s]


Epoch 16: Train Loss = 9.8521


100%|██████████| 144/144 [00:03<00:00, 36.61it/s]


Epoch 17: Train Loss = 9.5617


100%|██████████| 144/144 [00:04<00:00, 33.40it/s]


Epoch 18: Train Loss = 8.7440


100%|██████████| 144/144 [00:05<00:00, 26.68it/s]


Epoch 19: Train Loss = 9.2053


100%|██████████| 144/144 [00:03<00:00, 42.57it/s]


Epoch 20: Train Loss = 8.6077


100%|██████████| 144/144 [00:03<00:00, 42.81it/s]


Epoch 21: Train Loss = 8.9920


100%|██████████| 144/144 [00:03<00:00, 42.08it/s]


Epoch 22: Train Loss = 9.4617


100%|██████████| 144/144 [00:03<00:00, 40.98it/s]


Epoch 23: Train Loss = 8.1125


100%|██████████| 144/144 [00:03<00:00, 44.75it/s]


Epoch 24: Train Loss = 9.1056


100%|██████████| 144/144 [00:03<00:00, 42.70it/s]


Epoch 25: Train Loss = 8.9191


100%|██████████| 144/144 [00:03<00:00, 42.20it/s]


Epoch 26: Train Loss = 9.6726


100%|██████████| 144/144 [00:03<00:00, 45.29it/s]


Epoch 27: Train Loss = 7.2730


100%|██████████| 144/144 [00:03<00:00, 47.92it/s]


Epoch 28: Train Loss = 7.9593


100%|██████████| 144/144 [00:03<00:00, 44.44it/s]


Epoch 29: Train Loss = 7.2793


100%|██████████| 144/144 [00:03<00:00, 45.57it/s]


Epoch 30: Train Loss = 7.3978


100%|██████████| 144/144 [00:03<00:00, 47.13it/s]


Epoch 31: Train Loss = 6.4317


100%|██████████| 144/144 [00:03<00:00, 45.55it/s]


Epoch 32: Train Loss = 7.8186


100%|██████████| 144/144 [00:03<00:00, 41.65it/s]


Epoch 33: Train Loss = 7.3317


100%|██████████| 144/144 [00:03<00:00, 46.22it/s]


Epoch 34: Train Loss = 7.0159


100%|██████████| 144/144 [00:02<00:00, 50.29it/s]


Epoch 35: Train Loss = 7.6066


100%|██████████| 144/144 [00:03<00:00, 43.39it/s]


Epoch 36: Train Loss = 6.9035


100%|██████████| 144/144 [00:03<00:00, 41.95it/s]


Epoch 37: Train Loss = 7.5272


100%|██████████| 144/144 [00:03<00:00, 41.40it/s]


Epoch 38: Train Loss = 6.5086


100%|██████████| 144/144 [00:03<00:00, 42.90it/s]


Epoch 39: Train Loss = 7.3075


100%|██████████| 144/144 [00:03<00:00, 42.66it/s]


Epoch 40: Train Loss = 7.3108


100%|██████████| 144/144 [00:03<00:00, 44.77it/s]


Epoch 41: Train Loss = 6.7981


100%|██████████| 144/144 [00:03<00:00, 43.81it/s]


Epoch 42: Train Loss = 6.2261


100%|██████████| 144/144 [00:03<00:00, 44.33it/s]


Epoch 43: Train Loss = 7.4897


100%|██████████| 144/144 [00:03<00:00, 41.04it/s]


Epoch 44: Train Loss = 7.7376


100%|██████████| 144/144 [00:03<00:00, 41.76it/s]


Epoch 45: Train Loss = 7.1635


100%|██████████| 144/144 [00:03<00:00, 46.83it/s]


Epoch 46: Train Loss = 6.2626


100%|██████████| 144/144 [00:02<00:00, 51.52it/s]


Epoch 47: Train Loss = 6.3932


100%|██████████| 144/144 [00:03<00:00, 44.88it/s]


Epoch 48: Train Loss = 6.0625


100%|██████████| 144/144 [00:03<00:00, 45.53it/s]


Epoch 49: Train Loss = 7.0820


100%|██████████| 144/144 [00:03<00:00, 41.08it/s]


Epoch 50: Train Loss = 5.9319
              precision    recall  f1-score   support

           0       0.97      0.93      0.95       594
           1       0.95      0.96      0.96       563
           2       0.98      0.99      0.99       579
           3       0.96      0.98      0.97       565

    accuracy                           0.97      2301
   macro avg       0.97      0.97      0.97      2301
weighted avg       0.97      0.97      0.97      2301

tensor([[-2.1055,  6.6930, -5.9019, -4.8759],
        [-6.5361, -4.2565,  6.7901, -1.9879],
        [-1.9524,  6.6164, -6.1349, -4.6724],
        [ 6.2432, -5.2904, -5.4338, -2.4733],
        [-6.4618, -5.0703, -1.7489,  7.7585]])
