In [8]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
from sklearn.metrics import f1_score, accuracy_score
from collections import Counter
from skimage.feature import hog
from skimage.transform import resize


def get_transforms(img_size=64):
    """Build the torchvision preprocessing pipelines for training and validation.

    Both pipelines convert the image to grayscale, resize it to
    ``img_size`` x ``img_size``, turn it into a tensor and normalise it with
    mean 0.5 and std 0.5. The training pipeline additionally applies random
    rotation, perspective and affine augmentation between the resize and the
    tensor conversion.

    Args:
        img_size: Side length, in pixels, of the square output image.

    Returns:
        A ``(train_transform, val_transform)`` tuple of ``transforms.Compose``
        objects.
    """
    target_shape = (img_size, img_size)

    # Random augmentation used for training only.
    augmentations = [
        transforms.RandomRotation(20),
        transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
        transforms.RandomAffine(degrees=15, translate=(0.15, 0.15), scale=(0.85, 1.15)),
    ]

    def _pipeline(extra_steps):
        # Shared head (grayscale + resize) and tail (tensor + normalise).
        return transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(target_shape),
            *extra_steps,
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])

    return _pipeline(augmentations), _pipeline([])


def extract_hog(img_tensor, img_size=64):
    """Compute a HOG descriptor for a single image tensor.

    Args:
        img_tensor: Image tensor. Singleton dimensions are squeezed away and
            the result is converted with ``.numpy()``, so the tensor must be
            on the CPU. (assumes a single-channel image such as
            ``[1, H, W]`` -- TODO confirm against callers)
        img_size: Side length the image is resized to before HOG extraction.

    Returns:
        A ``torch.float32`` tensor holding the HOG feature vector.
    """
    pixels = resize(img_tensor.squeeze().numpy(), (img_size, img_size))
    descriptor = hog(
        pixels,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=False,
    )
    return torch.tensor(descriptor, dtype=torch.float32)


class HybridCNNLSTM(nn.Module):
    """CNN + bidirectional LSTM image encoder fused with HOG features.

    The CNN turns a single-channel image into a 128-channel feature map whose
    sides are a quarter of the input (two 2x2 max-pools). Every spatial
    position of that map becomes one LSTM time step; all LSTM outputs are
    flattened, concatenated with the standardised HOG vector and passed
    through a fully connected head that emits one logit per sample.

    Args:
        hog_feat_len: Length of the HOG feature vector given to ``forward``.
        img_size: Side length of the square input images. Defaults to 64,
            which reproduces the previously hard-coded 16x16 feature map.
    """

    def __init__(self, hog_feat_len, img_size=64):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.lstm = nn.LSTM(input_size=128, hidden_size=64, num_layers=1, bidirectional=True, batch_first=True)

        # Each of the two 2x2 max-pools halves the side length (floor division);
        # the padded 3x3 convolutions leave it unchanged.
        feat_side = (img_size // 2) // 2
        # One bidirectional LSTM output (2 * 64 values) per spatial position.
        lstm_feat_len = 64 * 2 * feat_side * feat_side

        self.fc = nn.Sequential(
            nn.Linear(lstm_feat_len + hog_feat_len, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1)
        )

    def forward(self, x, hog_feats):
        """Return one logit per sample.

        Args:
            x: Image batch of shape ``[B, 1, img_size, img_size]``.
            hog_feats: HOG features of shape ``[B, hog_feat_len]``; may live on
                a different device than ``x``.

        Returns:
            Tensor of shape ``[B, 1]`` with raw (pre-sigmoid) logits.
        """
        cnn_feat = self.cnn(x)  # [B, 128, H/4, W/4]
        b, c, h, w = cnn_feat.size()

        # Channels-last, then flatten the spatial grid into a sequence.
        cnn_feat_seq = cnn_feat.permute(0, 2, 3, 1).contiguous()  # [B, h, w, 128]
        cnn_feat_seq = cnn_feat_seq.view(b, h * w, c)  # [B, h*w, 128]

        lstm_out, _ = self.lstm(cnn_feat_seq)  # [B, h*w, 128]
        lstm_feat = lstm_out.contiguous().view(b, -1)  # [B, h*w*128]

        # Standardise each HOG vector on the same device as the image features.
        hog_feats = hog_feats.to(x.device)
        hog_feats = (hog_feats - hog_feats.mean(dim=1, keepdim=True)) / (hog_feats.std(dim=1, keepdim=True) + 1e-6)
        combined = torch.cat((lstm_feat, hog_feats), dim=1)

        return self.fc(combined)


def train_model():
    """Train ``HybridCNNLSTM`` on the ``data`` image folder and keep the best-F1 weights.

    Pipeline: load the folder with ``ImageFolder``, split it 80/20 into
    training and validation subsets, train with Adam and cosine annealing on a
    class-weighted ``BCEWithLogitsLoss`` and evaluate after every epoch.
    Whenever the validation F1 improves, the weights are written to
    ``best_model.pth``; training stops early after ``PATIENCE`` epochs without
    improvement.

    Raises:
        ValueError: If the dataset contains no sample with label 1, because
            the positive-class weight cannot be computed.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"🚀 Using device: {device}")
    IMG_SIZE = 64
    BATCH_SIZE = 16
    EPOCHS = 40
    PATIENCE = 10
    DATA_PATH = "data"

    train_transform, val_transform = get_transforms(IMG_SIZE)

    # Two views of the same folder, one per transform. Assigning
    # `subset.dataset.transform` would change the dataset object shared by both
    # splits and silently disable augmentation for training as well.
    full_dataset = ImageFolder(DATA_PATH, transform=train_transform)
    val_view = ImageFolder(DATA_PATH, transform=val_transform)
    print(f"✅ Loaded dataset with {len(full_dataset)} samples")
    print("🗂️ Class to index mapping:", full_dataset.class_to_idx)
    # `targets` lists the labels without decoding or augmenting any image.
    label_counts = Counter(full_dataset.targets)
    print("🔍 Label Distribution:", label_counts)

    if label_counts[1] == 0:
        raise ValueError("Dataset contains no samples with label 1; cannot compute pos_weight.")
    pos_weight = torch.tensor([label_counts[0] / label_counts[1]]).to(device)

    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_ds, val_split = random_split(full_dataset, [train_size, val_size])
    # Same held-out indices, but read through the augmentation-free view.
    val_ds = torch.utils.data.Subset(val_view, val_split.indices)

    # BatchNorm1d cannot normalise a training batch holding a single sample,
    # so drop the trailing batch only when it would contain exactly one.
    train_loader = DataLoader(
        train_ds,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=(train_size % BATCH_SIZE == 1),
    )
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

    dummy_img, _ = val_view[0]
    hog_feat_len = len(extract_hog(dummy_img, IMG_SIZE))
    model = HybridCNNLSTM(hog_feat_len).to(device)

    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    best_f1 = 0
    best_acc = 0
    patience_counter = 0

    for epoch in range(1, EPOCHS + 1):
        model.train()

        for images, labels in train_loader:
            # HOG runs on the CPU, so extract it before the batch is moved to
            # the training device. HybridCNNLSTM.forward standardises the HOG
            # vectors itself, so no normalisation is needed here.
            hog_feats = torch.stack([extract_hog(img, IMG_SIZE) for img in images])
            images = images.to(device)
            labels = labels.unsqueeze(1).float().to(device)

            outputs = model(images, hog_feats)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()
        val_preds, val_labels = [], []
        val_loss = 0

        with torch.no_grad():
            for images, labels in val_loader:
                hog_feats = torch.stack([extract_hog(img, IMG_SIZE) for img in images])
                images = images.to(device)
                labels = labels.unsqueeze(1).float().to(device)

                outputs = model(images, hog_feats)
                loss = criterion(outputs, labels)
                # Weight by batch size so the average is per sample.
                val_loss += loss.item() * images.size(0)

                preds = (torch.sigmoid(outputs) > 0.5).int()
                # Flat Python ints keep the metric inputs one-dimensional.
                val_preds.extend(preds.view(-1).cpu().tolist())
                val_labels.extend(labels.view(-1).int().cpu().tolist())

        avg_val_loss = val_loss / len(val_loader.dataset)
        val_acc = accuracy_score(val_labels, val_preds)
        val_f1 = f1_score(val_labels, val_preds, zero_division=0)

        print(f"📉 Epoch {epoch:03d} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc*100:.2f}% | F1 Score: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), "best_model.pth")
            print("💾 Best model saved.")
        else:
            patience_counter += 1
            print(f"⏳ No improvement. Patience {patience_counter}/{PATIENCE}")
            if patience_counter >= PATIENCE:
                print("🛑 Early stopping triggered.")
                break

        scheduler.step()

    print(f"\n✅ Training complete.")
    print(f"🎯 Best Accuracy: {best_acc * 100:.2f}%")
    print(f"🏅 Best F1 Score: {best_f1:.4f}")


# Entry point: start training when this code is executed directly.
if __name__ == "__main__":
    train_model()


🚀 Using device: cpu
✅ Loaded dataset with 254 samples
🗂️ Class to index mapping: {'LPD': 0, 'PD': 1}
🔍 Label Distribution: Counter({0: 137, 1: 117})
📉 Epoch 001 | Val Loss: 1.1163 | Val Acc: 56.86% | F1 Score: 0.0000
⏳ No improvement. Patience 1/10
📉 Epoch 002 | Val Loss: 1.2809 | Val Acc: 56.86% | F1 Score: 0.0000
⏳ No improvement. Patience 2/10
📉 Epoch 003 | Val Loss: 0.8668 | Val Acc: 58.82% | F1 Score: 0.0870
💾 Best model saved.
📉 Epoch 004 | Val Loss: 0.5463 | Val Acc: 86.27% | F1 Score: 0.8108
💾 Best model saved.
📉 Epoch 005 | Val Loss: 0.3563 | Val Acc: 92.16% | F1 Score: 0.9000
💾 Best model saved.
📉 Epoch 006 | Val Loss: 0.4198 | Val Acc: 88.24% | F1 Score: 0.8421
⏳ No improvement. Patience 1/10
📉 Epoch 007 | Val Loss: 0.3308 | Val Acc: 88.24% | F1 Score: 0.8571
⏳ No improvement. Patience 2/10
📉 Epoch 008 | Val Loss: 0.3613 | Val Acc: 90.20% | F1 Score: 0.8718
⏳ No improvement. Patience 3/10
📉 Epoch 009 | Val Loss: 0.4771 | Val Acc: 90.20% | F1 Score: 0.8718
⏳ No improvement. P

In [11]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from skimage.feature import hog
from skimage.transform import resize

# Utility to extract HOG features from an image
def extract_hog(img_tensor, img_size=64):
    """Return the HOG feature vector of one image tensor.

    Args:
        img_tensor: Image tensor. It is squeezed and converted with
            ``.numpy()``, so it must be a CPU tensor. (assumes one channel,
            e.g. ``[1, H, W]`` -- TODO confirm against callers)
        img_size: Side length the image is resized to before HOG extraction.

    Returns:
        A ``torch.float32`` tensor with the HOG features.
    """
    as_array = img_tensor.squeeze().numpy()
    square = resize(as_array, (img_size, img_size))
    hog_options = dict(pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)
    return torch.tensor(hog(square, **hog_options), dtype=torch.float32)

# Custom dataset combining image and sensor features
class MultimodalDataset(Dataset):
    """Pairs grayscale images with rows of a sensor-feature CSV by position.

    Images are collected from ``image_root/<class_name>/*.jpg``. Sensor
    features and labels come from the CSV: the ``Class`` column is the label
    and every other column is a feature. Image ``i`` is paired with CSV row
    ``i``; whichever source is longer is truncated to the shorter length.

    NOTE(review): the pairing is purely positional and the training label is
    the CSV ``Class`` value, not the image's folder. Confirm that the CSV row
    order matches the sorted image order. The folder-derived labels are kept
    in ``image_labels`` so this can be checked.

    Args:
        image_root: Directory holding one sub-directory per class.
        sensor_csv_path: CSV file with a ``Class`` column plus feature columns.
        transform: Optional callable applied to each PIL image.
            ``__getitem__`` passes the result to ``extract_hog``.
        img_size: Side length passed to ``extract_hog``.
    """

    def __init__(self, image_root, sensor_csv_path, transform=None, img_size=64):
        import warnings

        self.transform = transform
        self.img_size = img_size

        # Only directories are classes; stray files next to them are skipped.
        class_names = sorted(
            name for name in os.listdir(image_root)
            if os.path.isdir(os.path.join(image_root, name))
        )

        self.image_paths = []
        self.image_labels = []
        for class_idx, class_name in enumerate(class_names):
            class_path = os.path.join(image_root, class_name)
            # os.listdir order is arbitrary; sorting keeps the positional
            # image <-> CSV-row pairing identical from run to run.
            for img_file in sorted(os.listdir(class_path)):
                if img_file.endswith('.jpg'):
                    self.image_paths.append(os.path.join(class_path, img_file))
                    self.image_labels.append(class_idx)

        sensor_df = pd.read_csv(sensor_csv_path)
        self.labels = sensor_df["Class"].values.astype(np.float32)
        self.sensor_feats = sensor_df.drop(columns=["Class"]).values.astype(np.float32)

        if len(self.image_paths) != len(self.labels):
            warnings.warn(
                f"Found {len(self.image_paths)} images but {len(self.labels)} sensor rows; "
                "truncating both to the shorter length.",
                stacklevel=2,
            )

        min_len = min(len(self.image_paths), len(self.labels), len(self.sensor_feats))
        self.image_paths = self.image_paths[:min_len]
        self.image_labels = self.image_labels[:min_len]
        self.labels = self.labels[:min_len]
        self.sensor_feats = self.sensor_feats[:min_len]

    def __len__(self):
        """Return the number of paired (image, sensor row) samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, hog_feat, sensor_feat, label)`` for sample ``idx``."""
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        sensor_feat = self.sensor_feats[idx]

        image = Image.open(image_path).convert("L")
        if self.transform:
            image = self.transform(image)

        hog_feat = extract_hog(image, self.img_size)
        sensor_feat = torch.tensor(sensor_feat, dtype=torch.float32)

        return image, hog_feat, sensor_feat, torch.tensor(label, dtype=torch.float32)

# CNN + LSTM + HOG-based feature extractor for image
class HybridCNNLSTM(nn.Module):
    """Image feature extractor: CNN -> bidirectional LSTM, concatenated with HOG.

    The CNN produces a 128-channel feature map whose sides are a quarter of
    the input (two 2x2 max-pools). Each spatial position becomes one LSTM
    time step, and the flattened LSTM outputs are concatenated with the
    standardised HOG vector. No classification head is attached; the width of
    the returned vector is exposed as ``feature_dim``.

    Args:
        hog_feat_len: Length of the HOG feature vector given to ``forward``.
        img_size: Side length of the square input images. Defaults to 64,
            which reproduces the previously hard-coded 16x16 feature map.
    """

    def __init__(self, hog_feat_len, img_size=64):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.3),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.3)
        )
        self.lstm = nn.LSTM(input_size=128, hidden_size=64, num_layers=1, bidirectional=True, batch_first=True)
        # Each of the two 2x2 max-pools halves the side length (floor division).
        feat_side = (img_size // 2) // 2
        # One bidirectional LSTM output (2 * 64 values) per spatial position,
        # followed by the HOG vector.
        self.feature_dim = 64 * 2 * feat_side * feat_side + hog_feat_len

    def forward(self, x, hog_feats):
        """Return the fused feature vector of shape ``[B, feature_dim]``.

        Args:
            x: Image batch of shape ``[B, 1, img_size, img_size]``.
            hog_feats: HOG features of shape ``[B, hog_feat_len]``; may live on
                a different device than ``x``.
        """
        cnn_feat = self.cnn(x)  # [B, 128, H/4, W/4]
        b, c, h, w = cnn_feat.size()
        # Channels-last, then flatten the spatial grid into a sequence.
        cnn_feat_seq = cnn_feat.permute(0, 2, 3, 1).contiguous().view(b, h * w, c)
        lstm_out, _ = self.lstm(cnn_feat_seq)  # [B, h*w, 128]
        lstm_feat = lstm_out.contiguous().view(b, -1)
        # Standardise each HOG vector on the same device as the image features.
        hog_feats = hog_feats.to(x.device)
        hog_feats = (hog_feats - hog_feats.mean(dim=1, keepdim=True)) / (hog_feats.std(dim=1, keepdim=True) + 1e-6)
        return torch.cat((lstm_feat, hog_feats), dim=1)

# Final multimodal classifier
class MultimodalDysgraphiaModel(nn.Module):
    """Binary classifier that fuses image features with encoded sensor features.

    The image branch is a ``HybridCNNLSTM``; the sensor branch is a small MLP
    producing a 32-dimensional embedding. Both are concatenated and passed to
    a classifier head that emits one logit per sample.

    Args:
        hog_feat_len: Length of the HOG vector expected by the image branch.
        sensor_feat_len: Number of sensor features per sample.
    """

    def __init__(self, hog_feat_len, sensor_feat_len):
        super().__init__()
        self.image_model = HybridCNNLSTM(hog_feat_len)

        sensor_layers = [
            nn.Linear(sensor_feat_len, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.sensor_encoder = nn.Sequential(*sensor_layers)

        fused_dim = self.image_model.feature_dim + 32
        head_layers = [
            nn.Linear(fused_dim, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, image, hog_feat, sensor_feat):
        """Return one raw logit per sample, shape ``[B, 1]``."""
        image_features = self.image_model(image, hog_feat)

        # Modality dropout: while training, blank the sensor input of the
        # whole batch with probability 0.2.
        drop_sensor = self.training and torch.rand(1).item() < 0.2
        if drop_sensor:
            sensor_feat = torch.zeros_like(sensor_feat)

        fused = torch.cat([image_features, self.sensor_encoder(sensor_feat)], dim=1)
        return self.classifier(fused)

# Training function
def train():
    """Train ``MultimodalDysgraphiaModel`` on images plus sensor features.

    Builds a ``MultimodalDataset`` from the ``data`` folder and
    ``features/extracted_features_auto.csv``, splits it 85/15, trains with
    Adam on ``BCEWithLogitsLoss`` and evaluates after every epoch. The weights
    are written to ``best_multimodal_model.pth`` whenever the validation F1
    improves; training stops after ``PATIENCE`` epochs without improvement.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\n🚀 Using device: {device}")

    IMG_SIZE = 64
    BATCH_SIZE = 8
    EPOCHS = 60
    PATIENCE = 10
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    dataset = MultimodalDataset("data", "features/extracted_features_auto.csv", transform)
    train_size = int(0.85 * len(dataset))
    val_size = len(dataset) - train_size
    train_ds, val_ds = random_split(dataset, [train_size, val_size])
    # BatchNorm1d cannot normalise a training batch holding a single sample,
    # so drop the trailing batch only when it would contain exactly one.
    train_loader = DataLoader(
        train_ds,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=(train_size % BATCH_SIZE == 1),
    )
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

    hog_feat_len = len(extract_hog(torch.randn(1, IMG_SIZE, IMG_SIZE)))
    # Take the sensor width from the data instead of assuming 12 columns.
    sensor_feat_len = dataset.sensor_feats.shape[1]
    model = MultimodalDysgraphiaModel(hog_feat_len, sensor_feat_len).to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

    best_f1 = 0
    best_acc = 0
    patience_counter = 0

    for epoch in range(1, EPOCHS + 1):
        model.train()
        for img, hog, sensor, label in train_loader:
            img, hog, sensor, label = img.to(device), hog.to(device), sensor.to(device), label.unsqueeze(1).to(device)
            optimizer.zero_grad()
            out = model(img, hog, sensor)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()

        model.eval()
        preds, targets = [], []
        val_loss = 0
        with torch.no_grad():
            for img, hog, sensor, label in val_loader:
                img, hog, sensor = img.to(device), hog.to(device), sensor.to(device)
                label = label.unsqueeze(1).to(device)
                out = model(img, hog, sensor)
                loss = criterion(out, label)
                # Weight by batch size so the average is per sample.
                val_loss += loss.item() * img.size(0)
                pred = (torch.sigmoid(out) > 0.5).int()
                # Flat Python ints keep the metric inputs one-dimensional.
                preds.extend(pred.view(-1).cpu().tolist())
                targets.extend(label.view(-1).int().cpu().tolist())

        avg_val_loss = val_loss / len(val_loader.dataset)
        val_acc = accuracy_score(targets, preds)
        # zero_division=0 scores an epoch without positive predictions as 0
        # without emitting a warning.
        val_f1 = f1_score(targets, preds, zero_division=0)

        print(f"📉 Epoch {epoch:03d} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc*100:.2f}% | F1 Score: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), "best_multimodal_model.pth")
            print("💾 Best model saved.")
        else:
            patience_counter += 1
            print(f"⏳ No improvement. Patience {patience_counter}/{PATIENCE}")
            if patience_counter >= PATIENCE:
                print("🛑 Early stopping triggered.")
                break

    print(f"\n🎯 Final Best Accuracy: {best_acc * 100:.2f}%")
    print(f"🏅 Final Best F1 Score: {best_f1:.4f}")

# Entry point: start training when this code is executed directly.
if __name__ == "__main__":
    train()



🚀 Using device: cpu
📉 Epoch 001 | Val Loss: 0.6816 | Val Acc: 46.15% | F1 Score: 0.6316
💾 Best model saved.
📉 Epoch 002 | Val Loss: 0.5066 | Val Acc: 84.62% | F1 Score: 0.9167
💾 Best model saved.
📉 Epoch 003 | Val Loss: 0.5114 | Val Acc: 76.92% | F1 Score: 0.8696
⏳ No improvement. Patience 1/10
📉 Epoch 004 | Val Loss: 0.5105 | Val Acc: 76.92% | F1 Score: 0.8696
⏳ No improvement. Patience 2/10
📉 Epoch 005 | Val Loss: 0.5645 | Val Acc: 69.23% | F1 Score: 0.8182
⏳ No improvement. Patience 3/10
📉 Epoch 006 | Val Loss: 0.5015 | Val Acc: 92.31% | F1 Score: 0.9600
💾 Best model saved.
📉 Epoch 007 | Val Loss: 0.5690 | Val Acc: 61.54% | F1 Score: 0.7619
⏳ No improvement. Patience 1/10
📉 Epoch 008 | Val Loss: 0.5779 | Val Acc: 53.85% | F1 Score: 0.7000
⏳ No improvement. Patience 2/10
📉 Epoch 009 | Val Loss: 0.6833 | Val Acc: 61.54% | F1 Score: 0.7619
⏳ No improvement. Patience 3/10
📉 Epoch 010 | Val Loss: 0.6336 | Val Acc: 61.54% | F1 Score: 0.7619
⏳ No improvement. Patience 4/10
📉 Epoch 011 | V

In [32]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# ==================================================
# 1. Autoencoder Feature Extraction
# ==================================================
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a ``latent_dim``-wide bottleneck.

    The encoder maps ``input_dim -> 128 -> 64 -> latent_dim``; the decoder
    mirrors it and ends in a Sigmoid, so reconstructions lie in [0, 1].

    Args:
        input_dim: Number of features per input vector.
        latent_dim: Width of the bottleneck representation.
    """

    def __init__(self, input_dim, latent_dim=32):
        super().__init__()
        wide, narrow = 128, 64

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, wide),
            nn.ReLU(),
            nn.Linear(wide, narrow),
            nn.ReLU(),
            nn.Linear(narrow, latent_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, narrow),
            nn.ReLU(),
            nn.Linear(narrow, wide),
            nn.ReLU(),
            nn.Linear(wide, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``(latent, reconstruction)`` for the batch ``x``."""
        latent = self.encoder(x)
        return latent, self.decoder(latent)


# ==================================================
# 2. GDST (Simplified: Graph + Transformer)
# ==================================================
class GDSTExtractor(nn.Module):
    """Multi-head self-attention followed by a linear layer over a latent vector.

    Each latent vector is treated as a one-token sequence, run through
    self-attention (query = key = value) and then projected by a linear layer
    of the same width.

    Args:
        latent_dim: Width of the latent vectors (the attention embedding size).
        heads: Number of attention heads.
    """

    def __init__(self, latent_dim, heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=latent_dim, num_heads=heads, batch_first=True)
        self.fc = nn.Linear(in_features=latent_dim, out_features=latent_dim)

    def forward(self, z):
        """Map ``z`` of shape ``[B, latent_dim]`` to a tensor of the same shape."""
        tokens = z.unsqueeze(1)  # [B, 1, latent_dim]
        attended = self.attn(tokens, tokens, tokens)[0]  # attention weights are discarded
        return self.fc(attended.squeeze(1))


# ==================================================
# 3. Dataset
# ==================================================
class SensorDataset(Dataset):
    """Sensor-feature dataset with precomputed autoencoder + GDST features.

    The CSV's ``Class`` column supplies the labels; every other column is a
    raw sensor feature. At construction time all rows are encoded once:
    ``hybrid = concat(autoencoder.encoder(row), gdst(encoded row))``.

    Args:
        sensor_csv_path: CSV file with a ``Class`` column plus feature columns.
        autoencoder: Module exposing an ``encoder`` sub-module. It is switched
            to eval mode and left that way.
        gdst: Module applied to the encoded batch. It is switched to eval
            mode and left that way.
        device: Device on which the feature precomputation runs.
    """

    def __init__(self, sensor_csv_path, autoencoder, gdst, device):
        sensor_df = pd.read_csv(sensor_csv_path)

        self.labels = sensor_df["Class"].values.astype(np.float32)
        self.sensor_feats = sensor_df.drop(columns=["Class"]).values.astype(np.float32)

        # Precompute the hybrid features for all rows in one batched pass
        # instead of one forward pass and device transfer per row.
        autoencoder.eval()
        gdst.eval()
        with torch.no_grad():
            feats = torch.tensor(self.sensor_feats, dtype=torch.float32).to(device)
            z = autoencoder.encoder(feats)  # [N, latent_dim]
            g = gdst(z)                     # [N, latent_dim]
            self.hybrid_feats = torch.cat([z, g], dim=1).cpu().numpy()

    def __len__(self):
        """Return the number of CSV rows."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(raw_features, hybrid_features, label)`` as float32 tensors."""
        return (
            torch.tensor(self.sensor_feats[idx], dtype=torch.float32),
            torch.tensor(self.hybrid_feats[idx], dtype=torch.float32),
            torch.tensor(self.labels[idx], dtype=torch.float32)
        )



# ==================================================
# 4. CNN-LSTM Model
# ==================================================
class CNNLSTMModel(nn.Module):
    """Three-branch binary classifier over raw and hybrid sensor features.

    * CNN branch: the raw feature vector as a one-channel 1-D signal, pooled
      to 8 positions with 32 channels (256 values).
    * LSTM branch: the raw feature vector as a sequence of scalar time steps,
      run through a bidirectional LSTM whose outputs are all flattened.
    * Hybrid branch: a two-layer MLP over the precomputed hybrid features.

    The three outputs are concatenated and mapped to a single logit.

    Args:
        raw_dim: Number of raw sensor features per sample.
        hybrid_dim: Length of the hybrid feature vector per sample.
    """

    def __init__(self, raw_dim, hybrid_dim):
        super().__init__()
        conv_channels, pooled_len = 32, 8
        lstm_hidden = 32
        hybrid_out = 32

        self.cnn = nn.Sequential(
            nn.Conv1d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.Conv1d(16, conv_channels, 3, padding=1),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(pooled_len),
            nn.Flatten(),
        )

        self.lstm = nn.LSTM(input_size=1, hidden_size=lstm_hidden, batch_first=True, bidirectional=True)

        self.hybrid_branch = nn.Sequential(
            nn.Linear(hybrid_dim, 64),
            nn.ReLU(),
            nn.Linear(64, hybrid_out),
            nn.ReLU(),
        )

        cnn_dim = conv_channels * pooled_len      # 256
        lstm_dim = 2 * lstm_hidden * raw_dim      # both directions, every time step
        combined_dim = cnn_dim + lstm_dim + hybrid_out

        self.classifier = nn.Sequential(
            nn.Linear(combined_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
        )

    def forward(self, raw_feat, hybrid_feat):
        """Return one raw logit per sample, shape ``[B, 1]``."""
        # [B, raw_dim] -> [B, 1, raw_dim]: one channel for Conv1d.
        cnn_out = self.cnn(raw_feat.unsqueeze(1))

        # [B, raw_dim] -> [B, raw_dim, 1]: one scalar per LSTM time step.
        seq_out, _ = self.lstm(raw_feat.unsqueeze(-1))
        batch = seq_out.size(0)
        lstm_flat = seq_out.reshape(batch, -1)

        branches = [cnn_out, lstm_flat, self.hybrid_branch(hybrid_feat)]
        return self.classifier(torch.cat(branches, dim=1))


# ==================================================
# 5. Hazelnut Optimizer
# ==================================================
class HazelnutOptimizer(torch.optim.Optimizer):
    """Gradient-descent optimizer with additive Gaussian noise.

    Each step moves every parameter by ``-lr * grad`` plus Gaussian noise
    scaled by ``lr * 0.1``. A copy of each updated parameter is also kept in
    ``population``, a list capped at ``population_size`` entries; once the
    list is full, a randomly chosen entry is overwritten.

    Args:
        params: Iterable of parameters (or parameter groups) to optimise.
        lr: Learning rate.
        population_size: Maximum number of parameter snapshots kept.
    """

    def __init__(self, params, lr=0.01, population_size=5):
        super().__init__(params, {"lr": lr, "population_size": population_size})
        self.population = []

    @torch.no_grad()
    def step(self, closure=None):
        """Perform one update; return the closure's loss, or ``None`` without one."""
        if closure is None:
            loss = None
        else:
            # The closure may need gradients even though step() disables them.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            step_size = group["lr"]
            capacity = group["population_size"]
            trainable = (q for q in group["params"] if q.grad is not None)
            for param in trainable:
                jitter = torch.randn_like(param) * step_size * 0.1
                param.add_(-step_size * param.grad + jitter)

                snapshot = param.clone()
                if len(self.population) >= capacity:
                    self.population[np.random.randint(capacity)] = snapshot
                else:
                    self.population.append(snapshot)
        return loss


# ==================================================
# 6. Training Loop
# ==================================================
def train():
    """Train ``CNNLSTMModel`` on sensor features with autoencoder + GDST inputs.

    Steps:
      1. Pre-train an ``Autoencoder`` on all CSV feature rows for 10
         full-batch Adam steps.
      2. Build a ``SensorDataset`` that precomputes hybrid features with the
         autoencoder's encoder and a freshly initialised ``GDSTExtractor``.
      3. Split 85/15, train ``CNNLSTMModel`` with ``HazelnutOptimizer`` on
         ``BCEWithLogitsLoss`` and evaluate after every epoch.

    The classifier weights are written to ``best_cnnlstm_gdst_ae.pth``
    whenever the validation F1 improves, and the feature extractors are
    written once to ``best_cnnlstm_gdst_ae_extractors.pth``. Training stops
    after ``PATIENCE`` epochs without improvement.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"\n🚀 Using device: {device}")

    BATCH_SIZE = 8
    EPOCHS = 50
    PATIENCE = 8

    # Load CSV
    csv_path = "features/extracted_features_auto.csv"
    df = pd.read_csv(csv_path)
    feats = torch.tensor(df.drop(columns=["Class"]).values, dtype=torch.float32).to(device)

    # Pre-train Autoencoder on raw features.
    # NOTE(review): the decoder ends in a Sigmoid, so features outside [0, 1]
    # cannot be reconstructed exactly -- confirm the CSV columns are scaled.
    # NOTE(review): pre-training sees every row, including those that later
    # land in the validation split.
    autoencoder = Autoencoder(feats.shape[1], latent_dim=32).to(device)
    opt_ae = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)
    recon_criterion = nn.MSELoss()

    for _ in range(10):  # quick pretrain
        _, recon = autoencoder(feats)
        loss = recon_criterion(recon, feats)
        opt_ae.zero_grad()
        loss.backward()
        opt_ae.step()
    print("✅ Autoencoder pretrained.")

    gdst = GDSTExtractor(latent_dim=32).to(device)

    dataset = SensorDataset(csv_path, autoencoder, gdst, device)
    raw_dim = dataset.sensor_feats.shape[1]
    hybrid_dim = dataset.hybrid_feats.shape[1]

    # The classifier's hybrid inputs depend on the autoencoder and on the
    # GDST module, which is randomly initialised and never trained here.
    # Persist both so a saved classifier checkpoint can be fed identical
    # features later.
    torch.save(
        {"autoencoder": autoencoder.state_dict(), "gdst": gdst.state_dict()},
        "best_cnnlstm_gdst_ae_extractors.pth",
    )

    train_size = int(0.85 * len(dataset))
    val_size = len(dataset) - train_size
    train_ds, val_ds = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

    model = CNNLSTMModel(raw_dim, hybrid_dim).to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = HazelnutOptimizer(model.parameters(), lr=0.001)

    best_f1 = 0
    best_acc = 0
    patience_counter = 0

    for epoch in range(1, EPOCHS + 1):
        model.train()
        for raw, hybrid, label in train_loader:
            raw, hybrid, label = raw.to(device), hybrid.to(device), label.unsqueeze(1).to(device)

            optimizer.zero_grad()
            out = model(raw, hybrid)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        preds, targets = [], []
        val_loss = 0
        with torch.no_grad():
            for raw, hybrid, label in val_loader:
                raw, hybrid = raw.to(device), hybrid.to(device)
                label = label.unsqueeze(1).to(device)
                out = model(raw, hybrid)
                loss = criterion(out, label)
                # Weight by batch size so the average is per sample.
                val_loss += loss.item() * raw.size(0)
                pred = (torch.sigmoid(out) > 0.5).int()
                # Flat Python ints keep the metric inputs one-dimensional.
                preds.extend(pred.view(-1).cpu().tolist())
                targets.extend(label.view(-1).int().cpu().tolist())

        avg_val_loss = val_loss / len(val_loader.dataset)
        val_acc = accuracy_score(targets, preds)
        # zero_division=0 scores an epoch without positive predictions as 0
        # without emitting a warning.
        val_f1 = f1_score(targets, preds, zero_division=0)

        print(f"📉 Epoch {epoch:03d} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc*100:.2f}% | F1 Score: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), "best_cnnlstm_gdst_ae.pth")
            print("💾 Best model saved.")
        else:
            patience_counter += 1
            print(f"⏳ No improvement. Patience {patience_counter}/{PATIENCE}")
            if patience_counter >= PATIENCE:
                print("🛑 Early stopping triggered.")
                break

    print(f"\n🎯 Final Best Accuracy: {best_acc * 100:.2f}%")
    print(f"🏅 Final Best F1 Score: {best_f1:.4f}")


# Entry point: start training when this code is executed directly.
if __name__ == "__main__":
    train()



🚀 Using device: cpu
✅ Autoencoder pretrained.
📉 Epoch 001 | Val Loss: 0.2801 | Val Acc: 92.31% | F1 Score: 0.9600
💾 Best model saved.
📉 Epoch 002 | Val Loss: 0.2075 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 1/8
📉 Epoch 003 | Val Loss: 0.8024 | Val Acc: 46.15% | F1 Score: 0.5882
⏳ No improvement. Patience 2/8
📉 Epoch 004 | Val Loss: 0.2454 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 3/8
📉 Epoch 005 | Val Loss: 0.3120 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 4/8
📉 Epoch 006 | Val Loss: 0.3697 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 5/8
📉 Epoch 007 | Val Loss: 0.3463 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 6/8
📉 Epoch 008 | Val Loss: 0.3333 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 7/8
📉 Epoch 009 | Val Loss: 0.5018 | Val Acc: 92.31% | F1 Score: 0.9600
⏳ No improvement. Patience 8/8
🛑 Early stopping triggered.

🎯 Final Best Accuracy: 92.31%
🏅 Final Best F