In [None]:
'''
experimental: Using Class-Specific Oversampling

** similar accuracy as model 4 ** no need to run this **

no of shots imply

'''

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import transforms
from collections import Counter
import numpy as np
import random
import os
from torchvision.datasets import ImageFolder
from matplotlib import pyplot as plt
import pennylane as qml
from pennylane.qnn import TorchLayer
from tqdm.notebook import tqdm
from imblearn.over_sampling import RandomOverSampler

# targeted data-augmentation
class CustomAugmentedDataset(torch.utils.data.Dataset):
    def __init__(self, base_dataset, target_classes, heavy_transform, default_transform):
        self.base_dataset = base_dataset
        self.target_classes = target_classes
        self.heavy_transform = heavy_transform
        self.default_transform = default_transform

        # Class-to-index mapping (e.g., {'Allaple.A': 0, 'Autorun.K': 1, ...})
        self.class_to_idx = base_dataset.class_to_idx
        self.idx_to_class = {v: k for k, v in self.class_to_idx.items()}

    def __len__(self):
        return len(self.base_dataset)

    def __getitem__(self, idx):
        image, label = self.base_dataset[idx]

        # If the class name matches target class, apply heavier transform
        class_name = self.idx_to_class[label]
        if class_name in self.target_classes:
            image = self.heavy_transform(image)
        else:
            image = self.default_transform(image)

        return image, label


# class for loss fucntion
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * ((1 - pt) ** self.gamma) * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

# Set seeds for reproducibility
def seed_all(seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

seed_all(42)

# ========== DEVICE ==========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ========== PARAMETERS ==========
n_qubits = 6
batch_size = 16
num_classes = 25
num_epochs = 50
lr = 0.0005

# ========== TRANSFORMS WITH DATA AUGMENTATION ==========

default_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

heavy_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.RandomAffine(degrees=25, translate=(0.2, 0.2), scale=(0.8, 1.2)),
    transforms.RandomResizedCrop(128, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# # ✅ For training (with augmentation)
# train_transform = transforms.Compose([
#     transforms.Grayscale(1),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(10),
#     transforms.Resize((128, 128)),
#     transforms.ToTensor(),
#     transforms.Normalize((0.5,), (0.5,))
# ])

# ✅ For validation and test (no augmentation)
eval_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


# ========== DATASETS ==========
# train_dataset = ImageFolder('/home/netsec1/dataset_folder/malimg_dataset/train', transform=train_transform)

# Base dataset first (ImageFolder)
base_train_dataset = ImageFolder('/home/netsec1/dataset_folder/malimg_dataset/train')

# Wrap it with the custom augmentor
target_classes = ['Autorun.K', 'C2LOP.P', 'Swizzor.gen!I']  # Add any more underperforming classes
train_dataset = CustomAugmentedDataset(
    base_dataset=base_train_dataset,
    target_classes=target_classes,
    heavy_transform=heavy_transform,
    default_transform=default_transform
)

val_dataset   = ImageFolder('/home/netsec1/dataset_folder/malimg_dataset/val', transform=eval_transform)
test_dataset  = ImageFolder('/home/netsec1/dataset_folder/malimg_dataset/test', transform=eval_transform)
print("**dataset loaded**")
# ========== CLASS WEIGHTS ==========
from sklearn.utils.class_weight import compute_class_weight

labels = [label for _, label in train_dataset.base_dataset.samples]
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(labels),
                                     y=labels)
class_wts = torch.tensor(class_weights, dtype=torch.float)

class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
#loss_fn = FocalLoss(alpha=class_weights_tensor, gamma=2)


# # Weighted sampler
# sample_weights = [class_weights[label] for _, label in train_dataset.samples]
# sampler = WeightedRandomSampler(sample_weights, len(sample_weights))

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)


# ========== QUANTUM CIRCUIT ==========
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
# def quantum_circuit(inputs, weights):
#     # Simplified circuit
#     for i in range(n_qubits):
#         qml.RY(inputs[i], wires=i)
    
#     # Single layer of rotations
#     for i in range(n_qubits):
#         qml.RY(weights[i], wires=i)
    
#     # Reduced entanglement
#     for i in range(n_qubits - 1):
#         qml.CNOT(wires=[i, i + 1])
    
#     return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

# weight_shapes = {"weights": (n_qubits,)}  # Reduced parameters

def quantum_circuit(inputs, weights):
    for i in range(n_qubits):
        qml.RY(inputs[i], wires=i)
    
    for l in range(weights.shape[0]):
        for i in range(n_qubits):
            qml.RY(weights[l][i], wires=i)
        for i in range(n_qubits - 1):
            qml.CNOT(wires=[i, i+1])
    
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

weight_shapes = {"weights": (3, n_qubits)}


# ========== CNN + QNN MODEL ==========
class FeatureReduce(nn.Module):
    def __init__(self, final_dim, dropout=0.4):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 8, 3, stride=2, padding=1),    # 128 -> 64
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Conv2d(8, 16, 3, stride=2, padding=1),   # 64 -> 32
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Conv2d(16, 32, 3, stride=2, padding=1),  # 32 -> 16
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Conv2d(32, 64, 3, stride=2, padding=1),  # 16 -> 8
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Conv2d(64, 128, 3, stride=2, padding=1),  # ⬅️ Extra block: 8 -> 4
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1))                # 4×4 -> 1×1
        )
        self.fc = nn.Linear(128, final_dim)  # ⬅️ Changed from 64 to 128

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


class HybridQNN(nn.Module):
    def __init__(self, n_qubits, num_classes):
        super().__init__()
        self.feature_extractor = FeatureReduce(final_dim=n_qubits)
        self.q_layer = TorchLayer(quantum_circuit, weight_shapes)

        # Adding 4-layer MLP after quantum layer
        self.classifier = nn.Sequential(
            nn.Linear(n_qubits, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, num_classes)
        )

    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.tanh(x)
        q_out = torch.stack([self.q_layer(f) for f in x])
        return self.classifier(q_out)

# ========== TRAINING ==========
print("Starting training")

model = HybridQNN(n_qubits=n_qubits, num_classes=num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.005)
# Convert numpy array to tensor and move to device
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
# Now create the loss function
loss_fn = FocalLoss(alpha=1, gamma=1.5)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.5, 
    patience=5,
)

def train(model, dataloader, loss_fn, optimizer, device):
    model.train()
    total_loss, correct = 0.0, 0
    
    for inputs, labels in tqdm(dataloader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        
        # Clip gradients for stability
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        total_loss += loss.item()
        correct += (outputs.argmax(dim=1) == labels).sum().item()
    
    return total_loss / len(dataloader), correct / len(dataloader.dataset)


def evaluate(model, dataloader, loss_fn, device):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = correct / total  # This should never exceed 1.0
    return total_loss / len(dataloader), accuracy

# ========== RUN TRAINING WITH EARLY STOPPING ==========
train_losses, val_losses = [], []
train_accs, val_accs = [], []

# Early Stopping variables
early_stopping_patience = 7
best_val_loss = float('inf')
epochs_without_improvement = 0

print("Start training")

for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, loss_fn, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, loss_fn, device)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    scheduler.step(val_loss)
    
    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # Visualization (bar plot of accuracy per epoch)
    plt.figure(figsize=(8, 4))
    plt.bar(["Train", "Val"], [train_acc, val_acc])
    plt.title(f"Epoch {epoch+1}")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.show()

    # ===== Early Stopping Logic =====
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
        torch.save(model.state_dict() ,"best_model_ch5.pth")
        print("💾 Best model saved.")
    else:
        epochs_without_improvement += 1
        print(f"🕒 No improvement for {epochs_without_improvement} epoch(s).")

    if epochs_without_improvement >= early_stopping_patience:
        print(f"⏹️ Early stopping triggered after {epoch+1} epochs.")
        


**dataset loaded**
Starting training
Start training


  0%|          | 0/467 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# for additional 20 epoch training

model.load_state_dict(torch.load("best_model2.pth"))
print("✅ Loaded best model to continue training")

# You can also reinitialize optimizer if needed (optional)
# optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

# Continue training for more epochs
additional_epochs = 20
starting_epoch = 48

for epoch in range(starting_epoch + 1, starting_epoch + additional_epochs + 1):
    train_loss, train_acc = train(model, train_loader, loss_fn, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, loss_fn, device)

    print(f"Epoch {epoch}/{starting_epoch + additional_epochs} | "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # Save best model again if val_loss improves (optional)
    if val_loss < min(val_losses, default=1e9):
        torch.save(model.state_dict(), "best_model.pth")
        print("💾 Best model saved.")

    # Record losses/accs
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    # Step the scheduler
    scheduler.step(val_loss)

    # Optional: plot after each epoch
    plt.figure(figsize=(8, 4))
    plt.bar(["Train", "Val"], [train_acc, val_acc])
    plt.title(f"Epoch {epoch}")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.show()


In [4]:
# printing other performance matrix
from sklearn.metrics import classification_report, confusion_matrix
import torch

model = HybridQNN(n_qubits=n_qubits, num_classes=num_classes).to(device)
model.load_state_dict(torch.load("best_model2.pth"))
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:  # or val_loader
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
print("done__")
print("Classification Report:")
print(classification_report(all_labels, all_preds, target_names=test_dataset.classes))


done__
Classification Report:
                precision    recall  f1-score   support

     Adialer.C       1.00      1.00      1.00        14
     Agent.FYI       1.00      1.00      1.00        13
     Allaple.A       1.00      0.99      1.00       296
     Allaple.L       1.00      1.00      1.00       160
 Alueron.gen!J       0.95      1.00      0.98        21
     Autorun.K       0.00      0.00      0.00        12
       C2LOP.P       0.50      0.12      0.20        16
   C2LOP.gen!g       0.56      1.00      0.71        20
Dialplatform.B       1.00      1.00      1.00        20
     Dontovo.A       0.94      1.00      0.97        17
      Fakerean       1.00      0.97      0.99        39
 Instantaccess       0.96      1.00      0.98        44
    Lolyda.AA1       1.00      1.00      1.00        22
    Lolyda.AA2       1.00      1.00      1.00        21
    Lolyda.AA3       1.00      1.00      1.00        13
     Lolyda.AT       1.00      0.94      0.97        17
   Malex.gen!J   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# 🛠️ Recommendations:

# Augment data for underperforming classes.

# Try class-specific oversampling (or re-enable WeightedRandomSampler).

# Use confusion matrix to see which classes are being confused.

# Continue training (maybe unfreeze earlier layers if you’re using transfer learning).