# Installing Modules

In [361]:
# pip install pandas

In [362]:
# pip install scikit-learn

In [363]:
# pip install torch

In [364]:
# pip install xgboost

In [365]:
# pip install torchvision albumentations

In [366]:
# pip install lightgbm

In [367]:
# pip install matplotlib

In [368]:
# pip install seaborn

# Mean Teacher

In [369]:
# ✅ Step 0: Imports & Configuration

# === Core Libraries ===
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report

# === Reproducibility ===
torch.manual_seed(42)
np.random.seed(42)

# === Device ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cpu


In [None]:
# ✅ Step 1: Load and Prepare the Data

# === Load Dataset ===
df = pd.read_csv("dataset.csv")

# === Prepare Features & Labels ===
X = df.drop(columns=["Class"]).values
y = df["Class"].values - 1  # Convert 1–5 to 0–4

# === 30% Test Split ===
X_dev, X_test, y_dev, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# === Split 20% of Dev as Labeled, 80% as Unlabeled ===
X_labeled, X_unlabeled, y_labeled, _ = train_test_split(
    X_dev, y_dev, test_size=0.8, stratify=y_dev, random_state=42
)

# === Normalize using only Labeled training data ===
scaler = StandardScaler()
X_labeled = scaler.fit_transform(X_labeled)
X_unlabeled = scaler.transform(X_unlabeled)
X_test = scaler.transform(X_test)


In [371]:
# ✅ Step 2: Dataset Wrappers

class LabeledDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class UnlabeledDataset(Dataset):
    def __init__(self, X):
        self.X = torch.tensor(X, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx]


In [372]:
# ✅ Step 3: Create Dataloaders

batch_size = 64

labeled_loader = DataLoader(LabeledDataset(X_labeled, y_labeled), batch_size=batch_size, shuffle=True)
unlabeled_loader = DataLoader(UnlabeledDataset(X_unlabeled), batch_size=batch_size, shuffle=True)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)


In [373]:
# ✅ Step 4A: Define the MLP Model

class MLP(nn.Module):
    def __init__(self, input_dim=470, output_dim=5):
        super(MLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        return self.net(x)



In [374]:
# ✅ Step 4B: EMA Teacher Model Update Function

def update_ema_variables(student_model, teacher_model, alpha, global_step):
    alpha = min(1 - 1 / (global_step + 1), alpha)  # ramp up EMA at the beginning
    for ema_param, param in zip(teacher_model.parameters(), student_model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data * (1 - alpha))


In [375]:
# ✅ Step 4C: Initialize Student and Teacher

# === Initialize student and teacher ===
student = MLP().to(device)
teacher = MLP().to(device)
teacher.load_state_dict(student.state_dict())  # Initially identical

optimizer = torch.optim.Adam(student.parameters(), lr=0.0007)
criterion = nn.CrossEntropyLoss()



In [376]:
# ✅ Step 5: Mean Teacher Training Loop

def mean_teacher_train(
    student, teacher,
    labeled_loader, unlabeled_loader,
    optimizer, criterion,
    device, ema_decay=0.99,
    lambda_u=1.0, epochs=100
):
    global_step = 0
    student.train()
    teacher.eval()

    for epoch in range(1, epochs + 1):
        total_loss, total_sup, total_cons = 0, 0, 0

        for (x_lab, y_lab), x_unlab in zip(labeled_loader, unlabeled_loader):
            x_lab, y_lab = x_lab.to(device), y_lab.to(device)
            x_unlab = x_unlab.to(device)

            # === Supervised loss (labeled data) ===
            logits_lab = student(x_lab)
            loss_sup = criterion(logits_lab, y_lab)

            # === Consistency loss (unlabeled data) ===
            with torch.no_grad():
                teacher_logits = teacher(x_unlab)
            student_logits = student(x_unlab)

            # KL divergence or MSE (either works, we'll use MSE here)
            loss_cons = F.mse_loss(
                F.softmax(student_logits, dim=1),
                F.softmax(teacher_logits, dim=1)
            )

            # === Total loss ===
            loss = loss_sup + lambda_u * loss_cons

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # === Update teacher model ===
            global_step += 1
            update_ema_variables(student, teacher, ema_decay, global_step)

            total_loss += loss.item()
            total_sup += loss_sup.item()
            total_cons += loss_cons.item()

        print(f"[Epoch {epoch:03d}] Total Loss: {total_loss:.4f} | Sup: {total_sup:.4f} | Cons: {total_cons:.4f}")



In [377]:
# ✅ Step 6: Train the Model

mean_teacher_train(
    student=student,
    teacher=teacher,
    labeled_loader=labeled_loader,
    unlabeled_loader=unlabeled_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    ema_decay=0.99,
    lambda_u=2.0,
    epochs=3000
)


[Epoch 001] Total Loss: 32.3376 | Sup: 31.4617 | Cons: 0.4379
[Epoch 002] Total Loss: 22.8473 | Sup: 21.0265 | Cons: 0.9104
[Epoch 003] Total Loss: 19.7543 | Sup: 17.4580 | Cons: 1.1482
[Epoch 004] Total Loss: 16.8276 | Sup: 14.5604 | Cons: 1.1336
[Epoch 005] Total Loss: 16.0778 | Sup: 13.1541 | Cons: 1.4618
[Epoch 006] Total Loss: 14.5275 | Sup: 11.8345 | Cons: 1.3465
[Epoch 007] Total Loss: 14.2040 | Sup: 11.3780 | Cons: 1.4130
[Epoch 008] Total Loss: 13.5724 | Sup: 10.5781 | Cons: 1.4971
[Epoch 009] Total Loss: 13.5552 | Sup: 10.2874 | Cons: 1.6339
[Epoch 010] Total Loss: 12.6131 | Sup: 9.6301 | Cons: 1.4915
[Epoch 011] Total Loss: 12.0966 | Sup: 9.1793 | Cons: 1.4587
[Epoch 012] Total Loss: 11.4458 | Sup: 8.9944 | Cons: 1.2257
[Epoch 013] Total Loss: 10.6160 | Sup: 8.3614 | Cons: 1.1273
[Epoch 014] Total Loss: 10.2409 | Sup: 8.0064 | Cons: 1.1172
[Epoch 015] Total Loss: 10.3025 | Sup: 8.5966 | Cons: 0.8530
[Epoch 016] Total Loss: 10.5061 | Sup: 7.7324 | Cons: 1.3869
[Epoch 017] Tot

In [378]:
# ✅ Step 7: Evaluate EMA Teacher on Test Set

teacher.eval()
with torch.no_grad():
    logits = teacher(X_test_tensor)
    preds = torch.argmax(logits, dim=1).cpu().numpy()
    y_true = y_test_tensor.cpu().numpy()

# === Metrics ===
acc = accuracy_score(y_true, preds)
f1 = f1_score(y_true, preds, average='macro')
report = classification_report(
    y_true, preds,
    target_names=['Adware', 'Banking', 'SMS', 'Riskware', 'Benign'],
    digits=4
)

# === Print Results ===
print("\n== Mean Teacher Test Evaluation ==")
print(f"Accuracy: {acc:.4f}")
print(f"Macro F1-score: {f1:.4f}")
print("Classification Report:\n", report)



== Mean Teacher Test Evaluation ==
Accuracy: 0.5750
Macro F1-score: 0.5862
Classification Report:
               precision    recall  f1-score   support

      Adware     0.2645    0.8112    0.3990       376
     Banking     0.5934    0.6302    0.6112       630
         SMS     0.9036    0.2801    0.4276      1171
    Riskware     0.7398    0.7631    0.7513       764
      Benign     0.7653    0.7199    0.7419       539

    accuracy                         0.5750      3480
   macro avg     0.6533    0.6409    0.5862      3480
weighted avg     0.7210    0.5750    0.5775      3480

