# **Automated Machine Learning**

---

### **Torch Reptile - Parallel Metalearning**
*Fall 2020 | Ruduan B.F. Plug*

---

<font size="1">*Based on the Original Implementation by Alex Nichol & John Schulman [[1]](https://openai.com/blog/reptile/)*</font>

### Meta Libraries

In [204]:
# System Utility
import sys

# IPython Notebook Utilities
from IPython.display import clear_output
import tqdm.notebook as tqdm
clear_output()

print(sys.version)

3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]


### Packages

In [205]:
# Data Processing
import numpy as np
import pandas as pd

# Model Library
import tensorflow as tf

# Parallel Compute
import torch 
import torch.nn as nn

# Data Visualization
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter

# Utility Libraries
import random
import math
from time import time
from copy import deepcopy
from datetime import datetime

# Initialize Device
device = ('cuda' if torch.cuda.is_available() else 'cpu')
print("Torch Version\t", torch.__version__)

Torch Version	 2.9.0+cpu


### Configuration

In [206]:
data_folder = "data"
np.random.seed(int(time()))
torch.manual_seed(int(time()))

<torch._C.Generator at 0x29d3307c910>

### Reptile TensorFlow

#### Class Definition

In [207]:
class Reptile:

  def __init__(self, model, log, params):

    # Intialize Reptile Parameters
    self.inner_step_size = params[0]
    self.inner_batch_size = params[1]
    self.outer_step_size = params[2]
    self.outer_iterations = params[3]
    self.meta_batch_size = params[4]

    # Initialize Torch Model and Tensorboard
    self.model = model.to(device)
    self.log = log

  def reset(self):

    # Reset Training Gradients
    self.model.zero_grad()

  def train(self, X, y):

    # Train from Scratch
    self.reset()
    self.model.train()

    # Outer Training Loop
    for outer_iteration in tqdm.tqdm(range(self.outer_iterations)):

      # Track Current Weights
      current_weights = deepcopy(self.model.state_dict())

      # Sample a new Subtask
      (X_support, y_support), (X_query, y_query) = sample_task(
         X, y,
         allowed_classes=train_classes,
         n_way=N_WAY,
         k_shot=K_SHOT,
         query_size=QUERY_SIZE
      )

      # Inner Training Loop
      for inner_iteration in range(self.inner_batch_size):

        perm = torch.randperm(X_support.shape[0])

        # Process Meta Learning Batches
        for batch in range(0, X_support.shape[0], self.meta_batch_size):

          # Get Permuted Batch from Sample
          idx = perm[batch:batch + self.meta_batch_size]

          # Calculate Batch Loss
          self.model.zero_grad()
          batch_loss = self.loss(X_support[idx], y_support[idx])
          batch_loss.backward()

          # Update Model Parameters
          for theta in self.model.parameters():

            if theta.grad is not None:
                theta.data -= self.inner_step_size * theta.grad.data

      # Compute meta-loss
      with torch.no_grad():
         query_loss = self.loss(X_query, y_query)

      # Linear Cooling Schedule
      alpha = self.outer_step_size * (1 - outer_iteration / self.outer_iterations)

      # Get Current Candidate Weights
      candidate_weights = self.model.state_dict()

      # Transfer Candidate Weights to Model State Checkpoint
      state_dict = {candidate: (current_weights[candidate] + alpha * 
                               (candidate_weights[candidate] - current_weights[candidate])) 
                                for candidate in candidate_weights}
      self.model.load_state_dict(state_dict)
      
      # Log new Training Loss
      self.log.add_scalar('ModelEstimate/Loss', query_loss.item(), outer_iteration)

      # Log evaluation accuracy
      if outer_iteration % 50 == 0:
        mean_acc, _ = evaluate_episodes(self, X, y, allowed_classes=train_classes, n_episodes=50)
        self.log.add_scalar('Episode/MeanAccuracy', mean_acc, outer_iteration)

  def loss(self, x, y):

    # Calculate Torch Tensors
    x = x.to(device)
    y = y.to(device)

    # Compute model output
    logits = self.model(x)

    # Cross Entropy Loss
    calculatedLoss = nn.CrossEntropyLoss()
    output = calculatedLoss(logits, y)

    return output

  def predict(self, x):

    # Estimate using Torch Model
    t = torch.tensor(x, device = device, dtype = torch.float32)
    t = self.model(t)

    prediction = torch.argmax(t, dim=1)

    return prediction.cpu().numpy()

  def eval(self, X, y, allowed_classes, gradient_steps=5):
      self.model.eval()

      # Sample a task
      (X_support, y_support), (X_query, y_query) = sample_task(
         X, y,
         allowed_classes=allowed_classes,
         n_way=N_WAY,
         k_shot=K_SHOT,
         query_size=QUERY_SIZE
      )

      # Store Meta-Initialization Weights
      meta_weights = deepcopy(self.model.state_dict())

      # Calculate Estimate over Gradient Steps
      for step in range(gradient_steps):

        # Calculate Evaluation Loss and Backpropagate
        self.model.zero_grad()
        loss = self.loss(X_support, y_support)
        loss.backward()

        # Update Model Estimate Parameters
        for theta in self.model.parameters():

            if theta.grad is not None:
                theta.data -= self.inner_step_size * theta.grad.data

      # Get Estimate Loss over Evaluation
      with torch.no_grad():
          logits = self.model(X_query.to(device))
          predictions = torch.argmax(logits, dim=1)

      # Accuracy
      accuracy = (predictions == y_query.to(device)).float().mean().item()

      # Restore Meta-Initialization Weights
      self.model.load_state_dict(meta_weights)
      self.model.train()

      return accuracy

def evaluate_episodes(model, X, y, allowed_classes, n_episodes=100, gradient_steps=5):
    accs = []
    for _ in range(n_episodes):
        acc = model.eval(X, y, allowed_classes=allowed_classes, gradient_steps=gradient_steps)
        accs.append(acc)
    return float(np.mean(accs)), float(np.std(accs))

#### PyTorch Module

In [208]:
N_WAY = 3       # number of classes per episode
K_SHOT = 5      # support samples per class
QUERY_SIZE = 15  # query samples per class
HIDDEN_DIM = 128

class TorchModule(nn.Module):
    def __init__(self, input_dim, hidden_dim=HIDDEN_DIM, num_classes=N_WAY):
        super(TorchModule, self).__init__()

        self.input = nn.Linear(input_dim, hidden_dim)
        self.hidden = nn.Linear(hidden_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = torch.relu(self.input(x))
        x = torch.relu(self.hidden(x))
        return self.output(x)


### Learning Task

#### Task Sampler

In [209]:
def sample_task(X, y, allowed_classes, n_way=N_WAY, k_shot=K_SHOT, query_size=QUERY_SIZE):
    # Work with numpy view of labels
    y_np = y.cpu().numpy()
    needed = k_shot + query_size

    # Only classes that are allowed AND have enough samples
    valid_classes = [
        c for c in allowed_classes
        if np.sum(y_np == c) >= needed
    ]

    if len(valid_classes) < n_way:
        raise ValueError("Not enough valid classes for this N-way task")

    # Randomly choose n_way classes from the valid ones
    classes = np.random.choice(valid_classes, n_way, replace=False)

    support_X = []
    support_y = []
    query_X = []
    query_y = []

    # Map global label -> episodic label 0..n_way-1
    class_mapping = {int(c): i for i, c in enumerate(classes)}

    for c in classes:
        idx = np.where(y_np == c)[0]             # indices of this class
        chosen = np.random.choice(idx, needed, replace=False)

        support = chosen[:k_shot]
        query = chosen[k_shot:]

        # X is a torch tensor; numpy indices are fine
        support_X.append(X[support])
        query_X.append(X[query])

        # Episodic labels 0..n_way-1 as torch.long
        episodic_label = class_mapping[int(c)]
        support_y.append(torch.full((len(support),), episodic_label, dtype=torch.long))
        query_y.append(torch.full((len(query),), episodic_label, dtype=torch.long))

    X_support = torch.cat(support_X, dim=0)
    y_support = torch.cat(support_y, dim=0)
    X_query = torch.cat(query_X, dim=0)
    y_query = torch.cat(query_y, dim=0)

    return (X_support, y_support), (X_query, y_query)


### Dataset

#### Preprocessing

In [210]:
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load dataset
df = pd.read_csv("dataset/ML-EdgeIIoT-dataset-test.csv", low_memory=False)

# Encode Attack_type labels
encoder = LabelEncoder()
y = encoder.fit_transform(df["Attack_type"])

# Separate features and binary attack label
df = df.drop(columns=["Attack_label", "Attack_type"])

# Keep only numeric features
df = df.select_dtypes(include=["number"])
X = df.values

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

print("Dataset loaded:", X.shape, y.shape)

Dataset loaded: torch.Size([157800, 42]) torch.Size([157800])


#### Split Data

In [211]:
# Get all unique class labels
all_classes = np.unique(y.numpy())

# Fix seed for reproducibility
np.random.seed(42)

# Choose 80% classes for meta-training
num_meta_train = int(0.8 * len(all_classes))
train_classes = np.random.choice(all_classes, size=num_meta_train, replace=False)

# Remaining classes are test
test_classes = np.array([c for c in all_classes if c not in train_classes])

print("Train Classes:", train_classes)
print("Test Classes:", test_classes)

# Create dataset masks
train_mask = np.isin(y.numpy(), train_classes)
test_mask  = np.isin(y.numpy(), test_classes)

# Filter datasets
X_train = X[train_mask]
y_train = y[train_mask]

X_test  = X[test_mask]
y_test  = y[test_mask]

print("Train set:", X_train.shape, y_train.shape)
print("Test set:",  X_test.shape,  y_test.shape)

Train Classes: [ 9 11  0 13  5  8  2  1 14  4  7 10]
Test Classes: [ 3  6 12]
Train set: torch.Size([136070, 42]) torch.Size([136070])
Test set: torch.Size([21730, 42]) torch.Size([21730])


## Experiments

In [212]:
# Define Experiment Parameters
inner_step_size = 0.05
inner_batch_size = 32

outer_step_size = 0.1
outer_iterations = 3000
meta_batch_size = 32

params = [inner_step_size, inner_batch_size,
          outer_step_size, outer_iterations, meta_batch_size]

# Build Model
input_dim = X.shape[1]
log = SummaryWriter(data_folder)
reptile_model = Reptile(TorchModule(input_dim, hidden_dim=HIDDEN_DIM, num_classes=N_WAY), log, params)

# Train Model
reptile_model.train(X_train, y_train)

log.close()

# Zero-day evaluation on unseen test classes
mean_acc, std_acc = evaluate_episodes(
    reptile_model,
    X_test, y_test,
    allowed_classes=test_classes,
    n_episodes=200
)

ci = 1.96 * std_acc / np.sqrt(200)
print(f"Zero-Day {N_WAY}-Way Accuracy: {mean_acc:.4f} ± {ci:.4f}")

  0%|          | 0/3000 [00:00<?, ?it/s]

Zero-Day 3-Way Accuracy: 0.8079 ± 0.0105


### Results

In [213]:
%load_ext tensorboard
%reload_ext tensorboard
%tensorboard --logdir data

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 996), started 3:06:01 ago. (Use '!kill 996' to kill it.)