# 📦 Cell 1: Importing Libraries and Setting Device
In this cell, we import necessary libraries, check for GPU availability, and set the device accordingly.

In [1]:
import os  # 📂 Import the 'os' module to interact with the operating system.
import subprocess  # 🚀 Import the 'subprocess' module for running external commands.

import pandas as pd  # 📊 Import 'pandas' library for data manipulation.
import torch  # 🔥 Import 'torch' library for PyTorch functionalities.
import torchvision  # 🖼️ Import 'torchvision' for computer vision utilities.
import torch.nn as nn  # 🧠 Import 'nn' module from 'torch' for neural network components.
import torch.optim as optim  # ⚙️ Import 'optim' module for optimization algorithms.
from torchvision.models import resnet18  # 📸 Import the ResNet-18 model from torchvision.
from torch.utils.data import DataLoader, Dataset  # 🧾 Import data-related components.

# Default to the CPU and switch to the GPU only when PyTorch reports one is usable.
DEVICE = 'cpu'
if torch.cuda.is_available():
    DEVICE = 'cuda'

# 🚧 Cell 2: Accelerator Check
This cell checks if a GPU accelerator is available and raises an error if not.

In [2]:
# The notebook is meant to run on a GPU accelerator (a P100 is the suggested choice).
# Abort immediately when no CUDA device was detected.
if DEVICE != 'cuda':
    no_accelerator_message = '❗ Make sure you have added an accelerator (e.g., GPU) to your notebook; the submission will fail otherwise! 🚀'
    raise RuntimeError(no_accelerator_message)

# 📂 Cell 3: Dataset Loading and Preparation
Here, we define functions and classes for loading and preparing the dataset for training and validation.

In [3]:
def load_example(df_row):
    '''Read the image referenced by a dataframe row and bundle it with the row's metadata.

    The row is indexed by the keys 'image_path', 'image_id', 'age_group',
    'age' and 'person_id'. The returned dict holds the decoded image under
    'image' followed by the four metadata fields copied verbatim.
    '''
    # Decode the image first, then copy the metadata fields in a fixed order.
    example = {'image': torchvision.io.read_image(df_row['image_path'])}
    for field in ('image_id', 'age_group', 'age', 'person_id'):
        example[field] = df_row[field]
    return example


class HiddenDataset(Dataset):
    '''The hidden dataset for training and validation.

    All examples of the requested split are loaded eagerly in ``__init__`` and
    kept in ``self.examples`` as dicts produced by ``load_example``.
    '''
    def __init__(self, split='train'):
        '''Load every example listed in ``<split>.csv``.

        Args:
            split: Name of the CSV file (without extension) inside the
                competition input directory.

        Raises:
            ValueError: If the split contains no examples.
        '''
        super().__init__()

        data_root = '/kaggle/input/neurips-2023-machine-unlearning/'
        df = pd.read_csv(f'/kaggle/input/neurips-2023-machine-unlearning/{split}.csv')

        def to_image_path(image_id):
            # An image id has the form '<folder>-<file>'; it maps to
            # '<root>/images/<folder>/<file>.png'.
            parts = image_id.split('-')
            return os.path.join(data_root, 'images', parts[0], parts[1] + '.png')

        df['image_path'] = df['image_id'].apply(to_image_path)
        df = df.sort_values(by='image_path')  # Deterministic ordering by image path.

        # Build the example list directly instead of abusing DataFrame.apply
        # for its append side effect.
        self.examples = [load_example(row) for _, row in df.iterrows()]
        if len(self.examples) == 0:
            raise ValueError('No examples.')

    def __len__(self):
        '''Return the number of loaded examples.'''
        return len(self.examples)

    def __getitem__(self, idx):
        '''Return a copy of example ``idx`` with its image converted to float32.

        A shallow copy is returned so the cached example keeps its original
        image tensor: writing the float32 tensor back into the cache would
        permanently replace the stored image and inflate memory use as the
        dataset is iterated.
        '''
        example = dict(self.examples[idx])
        example['image'] = example['image'].to(torch.float32)
        return example


def get_dataset(batch_size):
    '''Build shuffled dataloaders for the 'retain', 'forget' and 'validation' splits.

    Returns a tuple ``(retain_loader, forget_loader, validation_loader)``, each
    a DataLoader over a HiddenDataset using the given batch size.
    '''
    split_names = ('retain', 'forget', 'validation')

    # Load all three splits before wrapping any of them in a DataLoader.
    datasets = [HiddenDataset(split=name) for name in split_names]

    retain_loader, forget_loader, validation_loader = [
        DataLoader(dataset, batch_size=batch_size, shuffle=True)
        for dataset in datasets
    ]
    return retain_loader, forget_loader, validation_loader

# 🧠 Cell 4: Unlearning Function
This cell contains the unlearning function, which fine-tunes the model using a provided dataset.

In [4]:
def unlearning(
    net,
    retain_loader,
    forget_loader,
    val_loader,
    epochs=1,
    lr=0.001):
    '''Simple unlearning by finetuning on the retain set.

    The network is updated in place with SGD (momentum 0.9, weight decay 5e-4)
    under a cosine-annealed learning rate, and is left in evaluation mode.

    Args:
        net: Model to finetune; modified in place.
        retain_loader: Iterable of batches; each batch is indexed with the
            keys "image" (model inputs) and "age_group" (class targets).
        forget_loader: Not used by this baseline; kept for interface parity.
        val_loader: Not used by this baseline; kept for interface parity.
        epochs: Number of passes over ``retain_loader`` (also the cosine
            schedule's ``T_max``).
        lr: Initial SGD learning rate.

    Returns:
        None.
    '''
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    # Send batches to wherever the model's parameters live rather than relying
    # on a module-level device constant, so model and data cannot disagree.
    device = next(net.parameters()).device

    # Nothing inside the loop leaves training mode, so one call is enough.
    net.train()

    for _ in range(epochs):
        for sample in retain_loader:
            inputs = sample["image"].to(device)
            targets = sample["age_group"].to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        scheduler.step()  # Advance the learning-rate schedule once per epoch.

    net.eval()  # Leave the network in evaluation mode when done training.

# 💾 Cell 5: Checkpoint Generation & Submission
In this cell, we create the unlearned model checkpoints, ensure there are exactly 512 of them, and handle the submission process, including creating the submission.zip file.

In [5]:
if os.path.exists('/kaggle/input/neurips-2023-machine-unlearning/empty.txt'):
    # Mock submission: the marker file is present, so only create an empty
    # submission.zip. check=True surfaces a failed command instead of ignoring it.
    subprocess.run('touch submission.zip', shell=True, check=True)
else:
    # Write checkpoints outside of the working directory to avoid disk space issues.
    os.makedirs('/kaggle/tmp', exist_ok=True)

    # Load the datasets and initialize the model.
    retain_loader, forget_loader, validation_loader = get_dataset(64)
    net = resnet18(weights=None, num_classes=10)
    net.to(DEVICE)

    # Read the original weights from disk once. load_state_dict copies the
    # values into the model's parameters, so this dict is not changed by
    # training and can be reused to reset the model on every iteration.
    original_state = torch.load(
        '/kaggle/input/neurips-2023-machine-unlearning/original_model.pth',
        map_location=DEVICE)

    # Produce 512 unlearned checkpoints, each starting from the original weights.
    for i in range(512):
        net.load_state_dict(original_state)
        unlearning(net, retain_loader, forget_loader, validation_loader)
        torch.save(net.state_dict(), f'/kaggle/tmp/unlearned_checkpoint_{i}.pth')

    # Count only the .pth files, since those are exactly what gets zipped below.
    unlearned_ckpts = [name for name in os.listdir('/kaggle/tmp') if name.endswith('.pth')]
    if len(unlearned_ckpts) != 512:
        raise RuntimeError('❗ Expected exactly 512 checkpoints. The submission will throw an exception otherwise.')

    # Create the submission.zip file containing the unlearned checkpoints;
    # fail loudly if the zip command does not succeed.
    subprocess.run('zip submission.zip /kaggle/tmp/*.pth', shell=True, check=True)