# Machine Unlearning + Noise Generator

This is a copy of the original `Machine Unlearning.ipynb` notebook, with the key difference of using a different way of generating the noise.

In [1]:
# import required libraries
import numpy as np
import tarfile
import os
import math

import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torchvision.models import resnet18


train_new_one = False
# torch.manual_seed(100)
# After I optimize the Hyperparameters, I want to calculate at least 30 models, to chech the average performance
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda:0'

## Helper Functions

In [2]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def training_step(model, batch):
    images, labels = batch
    images, labels = images.to(DEVICE), labels.to(DEVICE)
    out = model(images)                  
    loss = F.cross_entropy(out, labels) 
    return loss

def validation_step(model, batch):
    images, labels = batch
    images, labels = images.to(DEVICE), labels.to(DEVICE)
    out = model(images)                    
    loss = F.cross_entropy(out, labels)   
    acc = accuracy(out, labels)
    return {'Loss': loss.detach(), 'Acc': acc}

def validation_epoch_end(model, outputs):
    batch_losses = [x['Loss'] for x in outputs]
    epoch_loss = torch.stack(batch_losses).mean()   
    batch_accs = [x['Acc'] for x in outputs]
    epoch_acc = torch.stack(batch_accs).mean()      
    return {'Loss': epoch_loss.item(), 'Acc': epoch_acc.item()}

def epoch_end(model, epoch, result):
    print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
        epoch, result['lrs'][-1], result['train_loss'], result['Loss'], result['Acc']))
    
def distance(model,model0):
    distance=0
    normalization=0
    for (k, p), (k0, p0) in zip(model.named_parameters(), model0.named_parameters()):
        space='  ' if 'bias' in k else ''
        current_dist=(p.data0-p0.data0).pow(2).sum().item()
        current_norm=p.data0.pow(2).sum().item()
        distance+=current_dist
        normalization+=current_norm
    print(f'Distance: {np.sqrt(distance)}')
    print(f'Normalized Distance: {1.0*np.sqrt(distance/normalization)}')
    return 1.0*np.sqrt(distance/normalization)

In [3]:
@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [validation_step(model, batch) for batch in val_loader]
    return validation_epoch_end(model, outputs)

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    torch.cuda.empty_cache()
    history = []
    
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)

    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)
    
    for epoch in range(epochs): 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = training_step(model, batch)
            train_losses.append(loss)
            loss.backward()
            
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            
            optimizer.step()
            optimizer.zero_grad()
            
            lrs.append(get_lr(optimizer))
            
        
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        epoch_end(model, epoch, result)
        history.append(result)
        sched.step(result['Loss'])
    return history

## Train/Load the Model

### load the dataset

In [4]:
# Dowload the dataset
if os.path.exists("data/cifar10"):
    dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
    download_url(dataset_url, '.')

    # Extract from archive
    with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
        tar.extractall(path='./data')
        
    # Look into the data directory
    data_dir = './data/cifar10'
    print(os.listdir(data_dir))
    classes = os.listdir(data_dir + "/train")
    print(classes)

Using downloaded and verified file: ./cifar10.tgz


  tar.extractall(path='./data')


['test', 'train']
['bird', 'deer', 'horse', 'automobile', 'frog', 'airplane', 'truck', 'cat', 'dog', 'ship']


In [5]:
transform_train = tt.Compose([
    tt.ToTensor(),
    tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = tt.Compose([
    tt.ToTensor(),
    tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

In [6]:
train_ds = ImageFolder(data_dir+'/train', transform_train)
valid_ds = ImageFolder(data_dir+'/test', transform_test)

In [7]:
batch_size = 256
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=3, pin_memory=True)
valid_dl = DataLoader(valid_ds, batch_size*2, num_workers=3, pin_memory=True)

### Train and save the model

In [8]:
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
model = resnet18(num_classes = 10).to(DEVICE)

epochs = 40
max_lr = 0.01
grad_clip = 0.1
weight_decay = 1e-4
opt_func = torch.optim.Adam

In [9]:
%%time
if os.exists("ResNET18_CIFAR10_ALL_CLASSES.pt"):
    history = fit_one_cycle(epochs, max_lr, model, train_dl, valid_dl, 
                                grad_clip=grad_clip, 
                                weight_decay=weight_decay, 
                                opt_func=opt_func)

    torch.save(model.state_dict(), "ResNET18_CIFAR10_ALL_CLASSES.pt")

AttributeError: module 'os' has no attribute 'exists'

### Testing the Model

In [10]:
if train_new_one:
    model.load_state_dict(torch.load("ResNET18_CIFAR10_ALL_CLASSES.pt"))
    history = [evaluate(model, valid_dl)]
    history

## Unlearning

___

Originally used:

In [11]:
# # defining the noise structure
# class Noise(nn.Module):
#     def __init__(self, *dim):
#         super().__init__()
#         self.noise = torch.nn.Parameter(torch.randn(*dim), requires_grad = True)
        
#     def forward(self):
#         return self.noise

Trying a different approach:

In [12]:
class NoiseGenerator(nn.Module):
    """
    A neural network module for generating noise patterns
    through a series of fully connected layers.
    """

    def __init__(
            self, 
            dim_out: list,
            dim_hidden: list = [1000],
            dim_start: int = 100,
            ):
        """
        Initialize the NoiseGenerator.

        Parameters:
            dim_out (list): The output dimensions for the generated noise.
            dim_hidden (list): The dimensions of hidden layers, defaults to [1000].
            dim_start (int): The initial dimension of random noise, defaults to 100.
        """
        super().__init__()
        self.dim = dim_out
        self.start_dims = dim_start  # Initial dimension of random noise

        # Define fully connected layers
        self.layers = {}
        self.layers["l1"] = nn.Linear(self.start_dims, dim_hidden[0])
        last = dim_hidden[0]
        for idx in range(len(dim_hidden)-1):
            self.layers[f"l{idx+2}"] = nn.Linear(dim_hidden[idx], dim_hidden[idx+1])
            last = dim_hidden[idx+1]

        # Define output layer
        self.f_out = nn.Linear(last, math.prod(self.dim))        

    def forward(self):
        """
        Forward pass to transform random noise into structured output.

        Returns:
            torch.Tensor: The reshaped tensor with specified output dimensions.
        """
        # Generate random starting noise
        x = torch.randn(self.start_dims)
        x = x.flatten()

        # Transform noise into learnable patterns
        for layer in self.layers.keys():
            x = self.layers[layer](x)
            x = torch.relu(x)

        # Apply output layer
        x = self.f_out(x)

        # Reshape tensor to the specified dimensions
        reshaped_tensor = x.view(self.dim)
        return reshaped_tensor

___

In [13]:
# list of all classes
classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# classes which are required to un-learn
classes_to_forget = [0, 2]

In [14]:
# classwise list of samples
num_classes = 10
classwise_train = {}
for i in range(num_classes):
    classwise_train[i] = []

for img, label in train_ds:
    classwise_train[label].append((img, label))
    
classwise_test = {}
for i in range(num_classes):
    classwise_test[i] = []

for img, label in valid_ds:
    classwise_test[label].append((img, label))

In [15]:
# getting some samples from retain classes
num_samples_per_class = 1000

retain_samples = []
for i in range(len(classes)):
    if classes[i] not in classes_to_forget:
        retain_samples += classwise_train[i][:num_samples_per_class]
        

In [16]:
# retain validation set
retain_valid = []
for cls in range(num_classes):
    if cls not in classes_to_forget:
        for img, label in classwise_test[cls]:
            retain_valid.append((img, label))
            
# forget validation set
forget_valid = []
for cls in range(num_classes):
    if cls in classes_to_forget:
        for img, label in classwise_test[cls]:
            forget_valid.append((img, label))
            
forget_valid_dl = DataLoader(forget_valid, batch_size, num_workers=3, pin_memory=True)
retain_valid_dl = DataLoader(retain_valid, batch_size*2, num_workers=3, pin_memory=True)

### Training the Noise

In [17]:
# loading the model
model = resnet18(num_classes = 10).to(DEVICE)
model.load_state_dict(torch.load("ResNET18_CIFAR10_ALL_CLASSES.pt"))

  model.load_state_dict(torch.load("ResNET18_CIFAR10_ALL_CLASSES.pt"))


<All keys matched successfully>

In [18]:
%%time

if train_new_one:
    noises = {}
    for cls in classes_to_forget:
        print("Optiming loss for class {}".format(cls))
        noises[cls] = Noise(batch_size, 3, 32, 32)
        opt = torch.optim.Adam(noises[cls].parameters(), lr = 0.1)

        num_epochs = 5
        num_steps = 8
        class_label = cls
        for epoch in range(num_epochs):
            total_loss = []
            for batch in range(num_steps):
                inputs = noises[cls]()
                labels = torch.zeros(batch_size)+class_label
                outputs = model(inputs)
                loss = -F.cross_entropy(outputs, labels.long()) + 0.1*torch.mean(torch.sum(torch.square(inputs), [1, 2, 3]))
                opt.zero_grad()
                loss.backward()
                opt.step()
                total_loss.append(loss.cpu().detach().numpy())
            print("Loss: {}".format(np.mean(total_loss)))

CPU times: user 2 μs, sys: 0 ns, total: 2 μs
Wall time: 4.77 μs


## Impair Step

In [19]:
%%time

batch_size = 256
noisy_data = []
num_batches = 20
class_num = 0

for cls in classes_to_forget:
    for i in range(num_batches):
        batch = noises[cls]().cpu().detach()
        for i in range(batch[0].size(0)):
            noisy_data.append((batch[i], torch.tensor(class_num)))

other_samples = []
for i in range(len(retain_samples)):
    other_samples.append((retain_samples[i][0].cpu(), torch.tensor(retain_samples[i][1])))
noisy_data += other_samples
noisy_loader = torch.utils.data.DataLoader(noisy_data, batch_size=256, shuffle = True)


if train_new_one:
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.02)

    for epoch in range(1):  
        model.train(True)
        running_loss = 0.0
        running_acc = 0
        for i, data in enumerate(noisy_loader):
            inputs, labels = data
            inputs, labels = inputs,torch.tensor(labels)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item() * inputs.size(0)
            out = torch.argmax(outputs.detach(),dim=1)
            assert out.shape==labels.shape
            running_acc += (labels==out).sum().item()
        print(f"Train loss {epoch+1}: {running_loss/len(train_ds)},Train Acc:{running_acc*100/len(train_ds)}%")

NameError: name 'noises' is not defined

### Performance after Impair Step

In [20]:
if train_new_one:
    print("Performance of Standard Forget Model on Forget Class")
    history = [evaluate(model, forget_valid_dl)]
    print("Accuracy: {}".format(history[0]["Acc"]*100))
    print("Loss: {}".format(history[0]["Loss"]))

    print("Performance of Standard Forget Model on Retain Class")
    history = [evaluate(model, retain_valid_dl)]
    print("Accuracy: {}".format(history[0]["Acc"]*100))
    print("Loss: {}".format(history[0]["Loss"]))

## Repair Step

In [21]:
%%time

heal_loader = torch.utils.data.DataLoader(other_samples, batch_size=256, shuffle = True)
if train_new_one:
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)


    for epoch in range(1):  
        model.train(True)
        running_loss = 0.0
        running_acc = 0
        for i, data in enumerate(heal_loader):
            inputs, labels = data
            inputs, labels = inputs,torch.tensor(labels)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item() * inputs.size(0)
            out = torch.argmax(outputs.detach(),dim=1)
            assert out.shape==labels.shape
            running_acc += (labels==out).sum().item()
        print(f"Train loss {epoch+1}: {running_loss/len(train_ds)},Train Acc:{running_acc*100/len(train_ds)}%")

NameError: name 'other_samples' is not defined

### Performance after Repair Step

In [22]:
if train_new_one:
    print("Performance of Standard Forget Model on Forget Class")
    history = [evaluate(model, forget_valid_dl)]
    print("Accuracy: {}".format(history[0]["Acc"]*100))
    print("Loss: {}".format(history[0]["Loss"]))

    print("Performance of Standard Forget Model on Retain Class")
    history = [evaluate(model, retain_valid_dl)]
    print("Accuracy: {}".format(history[0]["Acc"]*100))
    print("Loss: {}".format(history[0]["Loss"]))

In [23]:
from typing import Dict

def load_models_dict(path: str="data/new/models") -> Dict[str, torch.nn.Module]:
    de = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = resnet18(num_classes = 10).to(de)
    
    # load all the models
    md = {}
    for list in os.listdir(path):
        
        model.load_state_dict(torch.load(f=os.path.join(path, list), weights_only=True))
        model.eval()
        md[len(md)] = model

    return md

In [25]:
from src.fyemu_tunable import main

if True:
    for i in range(1):
        model = main()
        # TODO
        # After Finetuning is done
        # write this !!!!!!!!!!!!!!!
        # Save the model
        # if not os.path.exists("data/new/models"):
        #    os.makedirs("data/new/models")
        # n = len(os.listdir("data/new/models"))
        # torch.save(model.state_dict(), f"data/new/models/ResNET18_CIFAR10_UN_{n}.pt")

---Training new ResNet18---


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.39batch/s]


Epoch [0], last_lr: 0.01000, train_loss: 1.8150, val_loss: 1.3837, val_acc: 0.4840


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.53batch/s]


Epoch [1], last_lr: 0.01000, train_loss: 1.3237, val_loss: 1.2356, val_acc: 0.5569


Training...: 100%|██████████| 196/196 [00:13<00:00, 14.54batch/s]


Epoch [2], last_lr: 0.01000, train_loss: 1.0756, val_loss: 1.0671, val_acc: 0.6242


Training...: 100%|██████████| 196/196 [00:13<00:00, 14.97batch/s]


Epoch [3], last_lr: 0.01000, train_loss: 0.9295, val_loss: 1.0373, val_acc: 0.6430


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.63batch/s]


Epoch [4], last_lr: 0.01000, train_loss: 0.8425, val_loss: 0.9721, val_acc: 0.6575


Training...: 100%|██████████| 196/196 [00:13<00:00, 14.73batch/s]


Epoch [5], last_lr: 0.01000, train_loss: 0.7760, val_loss: 0.8967, val_acc: 0.6842


Training...: 100%|██████████| 196/196 [00:14<00:00, 13.45batch/s]


Epoch [6], last_lr: 0.01000, train_loss: 0.7330, val_loss: 0.9646, val_acc: 0.6755


Training...: 100%|██████████| 196/196 [00:13<00:00, 14.24batch/s]


Epoch [7], last_lr: 0.01000, train_loss: 0.6913, val_loss: 0.8313, val_acc: 0.7147


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.46batch/s]


Epoch [8], last_lr: 0.01000, train_loss: 0.6631, val_loss: 0.8726, val_acc: 0.7026


Training...: 100%|██████████| 196/196 [00:14<00:00, 13.12batch/s]


Epoch [9], last_lr: 0.01000, train_loss: 0.6406, val_loss: 0.8511, val_acc: 0.7092


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.50batch/s]


Epoch [10], last_lr: 0.01000, train_loss: 0.6187, val_loss: 0.8193, val_acc: 0.7229


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.62batch/s]


Epoch [11], last_lr: 0.01000, train_loss: 0.5948, val_loss: 0.7799, val_acc: 0.7355


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.44batch/s]


Epoch [12], last_lr: 0.01000, train_loss: 0.5750, val_loss: 0.8956, val_acc: 0.7005


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.36batch/s]


Epoch [13], last_lr: 0.01000, train_loss: 0.5627, val_loss: 0.7698, val_acc: 0.7373


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.98batch/s]


Epoch [14], last_lr: 0.01000, train_loss: 0.5488, val_loss: 0.8244, val_acc: 0.7230


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.50batch/s]


Epoch [15], last_lr: 0.01000, train_loss: 0.5327, val_loss: 0.7588, val_acc: 0.7386


Training...: 100%|██████████| 196/196 [00:13<00:00, 15.02batch/s]


Epoch [16], last_lr: 0.01000, train_loss: 0.5202, val_loss: 0.8447, val_acc: 0.7273


Training...: 100%|██████████| 196/196 [00:13<00:00, 14.99batch/s]


Epoch [17], last_lr: 0.01000, train_loss: 0.5159, val_loss: 0.8503, val_acc: 0.7205


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.57batch/s]


Epoch [18], last_lr: 0.01000, train_loss: 0.5033, val_loss: 0.8094, val_acc: 0.7300


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.20batch/s]


Epoch [19], last_lr: 0.01000, train_loss: 0.4908, val_loss: 0.8439, val_acc: 0.7290


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.33batch/s]


Epoch [20], last_lr: 0.00500, train_loss: 0.3288, val_loss: 0.7740, val_acc: 0.7622


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.41batch/s]


Epoch [21], last_lr: 0.00500, train_loss: 0.2863, val_loss: 0.8163, val_acc: 0.7537


Training...: 100%|██████████| 196/196 [00:12<00:00, 15.58batch/s]


Epoch [22], last_lr: 0.00500, train_loss: 0.2728, val_loss: 0.8696, val_acc: 0.7490


Training...: 100%|██████████| 196/196 [00:13<00:00, 14.92batch/s]


Epoch [23], last_lr: 0.00500, train_loss: 0.2602, val_loss: 0.8737, val_acc: 0.7518


Training...: 100%|██████████| 196/196 [00:14<00:00, 13.57batch/s]


Epoch [24], last_lr: 0.00250, train_loss: 0.1388, val_loss: 0.9159, val_acc: 0.7738


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.87batch/s]


Epoch [25], last_lr: 0.00250, train_loss: 0.0804, val_loss: 1.0484, val_acc: 0.7689


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.59batch/s]


Epoch [26], last_lr: 0.00250, train_loss: 0.0839, val_loss: 1.1328, val_acc: 0.7640


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.50batch/s]


Epoch [27], last_lr: 0.00250, train_loss: 0.0917, val_loss: 1.1386, val_acc: 0.7703


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.40batch/s]


Epoch [28], last_lr: 0.00125, train_loss: 0.0400, val_loss: 1.1208, val_acc: 0.7780


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.40batch/s]


Epoch [29], last_lr: 0.00125, train_loss: 0.0128, val_loss: 1.1888, val_acc: 0.7748


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.32batch/s]


Epoch [30], last_lr: 0.00125, train_loss: 0.0067, val_loss: 1.2339, val_acc: 0.7763


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.47batch/s]


Epoch [31], last_lr: 0.00125, train_loss: 0.0045, val_loss: 1.2747, val_acc: 0.7733


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.26batch/s]


Epoch [32], last_lr: 0.00063, train_loss: 0.0033, val_loss: 1.2798, val_acc: 0.7790


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.39batch/s]


Epoch [33], last_lr: 0.00063, train_loss: 0.0024, val_loss: 1.3059, val_acc: 0.7780


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.34batch/s]


Epoch [34], last_lr: 0.00063, train_loss: 0.0023, val_loss: 1.3201, val_acc: 0.7788


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.66batch/s]


Epoch [35], last_lr: 0.00063, train_loss: 0.0018, val_loss: 1.3253, val_acc: 0.7786


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.82batch/s]


Epoch [36], last_lr: 0.00031, train_loss: 0.0017, val_loss: 1.3333, val_acc: 0.7789


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.95batch/s]


Epoch [37], last_lr: 0.00031, train_loss: 0.0016, val_loss: 1.3473, val_acc: 0.7783


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.61batch/s]


Epoch [38], last_lr: 0.00031, train_loss: 0.0013, val_loss: 1.3514, val_acc: 0.7799


Training...: 100%|██████████| 196/196 [00:15<00:00, 12.51batch/s]


Epoch [39], last_lr: 0.00031, train_loss: 0.0012, val_loss: 1.3530, val_acc: 0.7788
---Training new Exact Unlearned ResNet18---


Training...: 100%|██████████| 157/157 [00:12<00:00, 12.87batch/s]


Epoch [0], last_lr: 0.01000, train_loss: 1.6355, val_loss: 1.3506, val_acc: 0.4932


Training...: 100%|██████████| 157/157 [00:12<00:00, 12.83batch/s]


Epoch [1], last_lr: 0.01000, train_loss: 1.1483, val_loss: 1.2730, val_acc: 0.5452


Training...: 100%|██████████| 157/157 [00:12<00:00, 12.49batch/s]


Epoch [2], last_lr: 0.01000, train_loss: 0.9227, val_loss: 1.0540, val_acc: 0.6119


Training...: 100%|██████████| 157/157 [00:12<00:00, 12.70batch/s]


Epoch [3], last_lr: 0.01000, train_loss: 0.8070, val_loss: 0.7863, val_acc: 0.7172


Training...: 100%|██████████| 157/157 [00:12<00:00, 12.90batch/s]


Epoch [4], last_lr: 0.01000, train_loss: 0.7170, val_loss: 0.7961, val_acc: 0.7109


Training...: 100%|██████████| 157/157 [00:12<00:00, 12.60batch/s]


Epoch [5], last_lr: 0.01000, train_loss: 0.6573, val_loss: 0.7451, val_acc: 0.7326


Training...: 100%|██████████| 157/157 [00:10<00:00, 14.61batch/s]


Epoch [6], last_lr: 0.01000, train_loss: 0.6029, val_loss: 0.7351, val_acc: 0.7346


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.65batch/s]


Epoch [7], last_lr: 0.01000, train_loss: 0.5723, val_loss: 0.7761, val_acc: 0.7268


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.82batch/s]


Epoch [8], last_lr: 0.01000, train_loss: 0.5417, val_loss: 0.8113, val_acc: 0.7230


Training...: 100%|██████████| 157/157 [00:10<00:00, 14.78batch/s]


Epoch [9], last_lr: 0.01000, train_loss: 0.5223, val_loss: 0.6803, val_acc: 0.7534


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.60batch/s]


Epoch [10], last_lr: 0.01000, train_loss: 0.4954, val_loss: 0.6917, val_acc: 0.7568


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.56batch/s]


Epoch [11], last_lr: 0.01000, train_loss: 0.4739, val_loss: 0.6858, val_acc: 0.7570


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.68batch/s]


Epoch [12], last_lr: 0.01000, train_loss: 0.4613, val_loss: 0.6966, val_acc: 0.7624


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.37batch/s]


Epoch [13], last_lr: 0.01000, train_loss: 0.4432, val_loss: 0.7155, val_acc: 0.7497


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.72batch/s]


Epoch [14], last_lr: 0.00500, train_loss: 0.2938, val_loss: 0.6755, val_acc: 0.7848


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.66batch/s]


Epoch [15], last_lr: 0.00500, train_loss: 0.2469, val_loss: 0.7255, val_acc: 0.7768


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.73batch/s]


Epoch [16], last_lr: 0.00500, train_loss: 0.2349, val_loss: 0.7595, val_acc: 0.7837


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.32batch/s]


Epoch [17], last_lr: 0.00500, train_loss: 0.2231, val_loss: 0.7274, val_acc: 0.7782


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.84batch/s]


Epoch [18], last_lr: 0.00500, train_loss: 0.1990, val_loss: 0.8551, val_acc: 0.7633


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.71batch/s]


Epoch [19], last_lr: 0.00250, train_loss: 0.1018, val_loss: 0.8189, val_acc: 0.7950


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.84batch/s]


Epoch [20], last_lr: 0.00250, train_loss: 0.0457, val_loss: 0.9345, val_acc: 0.7888


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.58batch/s]


Epoch [21], last_lr: 0.00250, train_loss: 0.0371, val_loss: 1.0381, val_acc: 0.7847


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.73batch/s]


Epoch [22], last_lr: 0.00250, train_loss: 0.0666, val_loss: 1.0284, val_acc: 0.7781


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.86batch/s]


Epoch [23], last_lr: 0.00125, train_loss: 0.0383, val_loss: 0.9752, val_acc: 0.7946


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.45batch/s]


Epoch [24], last_lr: 0.00125, train_loss: 0.0102, val_loss: 1.0096, val_acc: 0.8001


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.67batch/s]


Epoch [25], last_lr: 0.00125, train_loss: 0.0046, val_loss: 1.0562, val_acc: 0.8001


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.50batch/s]


Epoch [26], last_lr: 0.00125, train_loss: 0.0038, val_loss: 1.1050, val_acc: 0.7981


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.62batch/s]


Epoch [27], last_lr: 0.00063, train_loss: 0.0025, val_loss: 1.1123, val_acc: 0.7971


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.14batch/s]


Epoch [28], last_lr: 0.00063, train_loss: 0.0017, val_loss: 1.1173, val_acc: 0.7969


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.43batch/s]


Epoch [29], last_lr: 0.00063, train_loss: 0.0016, val_loss: 1.1229, val_acc: 0.7971


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.64batch/s]


Epoch [30], last_lr: 0.00063, train_loss: 0.0019, val_loss: 1.1349, val_acc: 0.7976


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.74batch/s]


Epoch [31], last_lr: 0.00031, train_loss: 0.0013, val_loss: 1.1378, val_acc: 0.7991


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.74batch/s]


Epoch [32], last_lr: 0.00031, train_loss: 0.0014, val_loss: 1.1467, val_acc: 0.7977


Training...: 100%|██████████| 157/157 [00:09<00:00, 15.74batch/s]


Epoch [33], last_lr: 0.00031, train_loss: 0.0011, val_loss: 1.1566, val_acc: 0.7999


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.60batch/s]


Epoch [34], last_lr: 0.00031, train_loss: 0.0010, val_loss: 1.1640, val_acc: 0.7986


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.43batch/s]


Epoch [35], last_lr: 0.00016, train_loss: 0.0009, val_loss: 1.1569, val_acc: 0.7981


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.51batch/s]


Epoch [36], last_lr: 0.00016, train_loss: 0.0009, val_loss: 1.1768, val_acc: 0.7975


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.54batch/s]


Epoch [37], last_lr: 0.00016, train_loss: 0.0008, val_loss: 1.1717, val_acc: 0.7992


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.14batch/s]


Epoch [38], last_lr: 0.00016, train_loss: 0.0008, val_loss: 1.1722, val_acc: 0.7975


Training...: 100%|██████████| 157/157 [00:10<00:00, 15.27batch/s]


Epoch [39], last_lr: 0.00008, train_loss: 0.0007, val_loss: 1.1749, val_acc: 0.7978
[{'Loss': 1.352973222732544, 'Acc': 0.7788200974464417}]
Performance of Standard Forget Model on Forget Class
Accuracy: 1.1944110505282879
Loss: 7.286940574645996
Performance of Standard Forget Model on Retain Class
Accuracy: 69.08203363418579
Loss: 0.8631603717803955
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 9.606056213378906
Performance of Standard Forget Model on Retain Class
Accuracy: 71.56982421875
Loss: 0.810927152633667


___
## Evaluate multiple models

In [25]:
from PIL import Image
import tarfile
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as tt

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Firstly, all the data
class SubData(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        path, label = self.data[idx]

        img = Image.open(f"{path}").convert('RGB')
        img = self.transform(img)
        label = torch.tensor(label)
        return img.to(self.device), label.to(self.device)

data_dir = f'data{os.sep}cifar10'
classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# classes which are required to un-learn
classes_to_forget = [0, 2]

transform_test = tt.Compose([
    tt.ToTensor(),
    tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_train = tt.Compose([
    tt.ToTensor(),
    tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

In [26]:
valid_all_ds = ImageFolder(data_dir+f'{os.sep}test', transform_test)
valid_all_dl = DataLoader(valid_all_ds, 256,)

train_all_ds = ImageFolder(data_dir+f'{os.sep}train', transform_train)
train_all_dl = DataLoader(train_all_ds, 256,)

rt_tr = {}
for t, l in train_all_ds.imgs:
    if l not in classes_to_forget:
        rt_tr[len(rt_tr)] = (t, l)
rt_vl = {}
for t, l in valid_all_ds.imgs:
    if l not in classes_to_forget:
        rt_vl[len(rt_vl)] = (t, l)

train_retain_ds = SubData(rt_tr, transform_train)
valid_retain_ds = SubData(rt_vl, transform_test)

train_retain_dl = DataLoader(train_retain_ds, 256, shuffle=True)
valid_retain_dl = DataLoader(valid_retain_ds, 256*2)

rt_tr = {}
for t, l in train_all_ds.imgs:
    if l in classes_to_forget:
        rt_tr[len(rt_tr)] = (t, l)
rt_vl = {}
for t, l in valid_all_ds.imgs:
    if l in classes_to_forget:
        rt_vl[len(rt_vl)] = (t, l)

train_forget_ds = SubData(rt_tr, transform_train)
valid_forget_ds = SubData(rt_vl, transform_test)

train_forget_dl = DataLoader(train_forget_ds, 256, shuffle=True)
valid_forget_dl = DataLoader(train_forget_ds, 256*2)

loaders = {
    0: train_all_dl,
    1: valid_all_dl,
    2: train_retain_dl,
    3: valid_retain_dl,
    4: train_forget_dl,
    5: valid_forget_dl,
}

In [32]:
import src.metrics
from src.fyemu_tunable import evaluate
from torchvision.models import resnet18
paper_ms    = load_models_dict(path="data/paper/models")
gemu_ms     = load_models_dict(path="data/new/models")

exact = resnet18(num_classes = 10).to(DEVICE)
exact.load_state_dict(torch.load("ResNET18_CIFAR10_RETRAIN_CLASSES.pt", weights_only=True))

<All keys matched successfully>

In [28]:
def run(model, loaders):
    results = {}
    for name, loader in loaders.items():
        results[name] = evaluate(model, loader)

    return results
run(exact, loaders)

{0: {'Loss': 2.209007740020752, 'Acc': 0.8007015585899353},
 1: {'Loss': 3.2875797748565674, 'Acc': 0.626953125},
 2: {'Loss': 0.0003276505449321121, 'Acc': 1.0},
 3: {'Loss': 1.3938783407211304, 'Acc': 0.778637707233429},
 4: {'Loss': 11.116077423095703, 'Acc': 0.0},
 5: {'Loss': 11.099096298217773, 'Acc': 0.0}}

In [50]:
kl_divs_per_loader_gen = {}
for name, loader in loaders.items():
    kl_divs_per_loader_gen[name] = [src.metrics.kl_divergence_between_models(exact, gemu_ms[i], data_loader=loader, device=DEVICE) for i in gemu_ms.keys()]
kl_divs_per_loader_gen

                                                                   

{0: [0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433,
  0.8722768281491433],
 1: [0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0.9261725828051567,
  0

In [51]:
kl_divs_per_loader_paper = {}
for name, loader in loaders.items():
    kl_divs_per_loader_paper[name] = [src.metrics.kl_divergence_between_models(exact, paper_ms[i], data_loader=loader, device=DEVICE) for i in paper_ms.keys()]
kl_divs_per_loader_paper

                                                                 

{0: [1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093],
 1: [1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1.4823955297470093,
  1

In [18]:
import matplotlib.pyplot as plt
from typing import Dict, List, Literal

def create_boxplots(score_lists: Dict[str, List[float]], title: str = 'Box Plot of Accuracy Scores for Different Models', evaluation: Literal["Accuracy", "Loss"] = "Accuracy") -> None:
    """Create a box plot of accuracy scores for each parsed list in the diconary."""

    # Prepare data for the box plot
    data = [scores for scores in score_lists.values()]
    labels = list(score_lists.keys())

    # Create the box plot
    plt.figure(figsize=(10, 6))
    plt.boxplot(data, labels=labels, patch_artist=True)

    # Add labels and title
    plt.xlabel('Subsets')
    plt.xticks(rotation=30)
    plt.ylabel(f'{evaluation} Score')
    plt.ylim(0, 1.0)
    plt.title(title)

    # Display the plot
    plt.show()

### Accuracy Per Class

In [19]:
from PIL import Image
import tarfile
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as tt

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

data_classes = {}
for c in classes:
    data_classes[c] = {}
    for t, l in valid_all_ds.imgs:
        if l == c:
            data_classes[c][len(data_classes[c])] = (t, l)

data_dl_classes = {c: DataLoader(SubData(data_classes[c], transform_test), 256) for c in classes}

In [None]:
class_accs_paper = {c: [] for c in classes}
for idx, model in paper_ms.items():
    model.eval()
    accs = run(model, data_dl_classes)
    for c, acc in accs.items():
        class_accs_paper[c].append(acc[c])

In [None]:
class_accs_gemu = {c: [] for c in classes}
for idx, model in gemu_ms.items():
    model.eval()
    accs = run(model, data_dl_classes)
    for c, acc in accs.items():
        class_accs_gemu[c].append(acc[c])

In [None]:
create_boxplots(class_accs_paper, "Box Plot of Accuracy Scores for 30 Models using FYEMU", "Accuracy")

In [None]:
create_boxplots(class_accs_gemu, "Box Plot of Accuracy Scores for 30 Models using GEMU", "Accuracy")