In [1]:
# Fetch the experiment repository and change into this experiment's folder;
# the relative file names used in later cells resolve against that folder.
!git clone https://github.com/Jsrsky/machine_unlearning_experiments
%cd machine_unlearning_experiments/experiments/animal_faces/naive

Cloning into 'machine_unlearning_experiments'...
remote: Enumerating objects: 143, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 143 (delta 45), reused 117 (delta 32), pack-reused 0 (from 0)[K
Receiving objects: 100% (143/143), 26.48 MiB | 30.60 MiB/s, done.
Resolving deltas: 100% (45/45), done.
/kaggle/working/machine_unlearning_experiments/experiments/animal_faces/naive


## Dependencies

In [2]:
import sys
from pathlib import Path
from torchvision import datasets

## Utils

In [3]:
# Repository root: three directory levels above the current working directory
# (.../experiments/animal_faces/naive -> repository root).
project_root = Path.cwd().resolve().parents[2]
# Make the repository root importable so project modules can be imported below.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))
    
# Create <project_root>/data if it does not exist yet.
# NOTE(review): data_root is reassigned to the Kaggle input directory in the next cell,
# so the images loaded later are not read through this directory.
data_root = project_root / 'data'
data_root.mkdir(parents=True, exist_ok=True)

# Must run after the sys.path update above.
# Presumably notebook_setup lives at the repository root — verify.
from notebook_setup import setup_notebook
setup_notebook()

Notebook setup completed. Project root added to sys.path: /kaggle/working/machine_unlearning_experiments


In [4]:
# Point data_root at the Kaggle input dataset; this replaces the project-local
# data directory assigned in the setup cell above.
# NOTE(review): absolute, Kaggle-specific path — adjust when running elsewhere.
data_root = Path('/kaggle/input/animal-faces')

In [5]:
# Device configuration (DEVICE is defined in utils.utils)
from utils.utils import DEVICE

print(f"Device used: {DEVICE}")

# Set the random seed for reproducibility (called with its default arguments)
from utils.utils import set_seed
set_seed()

# Dataloader preparation helper
from methods.naive.naive_utils import init_dataloaders

# Training loop
from utils.train_test_metrics import train_model

# Plotting of the training history
from utils.train_test_metrics import plot_training_history

# Test function
from utils.train_test_metrics import test_model

# Metrics
from utils.train_test_metrics import show_metrics

# Model initialisation
from models.effnetb0 import init_model_effnetb0

Device used: cuda


## Parameters (arbitrarily chosen)

In [6]:
# Batch size passed to the dataloader helpers.
BATCH_SIZE = 32
# Learning rate passed to init_model_effnetb0.
LEARNING_RATE = 0.01
# Number of epochs passed to train_model.
# NOTE(review): the saved training output further down reports ".../10" epochs, so it was
# apparently produced with a different EPOCHS value — re-run to refresh the output.
EPOCHS = 5

# Simple base training

## Init model

In [7]:
# Initialise the EfficientNet-B0 model; the helper also returns the model name,
# loss criterion, optimizer and the image transform used for the datasets below.
# fc_output=3 presumably matches the three AFHQ classes (cat, dog, wildlife) — verify.
model, model_name, criterion, optimizer, transform = init_model_effnetb0(learning_rate=LEARNING_RATE, fc_output=3)

Init model...


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 124MB/s] 


Done initializing model.
Model ID: 134375036937760, Optimizer ID: 134375036937904, Criterion ID: 134375036944336


## Init dataloaders

In [8]:
# Image folders under data_root, both loaded with the transform returned by the model initialiser.
# Presumably 'afhq/train' is split into train/validation (val_ratio) and 'afhq/val'
# is used as the test set — verify against init_dataloaders.
dataset = datasets.ImageFolder(root=data_root / 'afhq/train', transform=transform)
test_dataset = datasets.ImageFolder(root=data_root / 'afhq/val', transform=transform)
# File name handed to init_dataloaders as info_file_path; the unlearning section
# later refers to the same file name.
data_split_path = 'afhq_data_splits.json'

# Fix: the original call was missing the comma after `batch_size=BATCH_SIZE`,
# which made this cell a SyntaxError.
train_loader, val_loader, test_loader, classes = init_dataloaders(
    datasets=(dataset, test_dataset),
    val_ratio=0.2,
    batch_size=BATCH_SIZE,
    info_file_path=data_split_path,
)

Prepare DataLoaders...
Done preparing DataLoaders.


## Call train

In [None]:
# Train the base model for EPOCHS epochs. The saved log below shows that the model
# is written to "<model_name>_model.pth" whenever validation accuracy reaches a new best.
train_model(model, model_name, train_loader, val_loader, criterion, optimizer, num_epochs=EPOCHS)

Training Epoch 1/10: 100%|██████████| 366/366 [02:27<00:00,  2.48it/s]
Evaluating on validation set...: 100%|██████████| 92/92 [00:26<00:00,  3.51it/s]


Epoch [1/10], Train Loss: 0.0647, Train Accuracy: 0.9806, Val Loss: 0.0339, Val Accuracy: 0.9911
Epoch 1: New best validation accuracy: 0.9911. Model saved to EffNetB0_AFHQ_model.pth.


Training Epoch 2/10: 100%|██████████| 366/366 [02:26<00:00,  2.49it/s]
Evaluating on validation set...: 100%|██████████| 92/92 [00:25<00:00,  3.65it/s]


Epoch [2/10], Train Loss: 0.0292, Train Accuracy: 0.9914, Val Loss: 0.0411, Val Accuracy: 0.9874


Training Epoch 3/10:  44%|████▎     | 160/366 [01:04<01:21,  2.52it/s]

## Plot history losses

In [None]:
# Training history file of the base model; presumably written by train_model — verify.
history_path = f'{model_name}_history.json'
plot_training_history(history_path)

## Call test

In [None]:
# Checkpoint file name; follows the "<model_name>_model.pth" pattern reported in the training log.
model_path = f"{model_name}_model.pth"
# Evaluate on the test loader using the checkpoint at model_path.
test_model(model, model_name, model_path, test_loader)

## Show metrics

In [None]:
# Predictions file of the base model; presumably produced by test_model above — verify.
predictions_path = f'{model_name}_predictions.json'
# `classes` is the value returned by init_dataloaders; for AFHQ it is expected
# to be ['cat', 'dog', 'wildlife'] — TODO confirm.
show_metrics(predictions_path, classes, model_name)

## Utils naive

In [None]:
# Select samples to unlearn (10% random)
from utils.utils import select_samples_to_unlearn

# Update data splits
from methods.naive.naive_utils import update_splits_after_unlearning

# Recreate Dataloaders from json files
from methods.naive.naive_utils import recreate_dataloaders

# Naive unlearning

### Init new model

In [None]:
# Data split file from the base training run (same file name as data_split_path above).
data_splits_file = "afhq_data_splits.json"
# File for the samples selected for unlearning.
unlearn_samples_file = "afhq_samples_to_unlearn.json"
# File for the data splits after the selected samples have been removed.
updated_data_splits_path = "updated_afhq_data_splits.json"

In [None]:
# Re-initialise model, criterion and optimizer so that retraining does not continue
# from the base model trained above.
model, model_name, criterion, optimizer, transform = init_model_effnetb0(learning_rate=LEARNING_RATE, fc_output=3)
# Prefix the name so the history/checkpoint/prediction file names derived from it
# differ from those of the base model.
model_name = "naive_unlearning_" + model_name

### Init data

In [None]:
# Re-create the image folder datasets with the transform returned for the new model.
dataset = datasets.ImageFolder(root=data_root/'afhq/train', transform=transform)
test_dataset = datasets.ImageFolder(root=data_root/'afhq/val', transform=transform)

## Select samples to unlearn

In [None]:
# Run this cell only once per experiment.
# Presumably each call writes a new selection to unlearn_samples_file, replacing
# any earlier one — verify.
# unlearn_ratio=0.1 requests 10% of the samples.
select_samples_to_unlearn(data_splits_file, unlearn_samples_file, unlearn_ratio=0.1)

## Update data splits -> delete samples

In [None]:
# Combine the original splits with the selected samples and store the result under
# updated_data_splits_path; presumably the selected samples are removed from the splits — verify.
update_splits_after_unlearning(data_splits_file, unlearn_samples_file, updated_data_splits_path)

## Recreate DataLoaders from json files

In [None]:
# Rebuild the loaders from the updated split file.
# NOTE(review): only `dataset` is passed here; `test_dataset` created above is not used
# in this call — confirm how recreate_dataloaders builds test_loader.
train_loader, val_loader, test_loader, classes = recreate_dataloaders(
    data_splits_file=updated_data_splits_path,
    dataset=dataset, 
    batch_size=BATCH_SIZE)

## Re-train

In [None]:
# Retrain the re-initialised model on the loaders rebuilt from the updated splits,
# using the same number of epochs as the base run.
train_model(model, model_name, train_loader, val_loader, criterion, optimizer, num_epochs=EPOCHS)

## Plot history losses

In [None]:
# Training history file of the retrained model (model_name carries the "naive_unlearning_" prefix).
history_path = f'{model_name}_history.json'
plot_training_history(history_path)

## Re-test

In [None]:
# Checkpoint file name of the retrained model (model_name carries the "naive_unlearning_" prefix).
model_path = f"{model_name}_model.pth"
# Evaluate the retrained model on the test loader.
test_model(model, model_name, model_path, test_loader)

## Show metrics

In [None]:
# Predictions file of the retrained model; presumably produced by test_model above — verify.
predictions_path = f'{model_name}_predictions.json'
# `classes` is the value returned by recreate_dataloaders; for AFHQ it is expected
# to be ['cat', 'dog', 'wildlife'] — TODO confirm.
show_metrics(predictions_path, classes, model_name)