# Hyperparameter Finetuning

We want to find the best hyperparameters for the Generator Network.

To do so, we search for the best values of the following parameters:

- Number of Epochs (meaning `num_epochs` and `num_steps`)
- Learning Rate
- Batch Size
- Number of Noise Batches
- Number of Layers
- Regularization term
- Number of Neurons for each Network

In [7]:
from src.fyemu_tunable import main, evaluate
import torch
import os
import torchvision.transforms as tt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import optuna
from torchvision.models import resnet18

from src.metrics import kl_divergence_between_models

# Prefer the first CUDA device when PyTorch can see one; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Bare expression so the notebook displays the selected device.
DEVICE

'cuda:0'

Check out this short tutorial to see how we save and resume the optimization state:

https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/001_rdb.html

In [8]:
import logging
import sys
import pickle
import optuna

# Add stream handler of stdout to show the messages.
# The handler is only attached once: re-running this cell in the same kernel
# would otherwise add another handler each time and print every message
# multiple times.
optuna_logger = optuna.logging.get_logger("optuna")
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in optuna_logger.handlers
):
    optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

study_name = "GeneratorOpti"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)

# Restore the sampler saved by an earlier run, if there is one.
# NOTE(review): pickle.load can execute arbitrary code; only load a
# "sampler.pkl" that was written by this notebook.
restored_sampler = None
if os.path.exists("sampler.pkl"):
    # Context manager so the file handle is closed after loading.
    with open("sampler.pkl", "rb") as fin:
        restored_sampler = pickle.load(fin)

# sampler=None makes Optuna fall back to its default sampler, which matches
# calling create_study without the sampler argument.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
    sampler=restored_sampler,
)

[I 2025-01-09 12:42:59,088] Using an existing study with name 'GeneratorOpti' instead of creating a new one.


Using an existing study with name 'GeneratorOpti' instead of creating a new one.
Using an existing study with name 'GeneratorOpti' instead of creating a new one.


In [9]:
def objective(trial):
    """Optuna objective: train one generator configuration and score it.

    Samples the training hyperparameters and the hidden-layer widths from
    ``trial``, runs ``main`` (from ``src.fyemu_tunable``) with them, and
    compares the result against the ResNet-18 whose weights are loaded from
    ``ResNET18_CIFAR10_RETAIN_CLASSES.pt``. The comparison is done with
    ``kl_divergence_between_models`` on the images in the ``test`` folder of
    the ``data/cifar10`` directory.

    Args:
        trial: The Optuna trial used to sample every hyperparameter.

    Returns:
        The value returned by ``kl_divergence_between_models``; this is the
        quantity the study optimizes.
    """
    # Scalar hyperparameters. The order and the names of the suggest_* calls
    # are kept as they are so new trials stay comparable with the trials
    # already stored in the study.
    n_epochs = trial.suggest_int('opt_Epochs', 1, 10)
    n_steps = trial.suggest_int('opt_Steps', 1, 20)
    learning_rate = trial.suggest_float('opt_Learning_Rate', 0.01, 0.3)
    batch_size = trial.suggest_int('opt_Batch_Size', 32, 512)
    n_noise_batches = trial.suggest_int('opt_Number_of_Noise_Batches', 1, 10)
    regularization = trial.suggest_float('opt_Regularization_term', 0.01, 0.3)
    noise_dim = trial.suggest_int('opt_Noise_Dim', 1, 512)

    # All seven widths are always sampled (l1 ... l7, in this order), then the
    # depth; only the first `n_layers` widths are handed to `main`.
    layer_widths = [
        trial.suggest_int(name, 32, 1024)
        for name in ('l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7')
    ]
    n_layers = trial.suggest_int('n_layers', 1, 7)
    Layers = layer_widths[:n_layers]

    mod = main(
        t_Epochs=n_epochs,
        t_Steps=n_steps,
        t_Learning_Rate=learning_rate,
        t_Batch_Size=batch_size,
        t_Number_of_Noise_Batches=n_noise_batches,
        t_Regularization_term=regularization,
        t_Layers=Layers,
        t_Noise_Dim=noise_dim,
        new_baseline=False,
        logs=False,
        model_eval_logs=True,
    )

    data_dir = f'data{os.sep}cifar10'

    # Same preprocessing as the standalone evaluation cells of this notebook.
    transform_test = tt.Compose([
        tt.ToTensor(),
        tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    valid_ds = ImageFolder(data_dir+f'{os.sep}test', transform_test)
    valid_dl = DataLoader(valid_ds, 256,)

    exact = resnet18(num_classes=10)
    # `exact` is created on the CPU, so the checkpoint is mapped to the CPU as
    # well. Without map_location, a checkpoint that was saved from a GPU
    # cannot be loaded on a machine without CUDA.
    exact.load_state_dict(
        torch.load(
            "ResNET18_CIFAR10_RETAIN_CLASSES.pt",
            map_location="cpu",
            weights_only=True,
        )
    )

    return kl_divergence_between_models(
        model1=mod,
        model2=exact,
        data_loader=valid_dl,
    )

import pickle

# Number of additional trials to run each time this cell is executed.
N_TRIALS = 10

try:
    study.optimize(objective, n_trials=N_TRIALS)
finally:
    # Save the sampler with pickle to be loaded later.
    # Done in `finally` so the sampler state is also persisted when the
    # optimization is interrupted or a trial raises; the finished trials are
    # already kept in the study's storage.
    with open("sampler.pkl", "wb") as fout:
        pickle.dump(study.sampler, fout)

[{'Loss': 1.5330902338027954, 'Acc': 0.7618482708930969}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.048828125
Loss: 8.005228042602539
Performance of Standard Forget Model on Retain Class
Accuracy: 70.1098620891571
Loss: 0.8384725451469421
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 9.946136474609375
Performance of Standard Forget Model on Retain Class
Accuracy: 71.23779058456421
Loss: 0.8103184103965759


[I 2025-01-09 12:43:59,474] Trial 118 finished with value: 0.8443588592112067 and parameters: {'opt_Epochs': 8, 'opt_Steps': 7, 'opt_Learning_Rate': 0.03040462053395056, 'opt_Batch_Size': 299, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.2446475930313835, 'opt_Noise_Dim': 362, 'l1': 476, 'l2': 358, 'l3': 397, 'l4': 345, 'l5': 715, 'l6': 136, 'l7': 958, 'n_layers': 4}. Best is trial 91 with value: 0.7797882512211798.


Trial 118 finished with value: 0.8443588592112067 and parameters: {'opt_Epochs': 8, 'opt_Steps': 7, 'opt_Learning_Rate': 0.03040462053395056, 'opt_Batch_Size': 299, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.2446475930313835, 'opt_Noise_Dim': 362, 'l1': 476, 'l2': 358, 'l3': 397, 'l4': 345, 'l5': 715, 'l6': 136, 'l7': 958, 'n_layers': 4}. Best is trial 91 with value: 0.7797882512211798.
Trial 118 finished with value: 0.8443588592112067 and parameters: {'opt_Epochs': 8, 'opt_Steps': 7, 'opt_Learning_Rate': 0.03040462053395056, 'opt_Batch_Size': 299, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.2446475930313835, 'opt_Noise_Dim': 362, 'l1': 476, 'l2': 358, 'l3': 397, 'l4': 345, 'l5': 715, 'l6': 136, 'l7': 958, 'n_layers': 4}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5330172777175903, 'Acc': 0.7615516781806946}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 8.209650993347168
Performance of Standard Forget 

[I 2025-01-09 12:45:08,260] Trial 119 finished with value: 0.9261899299919604 and parameters: {'opt_Epochs': 9, 'opt_Steps': 9, 'opt_Learning_Rate': 0.08399080535659484, 'opt_Batch_Size': 222, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.2583200742874406, 'opt_Noise_Dim': 298, 'l1': 346, 'l2': 195, 'l3': 464, 'l4': 178, 'l5': 580, 'l6': 636, 'l7': 891, 'n_layers': 6}. Best is trial 91 with value: 0.7797882512211798.


Trial 119 finished with value: 0.9261899299919604 and parameters: {'opt_Epochs': 9, 'opt_Steps': 9, 'opt_Learning_Rate': 0.08399080535659484, 'opt_Batch_Size': 222, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.2583200742874406, 'opt_Noise_Dim': 298, 'l1': 346, 'l2': 195, 'l3': 464, 'l4': 178, 'l5': 580, 'l6': 636, 'l7': 891, 'n_layers': 6}. Best is trial 91 with value: 0.7797882512211798.
Trial 119 finished with value: 0.9261899299919604 and parameters: {'opt_Epochs': 9, 'opt_Steps': 9, 'opt_Learning_Rate': 0.08399080535659484, 'opt_Batch_Size': 222, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.2583200742874406, 'opt_Noise_Dim': 298, 'l1': 346, 'l2': 195, 'l3': 464, 'l4': 178, 'l5': 580, 'l6': 636, 'l7': 891, 'n_layers': 6}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5303887128829956, 'Acc': 0.7619109153747559}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 8.67639446258545
Performance of Standard Forget M

[I 2025-01-09 12:46:01,918] Trial 120 finished with value: 0.8782543547451493 and parameters: {'opt_Epochs': 8, 'opt_Steps': 6, 'opt_Learning_Rate': 0.020842392495563473, 'opt_Batch_Size': 269, 'opt_Number_of_Noise_Batches': 5, 'opt_Regularization_term': 0.10866557192580548, 'opt_Noise_Dim': 223, 'l1': 232, 'l2': 332, 'l3': 661, 'l4': 110, 'l5': 403, 'l6': 80, 'l7': 697, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 120 finished with value: 0.8782543547451493 and parameters: {'opt_Epochs': 8, 'opt_Steps': 6, 'opt_Learning_Rate': 0.020842392495563473, 'opt_Batch_Size': 269, 'opt_Number_of_Noise_Batches': 5, 'opt_Regularization_term': 0.10866557192580548, 'opt_Noise_Dim': 223, 'l1': 232, 'l2': 332, 'l3': 661, 'l4': 110, 'l5': 403, 'l6': 80, 'l7': 697, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 120 finished with value: 0.8782543547451493 and parameters: {'opt_Epochs': 8, 'opt_Steps': 6, 'opt_Learning_Rate': 0.020842392495563473, 'opt_Batch_Size': 269, 'opt_Number_of_Noise_Batches': 5, 'opt_Regularization_term': 0.10866557192580548, 'opt_Noise_Dim': 223, 'l1': 232, 'l2': 332, 'l3': 661, 'l4': 110, 'l5': 403, 'l6': 80, 'l7': 697, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.530378818511963, 'Acc': 0.7618527412414551}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.09765625
Loss: 7.178658485412598
Performance of Standard

[I 2025-01-09 12:46:52,620] Trial 121 finished with value: 0.7917796719819307 and parameters: {'opt_Epochs': 9, 'opt_Steps': 5, 'opt_Learning_Rate': 0.056725458615474886, 'opt_Batch_Size': 244, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.23357176223315118, 'opt_Noise_Dim': 397, 'l1': 306, 'l2': 261, 'l3': 489, 'l4': 80, 'l5': 909, 'l6': 448, 'l7': 633, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 121 finished with value: 0.7917796719819307 and parameters: {'opt_Epochs': 9, 'opt_Steps': 5, 'opt_Learning_Rate': 0.056725458615474886, 'opt_Batch_Size': 244, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.23357176223315118, 'opt_Noise_Dim': 397, 'l1': 306, 'l2': 261, 'l3': 489, 'l4': 80, 'l5': 909, 'l6': 448, 'l7': 633, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 121 finished with value: 0.7917796719819307 and parameters: {'opt_Epochs': 9, 'opt_Steps': 5, 'opt_Learning_Rate': 0.056725458615474886, 'opt_Batch_Size': 244, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.23357176223315118, 'opt_Noise_Dim': 397, 'l1': 306, 'l2': 261, 'l3': 489, 'l4': 80, 'l5': 909, 'l6': 448, 'l7': 633, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5389434099197388, 'Acc': 0.7611905336380005}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 9.302387237548828
Performance of Standard Forge

[I 2025-01-09 12:47:58,997] Trial 122 finished with value: 0.8580988541245461 and parameters: {'opt_Epochs': 10, 'opt_Steps': 8, 'opt_Learning_Rate': 0.07506628360356285, 'opt_Batch_Size': 240, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.23367481981397564, 'opt_Noise_Dim': 383, 'l1': 389, 'l2': 261, 'l3': 520, 'l4': 94, 'l5': 551, 'l6': 493, 'l7': 624, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 122 finished with value: 0.8580988541245461 and parameters: {'opt_Epochs': 10, 'opt_Steps': 8, 'opt_Learning_Rate': 0.07506628360356285, 'opt_Batch_Size': 240, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.23367481981397564, 'opt_Noise_Dim': 383, 'l1': 389, 'l2': 261, 'l3': 520, 'l4': 94, 'l5': 551, 'l6': 493, 'l7': 624, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 122 finished with value: 0.8580988541245461 and parameters: {'opt_Epochs': 10, 'opt_Steps': 8, 'opt_Learning_Rate': 0.07506628360356285, 'opt_Batch_Size': 240, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.23367481981397564, 'opt_Noise_Dim': 383, 'l1': 389, 'l2': 261, 'l3': 520, 'l4': 94, 'l5': 551, 'l6': 493, 'l7': 624, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5308001041412354, 'Acc': 0.7618727087974548}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.048828125
Loss: 8.891597747802734
Performance of Standa

[I 2025-01-09 12:48:48,552] Trial 123 finished with value: 0.7848866954445838 and parameters: {'opt_Epochs': 9, 'opt_Steps': 5, 'opt_Learning_Rate': 0.05741691927134082, 'opt_Batch_Size': 233, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.218085448979945, 'opt_Noise_Dim': 406, 'l1': 296, 'l2': 309, 'l3': 485, 'l4': 80, 'l5': 913, 'l6': 456, 'l7': 578, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 123 finished with value: 0.7848866954445838 and parameters: {'opt_Epochs': 9, 'opt_Steps': 5, 'opt_Learning_Rate': 0.05741691927134082, 'opt_Batch_Size': 233, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.218085448979945, 'opt_Noise_Dim': 406, 'l1': 296, 'l2': 309, 'l3': 485, 'l4': 80, 'l5': 913, 'l6': 456, 'l7': 578, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 123 finished with value: 0.7848866954445838 and parameters: {'opt_Epochs': 9, 'opt_Steps': 5, 'opt_Learning_Rate': 0.05741691927134082, 'opt_Batch_Size': 233, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.218085448979945, 'opt_Noise_Dim': 406, 'l1': 296, 'l2': 309, 'l3': 485, 'l4': 80, 'l5': 913, 'l6': 456, 'l7': 578, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5419161319732666, 'Acc': 0.7609140276908875}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 8.56777286529541
Performance of Standard Forget Model

[I 2025-01-09 12:49:26,080] Trial 124 finished with value: 0.8691173888742924 and parameters: {'opt_Epochs': 9, 'opt_Steps': 2, 'opt_Learning_Rate': 0.04635952192198793, 'opt_Batch_Size': 228, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.26660772467861454, 'opt_Noise_Dim': 401, 'l1': 175, 'l2': 291, 'l3': 592, 'l4': 136, 'l5': 610, 'l6': 398, 'l7': 567, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 124 finished with value: 0.8691173888742924 and parameters: {'opt_Epochs': 9, 'opt_Steps': 2, 'opt_Learning_Rate': 0.04635952192198793, 'opt_Batch_Size': 228, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.26660772467861454, 'opt_Noise_Dim': 401, 'l1': 175, 'l2': 291, 'l3': 592, 'l4': 136, 'l5': 610, 'l6': 398, 'l7': 567, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 124 finished with value: 0.8691173888742924 and parameters: {'opt_Epochs': 9, 'opt_Steps': 2, 'opt_Learning_Rate': 0.04635952192198793, 'opt_Batch_Size': 228, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.26660772467861454, 'opt_Noise_Dim': 401, 'l1': 175, 'l2': 291, 'l3': 592, 'l4': 136, 'l5': 610, 'l6': 398, 'l7': 567, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5283762216567993, 'Acc': 0.7621046304702759}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 7.769351959228516
Performance of Standard Forge

[I 2025-01-09 12:50:16,844] Trial 125 finished with value: 0.8457309000194076 and parameters: {'opt_Epochs': 9, 'opt_Steps': 4, 'opt_Learning_Rate': 0.05988973559953498, 'opt_Batch_Size': 285, 'opt_Number_of_Noise_Batches': 6, 'opt_Regularization_term': 0.21713901758997334, 'opt_Noise_Dim': 424, 'l1': 283, 'l2': 224, 'l3': 479, 'l4': 188, 'l5': 910, 'l6': 447, 'l7': 663, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 125 finished with value: 0.8457309000194076 and parameters: {'opt_Epochs': 9, 'opt_Steps': 4, 'opt_Learning_Rate': 0.05988973559953498, 'opt_Batch_Size': 285, 'opt_Number_of_Noise_Batches': 6, 'opt_Regularization_term': 0.21713901758997334, 'opt_Noise_Dim': 424, 'l1': 283, 'l2': 224, 'l3': 479, 'l4': 188, 'l5': 910, 'l6': 447, 'l7': 663, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 125 finished with value: 0.8457309000194076 and parameters: {'opt_Epochs': 9, 'opt_Steps': 4, 'opt_Learning_Rate': 0.05988973559953498, 'opt_Batch_Size': 285, 'opt_Number_of_Noise_Batches': 6, 'opt_Regularization_term': 0.21713901758997334, 'opt_Noise_Dim': 424, 'l1': 283, 'l2': 224, 'l3': 479, 'l4': 188, 'l5': 910, 'l6': 447, 'l7': 663, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5293549299240112, 'Acc': 0.7618635892868042}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.09765625
Loss: 9.374564170837402
Performance of Standar

[I 2025-01-09 12:51:54,135] Trial 126 finished with value: 0.8923268221318723 and parameters: {'opt_Epochs': 10, 'opt_Steps': 12, 'opt_Learning_Rate': 0.0409680060765924, 'opt_Batch_Size': 257, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.22653067407969038, 'opt_Noise_Dim': 457, 'l1': 316, 'l2': 380, 'l3': 645, 'l4': 78, 'l5': 1023, 'l6': 453, 'l7': 507, 'n_layers': 6}. Best is trial 91 with value: 0.7797882512211798.


Trial 126 finished with value: 0.8923268221318723 and parameters: {'opt_Epochs': 10, 'opt_Steps': 12, 'opt_Learning_Rate': 0.0409680060765924, 'opt_Batch_Size': 257, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.22653067407969038, 'opt_Noise_Dim': 457, 'l1': 316, 'l2': 380, 'l3': 645, 'l4': 78, 'l5': 1023, 'l6': 453, 'l7': 507, 'n_layers': 6}. Best is trial 91 with value: 0.7797882512211798.
Trial 126 finished with value: 0.8923268221318723 and parameters: {'opt_Epochs': 10, 'opt_Steps': 12, 'opt_Learning_Rate': 0.0409680060765924, 'opt_Batch_Size': 257, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.22653067407969038, 'opt_Noise_Dim': 457, 'l1': 316, 'l2': 380, 'l3': 645, 'l4': 78, 'l5': 1023, 'l6': 453, 'l7': 507, 'n_layers': 6}. Best is trial 91 with value: 0.7797882512211798.
[{'Loss': 1.5363095998764038, 'Acc': 0.7613170742988586}]
Performance of Standard Forget Model on Forget Class
Accuracy: 0.0
Loss: 7.979055404663086
Performance of Standard For

[I 2025-01-09 12:52:57,283] Trial 127 finished with value: 0.81178173199296 and parameters: {'opt_Epochs': 9, 'opt_Steps': 9, 'opt_Learning_Rate': 0.06978916558517818, 'opt_Batch_Size': 211, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.19034861896063554, 'opt_Noise_Dim': 346, 'l1': 332, 'l2': 304, 'l3': 355, 'l4': 149, 'l5': 483, 'l6': 423, 'l7': 470, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


Trial 127 finished with value: 0.81178173199296 and parameters: {'opt_Epochs': 9, 'opt_Steps': 9, 'opt_Learning_Rate': 0.06978916558517818, 'opt_Batch_Size': 211, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.19034861896063554, 'opt_Noise_Dim': 346, 'l1': 332, 'l2': 304, 'l3': 355, 'l4': 149, 'l5': 483, 'l6': 423, 'l7': 470, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.
Trial 127 finished with value: 0.81178173199296 and parameters: {'opt_Epochs': 9, 'opt_Steps': 9, 'opt_Learning_Rate': 0.06978916558517818, 'opt_Batch_Size': 211, 'opt_Number_of_Noise_Batches': 7, 'opt_Regularization_term': 0.19034861896063554, 'opt_Noise_Dim': 346, 'l1': 332, 'l2': 304, 'l3': 355, 'l4': 149, 'l5': 483, 'l6': 423, 'l7': 470, 'n_layers': 5}. Best is trial 91 with value: 0.7797882512211798.


___

### Standard Parameters

In [None]:
# n0 = 5000
# n2 = 5000
# batch_size = 128

# standard_model, standard_history = main(
#     t_Epochs = 5,
#     t_Steps= int((n0 + n2)/(2 * batch_size)), # The idea is to have the same number of updates as there are samples to unlearn
#     t_Learning_Rate = 0.1,
#     t_Batch_Size = batch_size,
#     t_Number_of_Noise_Batches = 10,
#     t_Regularization_term = 0.1,
#     t_Layers = [1000],
#     t_Noise_Dim = 100,
#     new_baseline=True,
#     logs=True,
# )

___

In [10]:
from torchvision.models import resnet18
import torch

# Use the first CUDA device when available, otherwise fall back to the CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# map_location=DEVICE places the checkpoint tensors on the device selected
# above, so a checkpoint saved from a GPU can still be loaded when the
# notebook runs on a CPU-only machine.

# NOTE(review): judging by the file name, these are the weights trained on all
# classes -- confirm against the training script.
train = resnet18(num_classes = 10).to(DEVICE)
train.load_state_dict(torch.load("ResNET18_CIFAR10_ALL_CLASSES.pt",     map_location=DEVICE, weights_only=True))

# NOTE(review): judging by the file name, these are the weights trained on the
# retain classes only -- confirm against the training script.
exact = resnet18(num_classes = 10).to(DEVICE)
exact.load_state_dict(torch.load("ResNET18_CIFAR10_RETAIN_CLASSES.pt",  map_location=DEVICE, weights_only=True))

<All keys matched successfully>

In [11]:
from src.metrics import kl_divergence_between_models
import os
import torchvision.transforms as tt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Preprocessing applied to every test image: tensor conversion followed by
# per-channel normalization.
transform_test = tt.Compose([
    tt.ToTensor(),
    tt.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010),
    ),
])

# Location of the image folders and the evaluation batch size.
data_dir = f'data{os.sep}cifar10'
batch_size = 256

# Images are read from the `test` sub-folder and served in a fixed order.
valid_ds = ImageFolder(root=data_dir+f'{os.sep}test', transform=transform_test)
valid_dl = DataLoader(dataset=valid_ds, batch_size=batch_size, shuffle=False)

# Compare `train` with itself; the result is displayed as the cell output.
kl_divergence_between_models(
    model1=train,
    model2=train,
    data_loader=valid_dl,
)

                                                                 

5.689457863167213e-11

In [12]:
# Self-comparison of `exact`; the displayed value is numerically close to zero.
kl_divergence_between_models(model1=exact, model2=exact, data_loader=valid_dl)

                                                                 

8.839433386642615e-11

In [13]:
# Compare `train` (passed as model1) against `exact` (passed as model2).
kl_divergence_between_models(model1=train, model2=exact, data_loader=valid_dl)

                                                                 

3.39561687707901

In [14]:
# Same two models with the argument roles swapped (`exact` as model1, `train`
# as model2); the displayed value differs from the train-vs-exact call, so the
# measure is not symmetric in its arguments.
kl_divergence_between_models(model1=exact, model2=train, data_loader=valid_dl)

                                                                 

3.2414690941572184