# Hyperparameter Finetuning

We want to find the right parameters for the Generator Network.

By using the best values for the following parameters:

- Number of Epochs (meaning `num_epochs` and `num_steps`)
- Learning Rate
- Batch Size
- Number of Noise Batches
- Number of Layers
- Regularization term
- Number of Neurons for each Network

In [4]:
from src.fyemu_tunable import main
import torch
import os
import torchvision.transforms as tt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import optuna
from torchvision.models import resnet18

from src.metrics import kl_divergence_between_models

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

In [5]:

def objective(trial):

    global valid_dl

    opt_Epochs = trial.suggest_int('opt_Epochs', 1, 10)
    opt_Steps = trial.suggest_int('opt_Steps', 1, 10)
    opt_Learning_Rate = trial.suggest_float('opt_Learning_Rate', 0.01, 0.3)
    opt_Batch_Size = trial.suggest_int('opt_Batch_Size', 32, 256)
    opt_Number_of_Noise_Batches = trial.suggest_int('opt_Number_of_Noise_Batches', 1, 10)
    opt_Regularization_term = trial.suggest_float('opt_Regularization_term', 0.01, 0.3)
    opt_Noise_Dim = trial.suggest_int('opt_Noise_Dim', 1, 512)

    print(f"Epochs: {opt_Epochs} |\nSteps: {opt_Steps} |\nLearning Rate: {opt_Learning_Rate} |\nBatch Size: {opt_Batch_Size} |\nNoise Batches: {opt_Number_of_Noise_Batches} |\nRegularization Term: {opt_Regularization_term} |\nNoise Dim: {opt_Noise_Dim}")

    l1 = trial.suggest_int('l1', 32, 1024)
    l2 = trial.suggest_int('l2', 32, 1024)
    l3 = trial.suggest_int('l3', 32, 1024)
    l4 = trial.suggest_int('l4', 32, 1024)
    l5 = trial.suggest_int('l5', 32, 1024)
    l6 = trial.suggest_int('l6', 32, 1024)
    l7 = trial.suggest_int('l7', 32, 1024)
    l8 = trial.suggest_int('l8', 32, 1024)
    l9 = trial.suggest_int('l9', 32, 1024)
    n_layers = trial.suggest_int('n_layers', 1, 9)

    Layers = [l1, l2, l3, l4, l5, l6, l7, l8, l9]
    Layers = Layers[:1]
    print("Layers: ", Layers)

    mod = main(
        t_Epochs = opt_Epochs,
        t_Steps= opt_Steps,
        t_Learning_Rate = opt_Learning_Rate,
        t_Batch_Size = opt_Batch_Size,
        t_Number_of_Noise_Batches = opt_Number_of_Noise_Batches,
        t_Regularization_term = opt_Regularization_term,
        t_Layers = Layers,
        t_Noise_Dim = opt_Noise_Dim,
        new_baseline=False,
        logs=True,
        model_eval_logs=False,
    )
    
    data_dir = f'data{os.sep}cifar10'

    transform_test = tt.Compose([
        tt.ToTensor(),
        tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    valid_ds = ImageFolder(data_dir+f'{os.sep}test', transform_test)
    valid_dl = DataLoader(valid_ds, 256,)

    exact = resnet18(num_classes = 10).to(DEVICE)
    exact.load_state_dict(torch.load("ResNET18_CIFAR10_RETAIN_CLASSES.pt", weights_only=True))
    div = kl_divergence_between_models(
        model1 = mod,
        model2 = exact,
        data_loader = valid_dl,
    )

    return div

In [6]:
study = optuna.create_study(study_name="GeneratorOpti")
study.optimize(objective, n_trials=5)

study.best_params

[I 2025-01-07 16:31:16,335] A new study created in memory with name: GeneratorOpti


Epochs: 10 |
Steps: 5 |
Learning Rate: 0.11465825454369884 |
Batch Size: 51 |
Noise Batches: 9 |
Regularization Term: 0.06553558993493315 |
Noise Dim: 329
Layers:  [477]
['test', 'train']
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
---Optimizing noise generator---
Optiming loss for class 0
Loss: 2694.703125
Loss: 9083.310546875
Loss: 20814.56640625
Loss: 6954.4912109375
Loss: 13898.6142578125
Loss: 4336.4326171875
Loss: 10435.720703125
Loss: 6321.22509765625
Loss: 4027.041748046875
Loss: 9768.16015625
Optiming loss for class 2
Loss: 3541.98095703125
Loss: 3303.551513671875
Loss: 4005.630126953125
Loss: 3512.423828125
Loss: 5116.18359375
Loss: 11598.4462890625
Loss: 17252.89453125
Loss: 7855.1533203125
Loss: 9938.1123046875
Loss: 11286.79296875
---Impairing Phase---
Train loss 1: 0.26202718601584435,Train Acc:8.0%
---Repairing Phase---
Train loss 1: 0.16379446777701379,Train Acc:10.128%


[I 2025-01-07 16:40:33,669] Trial 0 finished with value: 0.9663974999999996 and parameters: {'opt_Epochs': 10, 'opt_Steps': 5, 'opt_Learning_Rate': 0.11465825454369884, 'opt_Batch_Size': 51, 'opt_Number_of_Noise_Batches': 9, 'opt_Regularization_term': 0.06553558993493315, 'opt_Noise_Dim': 329, 'l1': 477, 'l2': 820, 'l3': 925, 'l4': 140, 'l5': 980, 'l6': 294, 'l7': 130, 'l8': 535, 'l9': 411, 'n_layers': 3}. Best is trial 0 with value: 0.9663974999999996.


Epochs: 4 |
Steps: 7 |
Learning Rate: 0.20207056383994298 |
Batch Size: 94 |
Noise Batches: 8 |
Regularization Term: 0.20960383025524518 |
Noise Dim: 472
Layers:  [387]
['test', 'train']
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
---Optimizing noise generator---
Optiming loss for class 0
Loss: 26018.478515625
Loss: 13825.0205078125
Loss: 18879.162109375
Loss: 21198.26171875
Optiming loss for class 2
Loss: 14665.984375
Loss: 60268.03515625
Loss: 29081.982421875
Loss: 57865.5390625
---Impairing Phase---
Train loss 1: 0.27501997631788255,Train Acc:8.838%
---Repairing Phase---
Train loss 1: 0.16986289055585863,Train Acc:9.86%


[I 2025-01-07 17:21:20,963] Trial 1 finished with value: 0.9721624999999997 and parameters: {'opt_Epochs': 4, 'opt_Steps': 7, 'opt_Learning_Rate': 0.20207056383994298, 'opt_Batch_Size': 94, 'opt_Number_of_Noise_Batches': 8, 'opt_Regularization_term': 0.20960383025524518, 'opt_Noise_Dim': 472, 'l1': 387, 'l2': 640, 'l3': 483, 'l4': 810, 'l5': 116, 'l6': 643, 'l7': 855, 'l8': 847, 'l9': 104, 'n_layers': 4}. Best is trial 0 with value: 0.9663974999999996.


Epochs: 1 |
Steps: 6 |
Learning Rate: 0.01756754949978878 |
Batch Size: 242 |
Noise Batches: 9 |
Regularization Term: 0.060799441872320475 |
Noise Dim: 217
Layers:  [618]
['test', 'train']
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
---Optimizing noise generator---
Optiming loss for class 0
Loss: 188.98255920410156
Optiming loss for class 2
Loss: 262.0127258300781
---Impairing Phase---
Train loss 1: 0.2305739527630806,Train Acc:16.452%
---Repairing Phase---
Train loss 1: 0.1238517887210846,Train Acc:11.888%


[I 2025-01-07 18:56:08,945] Trial 2 finished with value: 0.8117300000000001 and parameters: {'opt_Epochs': 1, 'opt_Steps': 6, 'opt_Learning_Rate': 0.01756754949978878, 'opt_Batch_Size': 242, 'opt_Number_of_Noise_Batches': 9, 'opt_Regularization_term': 0.060799441872320475, 'opt_Noise_Dim': 217, 'l1': 618, 'l2': 150, 'l3': 962, 'l4': 1000, 'l5': 547, 'l6': 432, 'l7': 75, 'l8': 768, 'l9': 772, 'n_layers': 4}. Best is trial 2 with value: 0.8117300000000001.


Epochs: 8 |
Steps: 2 |
Learning Rate: 0.13472848683720162 |
Batch Size: 73 |
Noise Batches: 10 |
Regularization Term: 0.24240655504737732 |
Noise Dim: 386
Layers:  [370]
['test', 'train']
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
---Optimizing noise generator---
Optiming loss for class 0
Loss: 11941.9521484375
Loss: 6315.853515625
Loss: 27457.3203125
Loss: 406.1610107421875
Loss: 11354.96484375
Loss: 2388.40625
Loss: 27567.55078125
Loss: 2804.00048828125
Optiming loss for class 2
Loss: 9550.390625
Loss: 12710.7119140625
Loss: 14950.0166015625
Loss: 61792.27734375
Loss: 17384.671875
Loss: 14655.3203125
Loss: 35956.27734375
Loss: 12491.017578125
---Impairing Phase---
Train loss 1: 0.23237045771837234,Train Acc:10.52%
---Repairing Phase---
Train loss 1: 0.13987509068131446,Train Acc:10.958%


[I 2025-01-07 19:05:00,424] Trial 3 finished with value: 0.889345 and parameters: {'opt_Epochs': 8, 'opt_Steps': 2, 'opt_Learning_Rate': 0.13472848683720162, 'opt_Batch_Size': 73, 'opt_Number_of_Noise_Batches': 10, 'opt_Regularization_term': 0.24240655504737732, 'opt_Noise_Dim': 386, 'l1': 370, 'l2': 880, 'l3': 452, 'l4': 738, 'l5': 548, 'l6': 932, 'l7': 911, 'l8': 548, 'l9': 852, 'n_layers': 1}. Best is trial 2 with value: 0.8117300000000001.


Epochs: 3 |
Steps: 4 |
Learning Rate: 0.15256516910032245 |
Batch Size: 184 |
Noise Batches: 4 |
Regularization Term: 0.23334428810975033 |
Noise Dim: 198
Layers:  [355]
['test', 'train']
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
---Optimizing noise generator---
Optiming loss for class 0
Loss: 11969.7744140625
Loss: 44739.9453125
Loss: 20514.64453125
Optiming loss for class 2
Loss: 8946.8994140625
Loss: 18545.568359375
Loss: 14834.4599609375
---Impairing Phase---
Train loss 1: 0.2647082072353363,Train Acc:9.782%
---Repairing Phase---
Train loss 1: 0.17230711503982543,Train Acc:10.112%


[I 2025-01-07 19:18:45,765] Trial 4 finished with value: 0.9220874999999998 and parameters: {'opt_Epochs': 3, 'opt_Steps': 4, 'opt_Learning_Rate': 0.15256516910032245, 'opt_Batch_Size': 184, 'opt_Number_of_Noise_Batches': 4, 'opt_Regularization_term': 0.23334428810975033, 'opt_Noise_Dim': 198, 'l1': 355, 'l2': 1000, 'l3': 187, 'l4': 198, 'l5': 390, 'l6': 89, 'l7': 357, 'l8': 358, 'l9': 486, 'n_layers': 8}. Best is trial 2 with value: 0.8117300000000001.


{'opt_Epochs': 1,
 'opt_Steps': 6,
 'opt_Learning_Rate': 0.01756754949978878,
 'opt_Batch_Size': 242,
 'opt_Number_of_Noise_Batches': 9,
 'opt_Regularization_term': 0.060799441872320475,
 'opt_Noise_Dim': 217,
 'l1': 618,
 'l2': 150,
 'l3': 962,
 'l4': 1000,
 'l5': 547,
 'l6': 432,
 'l7': 75,
 'l8': 768,
 'l9': 772,
 'n_layers': 4}

___

### Standard Parameters

In [8]:
# n0 = 5000
# n2 = 5000
# batch_size = 128

# standard_model, standard_history = main(
#     t_Epochs = 5,
#     t_Steps= int((n0 + n2)/(2 * batch_size)), # The Idea is to have the same amount of updates as their are samples to unlearn
#     t_Learning_Rate = 0.1,
#     t_Batch_Size = batch_size,
#     t_Number_of_Noise_Batches = 10,
#     t_Regularization_term = 0.1,
#     t_Layers = [1000],
#     t_Noise_Dim = 100,
#     new_baseline=True,
#     logs=True,
# )

___

In [9]:
from torchvision.models import resnet18
import torch

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

train = resnet18(num_classes = 10).to(DEVICE)
train.load_state_dict(torch.load("ResNET18_CIFAR10_ALL_CLASSES.pt",     weights_only=True))

exact = resnet18(num_classes = 10).to(DEVICE)
exact.load_state_dict(torch.load("ResNET18_CIFAR10_RETAIN_CLASSES.pt",  weights_only=True))

<All keys matched successfully>

In [10]:
from src.metrics import kl_divergence_between_models
import os
import torchvision.transforms as tt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

batch_size = 256
data_dir = f'data{os.sep}cifar10'

transform_test = tt.Compose([
    tt.ToTensor(),
    tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

valid_ds = ImageFolder(data_dir+f'{os.sep}test', transform_test)
valid_dl = DataLoader(valid_ds, batch_size, shuffle=False)

kl_divergence_between_models(model1 = train, model2 = train, data_loader = valid_dl)

                                                                 

0.0

In [11]:
kl_divergence_between_models(model1=exact, model2=exact, data_loader=valid_dl)

                                                                 

0.0

In [12]:
kl_divergence_between_models(model1=train, model2=exact, data_loader=valid_dl)
# 407764079971736.25

                                                                 

3.395605000000001

In [13]:
kl_divergence_between_models(model1=exact, model2=train, data_loader=valid_dl)
# 336395501025684.5

                                                                 

3.2414749999999994