# Hyperparameter Finetuning

We want to find the best hyperparameters for the Generator Network.

We do this by searching for the best values of the following parameters:

- Number of Epochs (meaning `num_epochs` and `num_steps`)
- Learning Rate
- Batch Size
- Number of Noise Batches
- Number of Layers
- Regularization term
- Number of Neurons for each Network

In [None]:
from src.fyemu_tunable import main, evaluate
import torch
import os
import torchvision.transforms as tt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import optuna
import random
from typing import Dict
from torchvision.models import resnet18

from src.metrics import kl_divergence_between_models

# Use the first CUDA device when PyTorch can see one, otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Bare expression so the notebook cell displays the selected device.
DEVICE

In [None]:
def load_models_dict(path: str="data/new/models") -> Dict[int, torch.nn.Module]:
    """Load every checkpoint in ``path`` into its own 10-class ResNet-18.

    Each directory entry is treated as a ``state_dict`` file and loaded into
    a freshly constructed ``resnet18(num_classes=10)``. The models are moved
    to CUDA when available (CPU otherwise) and switched to eval mode.

    Args:
        path: Directory that contains the saved state dicts.

    Returns:
        A dict mapping a running integer index (0, 1, 2, ...) to the loaded
        model. Indices follow the lexicographically sorted file names.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    models = {}
    # os.listdir returns entries in arbitrary order; sort so that a given
    # index refers to the same checkpoint on every run.
    for index, filename in enumerate(sorted(os.listdir(path))):
        # Build a new network per checkpoint. Reusing one instance would make
        # every dict entry alias the same object holding the last weights.
        model = resnet18(num_classes=10).to(device)
        state_dict = torch.load(
            f=os.path.join(path, filename),
            map_location=device,
            weights_only=True,
        )
        model.load_state_dict(state_dict)
        model.eval()
        models[index] = model

    return models

See this short tutorial on how we persist the optimization in a database so that a study can be saved and resumed:

https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/001_rdb.html

In [None]:
import logging
import sys
import pickle
import optuna

# Add stream handler of stdout to show the messages
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = "GeneratorOpti2"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)

# Reuse the sampler saved by an earlier run, if there is one. Passing
# ``sampler=None`` makes Optuna fall back to its default sampler.
restored_sampler = None
if os.path.exists("sampler2.pkl"):
    # NOTE(review): unpickling executes arbitrary code; only load a
    # sampler file that this notebook wrote itself.
    with open("sampler2.pkl", "rb") as fin:
        restored_sampler = pickle.load(fin)

# load_if_exists=True resumes the study stored under this name in the
# SQLite file instead of raising when it already exists.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
    sampler=restored_sampler,
)

In [None]:
def objective(trial):
    """Optuna objective: run ``main`` with sampled settings and score it.

    Samples the hyperparameters from ``trial``, passes them to ``main`` and
    compares the result against a randomly chosen retrained ResNet-18 on the
    CIFAR-10 test folder.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The value returned by ``kl_divergence_between_models`` for the
        retrained model (``model1``) and the result of ``main`` (``model2``).
    """
    opt_Epochs = trial.suggest_int('opt_Epochs', 1, 10)
    opt_Steps = trial.suggest_int('opt_Steps', 1, 20)
    opt_Learning_Rate = trial.suggest_float('opt_Learning_Rate', 0.01, 0.3)
    opt_Batch_Size = trial.suggest_int('opt_Batch_Size', 32, 512)
    opt_Number_of_Noise_Batches = trial.suggest_int('opt_Number_of_Noise_Batches', 1, 10)
    opt_Regularization_term = trial.suggest_float('opt_Regularization_term', 0.01, 0.3)
    opt_Noise_Dim = trial.suggest_int('opt_Noise_Dim', 1, 512)
    n_layers = trial.suggest_int('n_layers', 1, 8)

    # Only the depth is tuned; every hidden layer has a fixed width of 1024.
    Layers = [1024] * n_layers

    mod = main(
        t_Epochs=opt_Epochs,
        t_Steps=opt_Steps,
        t_Learning_Rate=opt_Learning_Rate,
        t_Batch_Size=opt_Batch_Size,
        t_Number_of_Noise_Batches=opt_Number_of_Noise_Batches,
        t_Regularization_term=opt_Regularization_term,
        t_Layers=Layers,
        t_Noise_Dim=opt_Noise_Dim,
        new_baseline=False,
        logs=False,
        model_eval_logs=False,
    )

    data_dir = os.path.join("data", "cifar10")

    transform_test = tt.Compose([
        tt.ToTensor(),
        tt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    valid_ds = ImageFolder(os.path.join(data_dir, "test"), transform_test)
    valid_dl = DataLoader(valid_ds, 256)

    # Pick one retrained reference model at random.
    # NOTE(review): assumes the directory holds only files named
    # ResNET18_CIFAR10_RETRAIN_CLASSES_<0..k-1>.pt -- confirm.
    exact = resnet18(num_classes=10)
    n = random.randint(0, len(os.listdir("data/retrain/models")) - 1)
    exact.load_state_dict(
        torch.load(
            f"data/retrain/models/ResNET18_CIFAR10_RETRAIN_CLASSES_{n}.pt",
            map_location="cpu",  # `exact` is never moved off the CPU here
            weights_only=True,
        )
    )
    # A new module starts in training mode and load_state_dict does not
    # change that; switch to eval so BatchNorm uses its running statistics.
    exact.eval()
    # NOTE(review): `exact` stays on the CPU; presumably the metric handles
    # device placement of both models -- confirm.

    div = kl_divergence_between_models(
        model1=exact,
        model2=mod,
        data_loader=valid_dl,
    )

    return div

import pickle

# Run ten more trials of the objective on the (possibly resumed) study.
study.optimize(objective, n_trials=10)

# Persist the sampler so a later session can restore it from "sampler2.pkl".
with open("sampler2.pkl", "wb") as sampler_file:
    sampler_file.write(pickle.dumps(study.sampler))

In [None]:
# Display the parameter set of the best trial recorded in the study so far.
study.best_params

In [None]:
# Export all trials of the study as a pandas DataFrame.
trials_df = study.trials_dataframe()

# Sort ascending by objective value and keep the first ten rows.
best10_df = trials_df.sort_values(by="value").iloc[:10]

# Bare expression so the notebook cell renders the table.
best10_df

In [None]:
# Print the mean of each column over the ten best trials in `best10_df`.
# The column lookups use single quotes: reusing the enclosing double quote
# inside an f-string is only valid syntax from Python 3.12 onwards.
print(f"Value {float(best10_df['value'].mean())}")
print(f"Number of Layers {float(best10_df['params_n_layers'].mean())} ")
print(f"Batch Size for Training {float(best10_df['params_opt_Batch_Size'].mean())} ")
print(f"Epochs for Noise Training {float(best10_df['params_opt_Epochs'].mean())} ")
print(f"LR for Noise Training {float(best10_df['params_opt_Learning_Rate'].mean())} ")
print(f"Noise Dim of Generator {float(best10_df['params_opt_Noise_Dim'].mean())}")
print(f"Number of Noise Batches Used {float(best10_df['params_opt_Number_of_Noise_Batches'].mean())} ")
print(f"Regularization Term {float(best10_df['params_opt_Regularization_term'].mean())} ")
print(f"Learning Steps for Noise Training {float(best10_df['params_opt_Steps'].mean())}")

^^^ These averaged values are used as the default parameters ^^^

___

### Standard Parameters

In [None]:
# n0 = 5000
# n2 = 5000
# batch_size = 128

# standard_model, standard_history = main(
#     t_Epochs = 5,
#     t_Steps= int((n0 + n2)/(2 * batch_size)), # The idea is to have the same number of updates as there are samples to unlearn
#     t_Learning_Rate = 0.1,
#     t_Batch_Size = batch_size,
#     t_Number_of_Noise_Batches = 10,
#     t_Regularization_term = 0.1,
#     t_Layers = [1000],
#     t_Noise_Dim = 100,
#     new_baseline=True,
#     logs=True,
# )

___

In [None]:
from torchvision.models import resnet18
import torch

# Use the first CUDA device when PyTorch can see one, otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Checkpoints stored under data/all/models.
train_ms = load_models_dict("data/all/models")

# Checkpoints stored under data/retrain/models.
exact_ms = load_models_dict("data/retrain/models")

In [None]:
from src.metrics import kl_divergence_between_models
import os
import torchvision.transforms as tt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Batch size used when iterating over the test images.
batch_size = 256
data_dir = os.path.join("data", "cifar10")

# Convert images to tensors, then normalize each RGB channel with a fixed
# mean and standard deviation.
transform_test = tt.Compose(
    [
        tt.ToTensor(),
        tt.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

# Test split read from the "test" subfolder, iterated in a fixed order.
valid_ds = ImageFolder(root=os.path.join(data_dir, "test"), transform=transform_test)
valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)

# Compare the first model from `train_ms` with itself.
kl_divergence_between_models(
    model1=train_ms[0],
    model2=train_ms[0],
    data_loader=valid_dl,
)

In [None]:
# Same model on both sides -- presumably a sanity check that should give a
# divergence of about zero; confirm against the metric's definition.
kl_divergence_between_models(model1=exact_ms[0], model2=exact_ms[0], data_loader=valid_dl)

In [None]:
# Divergence with the first `train_ms` model as model1 and the first
# `exact_ms` model as model2.
kl_divergence_between_models(model1=train_ms[0], model2=exact_ms[0], data_loader=valid_dl)

In [None]:
# Same pair with the argument order swapped (model1 is now the `exact_ms`
# model) -- presumably to see whether the metric is order-dependent.
kl_divergence_between_models(model1=exact_ms[0], model2=train_ms[0], data_loader=valid_dl)