In [None]:
import optuna
import pandas as pd
import numpy as np
import yaml

from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch import tensor, cat, save, load, optim, nn
from torch.utils.data import DataLoader
import pickle
from sklearn.model_selection import train_test_split
from models.resnet18_model import ResNet18

ModuleNotFoundError: No module named 'optuna'

In [None]:
# Optuna objective
def Objective(trial):
    """Toy objective: sample ``x`` from the trial and score it as ``x**2 / (10 - x)``.

    Args:
        trial: Object exposing ``suggest_float(name, low, high)``. It is asked
            for a float named ``"x"`` with bounds -10 and 10.

    Returns:
        The value ``x**2 / (10 - x)`` for the suggested ``x``.

    Note:
        The denominator is zero when ``x == 10``, so that exact value raises
        ``ZeroDivisionError``.
    """
    sample = trial.suggest_float("x", -10, 10)
    numerator = sample ** 2
    denominator = 10 - sample
    return numerator / denominator

In [None]:
# Optuna study
# Build a study that maximizes the objective, then evaluate it for 50 trials.
study = optuna.create_study(direction="maximize")
study.optimize(func=Objective, n_trials=50)

In [None]:
# Report the study's best value and the parameters recorded for it.
print("Best value:", study.best_value)
print("Best params:", study.best_params)

In [None]:
#-------------------#
#  Prepare dataset  #
#-------------------#
def loadDataset(data_cfg):
    """Load the train and test splits of the dataset named in ``data_cfg``.

    Args:
        data_cfg: Mapping with the keys ``"dataset"`` (dataset name; only
            ``"cifar10"`` is supported) and ``"data_dir"`` (root directory
            handed to the dataset constructor, called with ``download=True``).

    Returns:
        Tuple ``(trainset, testset)`` of ``CIFAR10`` datasets that share the
        same ToTensor + Normalize transform.

    Raises:
        KeyError: If ``data_cfg`` lacks ``"dataset"`` or ``"data_dir"``.
        ValueError: If the dataset name is not supported.
    """
    dataset_name = data_cfg["dataset"]
    root = data_cfg["data_dir"]

    # Reject unknown names up front, before building transforms or downloading.
    if dataset_name != "cifar10":
        raise ValueError(f"Unsupported dataset: {dataset_name}")

    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert PIL image to Tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
    ])

    trainset = CIFAR10(root=root, train=True, download=True, transform=transform)
    testset = CIFAR10(root=root, train=False, download=True, transform=transform)

    # Identity comparison is the idiomatic None check and cannot be fooled by
    # a custom __ne__ on the dataset object.
    assert trainset is not None, "Failed loading the train set"
    assert testset is not None, "Failed loading the test set"
    print("-- Dataset loaded: ", dataset_name, " --")
    return trainset, testset

def processDataset(train_cfg, trainset, testset):
    """Merge the train/test splits into one population dataset and build loaders.

    Steps, in order:
      1. Convert both splits with ``toTensor`` and sanity-check sample counts,
         channel count and value range.
      2. Concatenate data and targets along dim 0 and wrap them in
         ``CifarInputHandler.UserDataset``.
      3. Persist the combined dataset to ``data/<dataset>.pkl`` via ``saveDataset``.
      4. Split it with ``splitDataset`` and build the loaders with
         ``prepareDataloaders``.

    NOTE(review): ``toTensor``, ``CifarInputHandler``, ``saveDataset``,
    ``splitDataset`` and ``prepareDataloaders`` are not defined or imported in
    the visible part of this notebook — confirm they are in scope before this
    cell runs.

    Args:
        train_cfg: Mapping with a ``"data"`` section (keys ``"dataset"``,
            ``"f_train"``, ``"f_test"``) and a ``"train"`` section (key
            ``"batch_size"``).
        trainset: Train split, passed through to ``toTensor``.
        testset: Test split, passed through to ``toTensor``.

    Returns:
        Tuple ``(train_loader, test_loader, train_indices, test_indices)``.

    Raises:
        AssertionError: If any of the size, channel or value-range checks fail.
        KeyError: If a required configuration key is missing.
    """
    print("-- Processing dataset for training & auditing  --")

    train_data, test_data, train_targets, test_targets = toTensor(trainset, testset)

    assert train_data.shape[0] == 50000, "Train should have 50000 samples"
    assert test_data.shape[0] == 10000, "Test should have 10000 samples"
    assert train_data.shape[1] == 3, "Train Data should have 3 channels"
    assert test_data.shape[1] == 3, "Test Data should have 3 channels"
    # Normalized data must lie inside [0, 1]. The upper bound has to be checked
    # with <=; a >= comparison would accept raw 0..255 pixel values.
    assert train_data.max() <= 1 and train_data.min() >= 0, "Train Data should be normalized"
    assert test_data.max() <= 1 and test_data.min() >= 0, "Test Data should be normalized"

    # Population = train followed by test, stacked along the sample dimension.
    data = cat([train_data.clone().detach(), test_data.clone().detach()], dim=0)
    targets = cat([train_targets, test_targets], dim=0)

    dataset = CifarInputHandler.UserDataset(data, targets)
    dataset_size = len(dataset)
    assert dataset_size == 60000, "Population dataset should contain 60000 samples"

    data_attrib = train_cfg["data"]
    train_attrib = train_cfg["train"]

    dataset_name = data_attrib["dataset"]
    file_path = "data/" + dataset_name + ".pkl"
    saveDataset(dataset, file_path)

    train_frac = data_attrib["f_train"]
    test_frac = data_attrib["f_test"]
    batch_size = train_attrib["batch_size"]

    print("-- Preparing dataset loaders --")
    train_indices, test_indices = splitDataset(dataset, train_frac, test_frac)
    train_loader, test_loader = prepareDataloaders(data, targets, train_indices, test_indices, batch_size)

    return train_loader, test_loader, train_indices, test_indices

# Read the training configuration from train.yaml (relative path, so it is
# resolved against the current working directory).
with open("train.yaml", "r") as file:
        train_cfg = yaml.safe_load(file)

# Load the dataset described by the "data" section, then build the loaders and
# the train/test index splits from the full configuration.
trainset, testset = loadDataset(train_cfg["data"])
train_loader, test_loader, train_indices, test_indices = processDataset(train_cfg, trainset, testset)

In [None]:
#------------------------#
#  Train baseline model  #
#------------------------#


In [None]:
# Save training and config metadata. Folder name index-hashed_config/logits, metadata, etc
# "save-load" contains a hyphen, so it cannot be named in a `from ... import`
# statement; it is loaded by its string name through importlib instead.
# NOTE(review): assumes a module file named save-load.py is importable — confirm the name.
# NOTE(review): binding `save` here rebinds the `save` imported from torch in the
# first cell — confirm that is intended.
import importlib

_save_load = importlib.import_module("save-load")
save, hashCfg = _save_load.save, _save_load.hashCfg


In [None]:
# Train shadow models