# Transfer Learning 

This notebook shows how VERONA can be used to create robustness distribution plots for research on the transfer learning of robustness. It creates robustness distribution plots per retrained layer, as well as robustness distribution comparisons of adversarial and transfer-learned networks.

In [None]:
import logging
from datetime import datetime
from pathlib import Path

import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
from autoverify.verifier import Verinet

import plotly.express as ex
import plotly.graph_objs as go

from ada_verona.database.machine_learning_model.network import Network
from ada_verona.database.machine_learning_model.onnx_network import ONNXNetwork
from ada_verona.database.machine_learning_model.pytorch_network import PyTorchNetwork

from ada_verona.analysis.report_creator import ReportCreator
from ada_verona.database.dataset.image_file_dataset import ImageFileDataset
from ada_verona.database.dataset.pytorch_experiment_dataset import PytorchExperimentDataset
from ada_verona.database.verification_context import VerificationContext
from ada_verona.dataset_sampler.predictions_based_sampler import PredictionsBasedSampler
from ada_verona.database.experiment_repository import ExperimentRepository
from ada_verona.epsilon_value_estimator.binary_search_epsilon_value_estimator import (
    BinarySearchEpsilonValueEstimator,
)
from ada_verona.verification_module.auto_verify_module import (
    AutoVerifyModule,
)
from ada_verona.verification_module.property_generator.one2any_property_generator import (
    One2AnyPropertyGenerator,
)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seed PyTorch's random number generator with a fixed value.
torch.manual_seed(0)
# Configure root logging at DEBUG level; each record shows timestamp, level and message.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.DEBUG)

## Define Dataset

In [None]:
# Load the MNIST test split as a PyTorch dataset.
# Any preprocessing is configured through the `transform` parameter.
torch_dataset = torchvision.datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# The experiment dataset wraps the PyTorch dataset to keep track of the image ids.
experiment_dataset = PytorchExperimentDataset(dataset=torch_dataset)

# Only indices 0-9 are used so that the experiment stays small.
experiment_dataset = experiment_dataset.get_subset(list(range(10)))

## Component Setup

In [None]:
# Timeout handed to the verification module (presumably seconds per query -- confirm).
timeout = 300

# In this example, a one-to-any property generator is used.
# It creates vnnlib files for one-to-any robustness queries.
# A one-to-one property generator is also implemented in the package and could be used here as well.
# The property generator needs the number of classes and the lower and upper bound of the data.
# The MNIST images are converted with transforms.ToTensor(), which scales pixel
# values to [0, 1], so the data bounds are 0 and 1.
property_generator = One2AnyPropertyGenerator(number_classes=10, data_lb=0, data_ub=1)

# In this example, Verinet is used.
# All the other verifiers offered by the autoverify package can be used in the AutoVerifyModule too.
verifier = AutoVerifyModule(verifier=Verinet(), timeout=timeout)

In [None]:
# Critical epsilon values are computed with the BinarySearchEpsilonValueEstimator class.
# It is given the candidate epsilon values and the verifier defined above.
epsilon_value_list = [0.001, 0.01, 0.1, 0.2, 0.3, 0.4]
epsilon_value_estimator = BinarySearchEpsilonValueEstimator(
    verifier=verifier,
    epsilon_value_list=epsilon_value_list,
)

In [None]:
# Name of the experiment and the locations of the repository and its networks
experiment_name = "TransferLearning"
experiment_repository_path = Path("../tests/test_transfer_learning")
# The networks live in data/networks inside the experiment repository folder.
network_folder = experiment_repository_path / "data" / "networks"
experiment_repository = ExperimentRepository(
    experiment_repository_path,
    network_folder,
)

In [None]:
# Before the robustness of a network can be computed, it has to be
# checked which data points the network classifies correctly.
# The PredictionsBasedSampler class takes care of that selection.
dataset_sampler = PredictionsBasedSampler(
    sample_correct_predictions=True,
)

In [None]:
# Create the experiment in the repository and store the settings it is run with
experiment_repository.initialize_new_experiment(experiment_name)

experiment_configuration = {
    "experiment_name": experiment_name,
    "experiment_repository_path": str(experiment_repository_path),
    "network_folder": str(network_folder),
    "dataset": str(experiment_dataset),
    "timeout": timeout,
    # epsilon values are stored as strings
    "epsilon_list": list(map(str, epsilon_value_list)),
}
experiment_repository.save_configuration(experiment_configuration)

## Sampling Datapoints

In [None]:

# Load the networks that are evaluated in this experiment
network_list = experiment_repository.get_network_list("../tests/test_transfer_learning/networks.csv")
# Networks for which sampling raised an exception; they are reported at the end
failed_networks = []

for network in network_list:
    # Sample the data points for this network. The sampler was configured with
    # sample_correct_predictions=True, i.e. correctly predicted points are sampled.
    try:
        sampled_data = dataset_sampler.sample(network, experiment_dataset)
    except Exception as e:
        # Best effort: record the failure (including the traceback) and
        # continue with the remaining networks instead of aborting the run.
        logging.exception(f"failed for network: {network} with error: {e}")
        failed_networks.append(network)
        continue

    for data_point in sampled_data:
        verification_context = experiment_repository.create_verification_context(network, data_point, property_generator)

        epsilon_value_result = epsilon_value_estimator.compute_epsilon_value(verification_context)

        # Store each result exactly once so the results contain no duplicate rows
        experiment_repository.save_result(epsilon_value_result)

        print(f"result: {epsilon_value_result}")

# Save Plots
experiment_repository.save_plots()
logging.info(f"Failed for networks: {failed_networks}")

## Retraining Plot
Plot of robustness distributions per retrained layer

In [None]:
# Base name shared by the models that are shown in the plot
model_name = "convSmallRELU__DiffAI_transfer_emnist"

# Results of the experiment as a data frame
distributions = experiment_repository.get_result_df()

# Keep only the rows whose "Model" entry contains the model name
is_selected_model = distributions["Model"].str.contains(model_name)
selected_distributions = distributions[is_selected_model]

# One box per model, with one facet column per value of "retrained"
fig = ex.box(
    selected_distributions,
    x="Model",
    y="Critical_epsilon",
    facet_col="retrained",
)
fig.update_xaxes(visible=False)
fig.update_layout(height=350, margin={"b": 0})

# Write the figure to the results folder of the experiment
retraining_plot_path = experiment_repository.get_results_path() / "retraining_plot.png"
fig.write_image(retraining_plot_path)