## Visualisation Notebook
This notebook generates the LRP (Layer-wise Relevance Propagation) visualisations used in the thesis.
By default, it uses the pretrained VGG16 and VGG19 models and keeps the setup as simple as possible.

## imports for the page

In [None]:
import sys
sys.path.append('..')
import torch
import torchvision.models as models
import numpy as np
import pandas as pd  
from experiments import WrapperNet, perform_lrp_plain, evaluate_performance, evaluate_explanations
# evaluate_performance, process_dataset, evaluate_explanations
from internal_utils import get_data_imagenette, get_data_imagenette, get_vgg16, get_vgg19, preprocess_images, imagenette_to_imagenet_label_mapping_fast, condense_to_heatmap
from tqdm import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Sample count reported in the truncation warning below; the truncation itself
# is applied inside the evaluation functions, not in this cell.
TRUNCATE = 25
print(
    f"WARNING: TRUNCATING THE DATASET TO {TRUNCATE} --- THIS WILL ALSO BE TRUNCATED IN THE FUNCTIONS INCLUDED IN THE experiments/run_evaluation.py FILE"
)
print(
    "TO RUN OVER THE ENTIRE DATASET, UNCOMMENT THE RELEVANT LINES IN THE FUNCTIONS (SEARCH FOR THE STRING 'TRUNCATE')"
)


### Plot comparative figure for the explanation evaluation suite

## Visualise Grid Image: 
Here, we simply perform LRP on a batch of imagenette data.
This is simply to add some colour to the thesis and isn't necessarily important, so much of the work here is manual.

In [None]:


def visualise_grid_image(batch_of_images, batch_of_labels, batch_of_labels_description, method_for_heatmap_generation, model_0, model_1, num_images=4, row_labels=('Raw Image', 'VGG16', 'VGG19')):
    """Plot a grid of raw images above the heatmaps produced by two models.

    The figure has three rows and one column per displayed image: row 0 shows
    the raw images, row 1 the heatmaps generated with ``model_0`` and row 2 the
    heatmaps generated with ``model_1``. Each row is annotated on the left with
    the matching entry of ``row_labels``.

    Args:
        batch_of_images: Batch of images; each element is displayed after
            ``permute(1, 2, 0)`` (assumes channel-first tensors -- TODO confirm).
        batch_of_labels: Imagenette labels for the batch; mapped to ImageNet
            labels before being handed to the heatmap method.
        batch_of_labels_description: Currently unused; kept so that existing
            callers do not break.
        method_for_heatmap_generation: Callable invoked as
            ``method(preprocessed_images, imagenet_labels, model)`` and
            returning ``(classifications, heatmaps)``.
        model_0: Model used for the second row of the grid.
        model_1: Model used for the third row of the grid.
        num_images: Maximum number of columns to draw. Clamped to the batch
            size so that a smaller batch no longer raises an IndexError.
        row_labels: Three labels for the raw-image row and the two heatmap
            rows, in that order.

    Raises:
        ValueError: If there is no image to display.
    """
    def _heatmaps_for(model):
        # Preprocess per call so both models receive their own fresh input,
        # exactly as if each had been evaluated independently.
        _, relevance = method_for_heatmap_generation(
            preprocess_images(batch_of_images),
            imagenette_to_imagenet_label_mapping_fast(batch_of_labels),
            model,
        )
        return condense_to_heatmap(relevance)

    heatmaps_0 = _heatmaps_for(model_0)
    heatmaps_1 = _heatmaps_for(model_1)

    # Never try to draw more columns than there are images in the batch.
    num_images = min(num_images, len(batch_of_images))
    if num_images < 1:
        raise ValueError("visualise_grid_image needs at least one image to display")

    # squeeze=False keeps `axes` two-dimensional even when only one column is drawn.
    fig, axes = plt.subplots(3, num_images, figsize=(2 * num_images, 6), squeeze=False)
    fig.suptitle('Image and Heatmap Visualizations')

    # Row labels live in the left margin reserved by tight_layout below.
    for row, label in enumerate(row_labels):
        fig.text(0.05, 0.75 - row * 0.25, label, va='center', ha='left', fontsize=12, rotation=45, transform=fig.transFigure)

    for col in range(num_images):
        # Raw image: move channels last for imshow.
        axes[0, col].imshow(batch_of_images[col].permute(1, 2, 0).cpu().numpy())
        axes[1, col].imshow(heatmaps_0[col], cmap='seismic')
        axes[2, col].imshow(heatmaps_1[col], cmap='seismic')
        for row in range(3):
            axes[row, col].axis('off')

    plt.tight_layout(rect=[0.125, 0.03, 1, 0.95])
    plt.show()

# Build shuffled Imagenette loaders that yield batches of 4 images; only the
# test loader is used for this figure.
train_data, test_data = get_data_imagenette(batch_size=4, shuffle=True)
# Take the first batch from the test loader as the images to visualise.
batch_of_images, batch_of_labels = next(iter(test_data))
# Wrap each VGG variant in WrapperNet (hybrid_loss=True) before passing it to
# the heatmap method.
model_0 = WrapperNet(get_vgg16(), hybrid_loss=True)
model_1 = WrapperNet(get_vgg19(), hybrid_loss=True)
# The label-description argument is not used by visualise_grid_image, so an
# empty list is passed.
visualise_grid_image(batch_of_images, batch_of_labels, [], perform_lrp_plain, model_0, model_1)

    

# Run evaluation suite 

### Plotting functions here

In [None]:


def plot_comparative_figure(df, method_0, method_1, data_type="Train"):
    """Plot side-by-side boxplots comparing two methods on each distance metric.

    One subplot is drawn per metric (noise/blur, small/large). For every method
    the rows are filtered on the ``{method}_{metric}_class_change`` column:
    "small" metrics keep the rows where the class did not change, all other
    metrics keep the rows where it did. The surviving ``{method}_{metric}``
    values of both methods are then drawn as two boxes on a shared y-axis.

    Args:
        df: DataFrame holding, for each method and metric, a
            ``{method}_{metric}`` value column and a
            ``{method}_{metric}_class_change`` boolean column.
        method_0: Name prefix of the first method's columns.
        method_1: Name prefix of the second method's columns.
        data_type: Label of the data split, used only in the figure title.
    """
    figs_per_row = ["distance_noise_small", "distance_noise_large", "distance_blur_small", "distance_blur_large"]

    # Single row of subplots, one per metric, sharing the y-axis for comparison.
    fig, axs = plt.subplots(1, len(figs_per_row), figsize=(20, 5), sharey=True)

    for ax, fig_type in zip(axs, figs_per_row):
        # Small perturbations are expected to leave the class unchanged;
        # large ones are expected to change it.
        expected_class_change = "small" not in fig_type

        values, labels = [], []
        for method in (method_0, method_1):
            keep = df[f"{method}_{fig_type}_class_change"] == expected_class_change
            selected = df.loc[keep, f"{method}_{fig_type}"]
            values.append(selected)
            labels.extend([method] * len(selected))

        # ignore_index avoids duplicate index labels (the same source row can
        # survive the filter for both methods) in the frame handed to seaborn.
        combined_df = pd.DataFrame({
            'Value': pd.concat(values, ignore_index=True),
            'Method': labels,
        })

        sns.boxplot(x='Method', y='Value', hue='Method', data=combined_df, ax=ax)
        ax.set_title(fig_type.replace("_", " "))

    fig.suptitle(f"Comparative Analysis of {method_0} and {method_1} on {data_type} Data", fontsize=16)
    plt.tight_layout()
    plt.show()


### Evaluation driver method here

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

train_data, test_data = get_data_imagenette()
vgg16_results_train, vgg16_results_test = evaluate_performance(get_vgg16(), train_data, test_data)
vgg19_results_train, vgg19_results_test = evaluate_performance(get_vgg19(), train_data, test_data)

# Tag every result frame with the model it came from so the frames can be
# stacked and compared in a single plot.
vgg16_results_train['model'] = 'VGG16'
vgg16_results_test['model'] = 'VGG16'
vgg19_results_train['model'] = 'VGG19'
vgg19_results_test['model'] = 'VGG19'

# Stack the per-model results for each split.
combined_df_train = pd.concat([vgg16_results_train, vgg19_results_train])
combined_df_test = pd.concat([vgg16_results_test, vgg19_results_test])

# Melt to long format (one row per model/metric/value) as expected by seaborn.
melted_df_train = combined_df_train.melt(id_vars=['model'], var_name='metric', value_name='value')
melted_df_test = combined_df_test.melt(id_vars=['model'], var_name='metric', value_name='value')


def _plot_performance_boxplot(melted_df, split_name):
    """Draw one boxplot of metric values per model for a single data split.

    Args:
        melted_df: Long-format frame with 'model', 'metric' and 'value' columns.
        split_name: Name of the split ('Train' or 'Test'), shown in the legend title.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.boxplot(x='metric', y='value', hue='model', data=melted_df, ax=ax)
    ax.set_title('Model Performance Comparison')
    ax.set_xlabel('Metric')
    ax.set_ylabel('Value')
    ax.legend(title=f'Model ({split_name})')
    plt.show()


_plot_performance_boxplot(melted_df_train, 'Train')
_plot_performance_boxplot(melted_df_test, 'Test')

# To evaluate the explanations, we need to pass in a list of "methods".
# Each method is a tuple of the form (name, method, model):
#   name   -- a string which identifies the method, e.g. "VGG16"
#   method -- a function which generates a heatmap on a certain model, e.g. perform_lrp_plain
#   model  -- the model the method is applied to; here each network is wrapped
#             in WrapperNet with hybrid_loss=True
vgg16 = get_vgg16()
vgg19 = get_vgg19()
methods = [
        ("VGG16", perform_lrp_plain, WrapperNet(vgg16, hybrid_loss=True)),
        ("VGG19", perform_lrp_plain, WrapperNet(vgg19, hybrid_loss=True))
    ]
# Now evaluate the explanations; results are kept in memory only (save_results=False).
df_train, df_test = evaluate_explanations(train_data, test_data, methods, save_results = False)


In [None]:
# Draw the VGG16-vs-VGG19 comparison for the test split first, then the train split.
for split_df, split_name in ((df_test, "Test"), (df_train, "Train")):
    plot_comparative_figure(split_df, "VGG16", "VGG19", split_name)