In [None]:
import os
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.metrics import r2_score

# Append the parent directory of the current working directory to the module
# search path.
# NOTE(review): presumably so project modules one level above the notebook
# folder can be imported — confirm.
sys.path.append(os.path.dirname(os.getcwd()))

### Correlations F1-silhouette with kmeans relevance strategy

In [None]:
# Pearson correlation between the AccumulatorF1 and AccumulatorKsilhouette
# columns, printed per dataset/explainer for the StrategyKmeans results.
dataset_name = [
    'AlkaneCarbonyl', 'Mutagenicity', 'Benzene',
    'BA2grid', 'BA2Motif', 'GridHouse', 'HouseColors',
]
name = 'GIN'
method = ['CAM']
cls = '1'

# Datasets whose result file ends in "LstTrue"; every other dataset uses "LstFalse".
lst_true_datasets = ('AlkaneCarbonyl', 'Mutagenicity', 'Benzene')

for dataset in dataset_name:
    for meth in method:
        last = dataset in lst_true_datasets

        expl1r_path = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyKmeans/Expl_results_list_Lst{last}.csv'
        df = pd.read_csv(expl1r_path, index_col=1)

        f1_values = df['AccumulatorF1']
        silhouette_values = df['AccumulatorKsilhouette']
        corr = f1_values.corr(silhouette_values, method='pearson')

        print(f'Correlazione, {dataset}, {meth}, {round(corr, 3)}')

Mean silhouette value for CAM across datasets

In [None]:
# Mean of the AccumulatorKsilhouette column, printed per dataset/explainer
# for the StrategyKmeans results.
dataset_name = [
    'AlkaneCarbonyl', 'Mutagenicity', 'Benzene',
    'BA2grid', 'BA2Motif', 'GridHouse', 'HouseColors',
]
name = 'GIN'
method = ['CAM']
cls = '1'

# Datasets whose result file ends in "LstTrue"; every other dataset uses "LstFalse".
lst_true_datasets = ('AlkaneCarbonyl', 'Mutagenicity', 'Benzene')

for dataset in dataset_name:
    for meth in method:
        last = dataset in lst_true_datasets

        expl1r_path = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyKmeans/Expl_results_list_Lst{last}.csv'
        df = pd.read_csv(expl1r_path, index_col=1)

        silhouette_mean = df['AccumulatorKsilhouette'].mean()
        sil = round(silhouette_mean, 3)

        print(f'Sil, {dataset}, {meth}, {sil}')

### Correlations F1-silhouette with fidelity relevance strategy

In [None]:
# Pearson correlation between the AccumulatorF1 and AccumulatorKsilhouette
# columns, printed per dataset/explainer for the StrategyFidelityTh results.
dataset_name = [
    'AlkaneCarbonyl', 'Mutagenicity', 'Benzene',
    'BA2grid', 'BA2Motif', 'GridHouse', 'HouseColors',
]
name = 'GIN'
method = ['CAM']
cls = '1'

# Datasets whose result file ends in "LstTrue"; every other dataset uses "LstFalse".
lst_true_datasets = ('AlkaneCarbonyl', 'Mutagenicity', 'Benzene')

for dataset in dataset_name:
    for meth in method:
        last = dataset in lst_true_datasets

        expl1r_path = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyFidelityTh/Expl_results_list_Lst{last}.csv'

        # NOTE(review): this cell reads with index_col=0 while the sibling
        # cells use index_col=1 — confirm which one is intended.
        df = pd.read_csv(expl1r_path, index_col=0)

        f1_values = df['AccumulatorF1']
        silhouette_values = df['AccumulatorKsilhouette']
        corr = f1_values.corr(silhouette_values, method='pearson')

        print(f'Correlazione, {dataset}, {meth}, {round(corr, 2)}')

Mean silhouette value for CAM across datasets

In [None]:
# Mean of the AccumulatorKsilhouette column, printed per dataset/explainer
# for the StrategyFidelityTh results.
dataset_name = [
    'AlkaneCarbonyl', 'Mutagenicity', 'Benzene',
    'BA2grid', 'BA2Motif', 'GridHouse', 'HouseColors',
]
name = 'GIN'
method = ['CAM']
cls = '1'

# Datasets whose result file ends in "LstTrue"; every other dataset uses "LstFalse".
lst_true_datasets = ('AlkaneCarbonyl', 'Mutagenicity', 'Benzene')

for dataset in dataset_name:
    for meth in method:
        last = dataset in lst_true_datasets

        expl1r_path = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyFidelityTh/Expl_results_list_Lst{last}.csv'
        df = pd.read_csv(expl1r_path, index_col=1)

        silhouette_mean = df['AccumulatorKsilhouette'].mean()
        sil = round(silhouette_mean, 2)

        print(f'Sil, {dataset}, {meth}, {sil}')

### Plot F1b and F1r with both fidelity and kmeans relevance strategies

In [None]:
# Build one row per (strategy, explainer, dataset) holding the mean F1 of the
# benchmark results ("F1bench") and of the strategy-selected results ("F1split").
dataset_name = ['BA2grid', 'BA2Motif', 'HouseColors', 'GridHouse', 'AlkaneCarbonyl' , 'Mutagenicity', 'Benzene']
name = 'GIN'
method = ['RandomExplainer', 'CAM', 'IntegratedGradients', 'GNNExplainer']
strategy = ['FidelityTh', 'Kmeans']
cls = '1'

stratContainer = defaultdict(list)

for strat in strategy:
    for meth in method:
        for dataset in dataset_name:
            # The "Lst" flag in the file name is True only for these three datasets.
            last = dataset in ['AlkaneCarbonyl', 'Mutagenicity', 'Benzene']

            # Each dataset has its own benchmark strategy folder.
            if last:
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyBestK/Expl_results_list_Lst{last}.csv'
            elif dataset in ['BA2Motif', 'HouseColors']:
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop5/Expl_results_list_Lst{last}.csv'
            elif dataset == 'GridHouse':
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop14/Expl_results_list_Lst{last}.csv'
            elif dataset == 'BA2grid':
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop9/Expl_results_list_Lst{last}.csv'

            expl_split = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/Strategy{strat}/Expl_results_list_Lst{last}.csv'

            df_best = pd.read_csv(expl_best, index_col=1)
            df_split = pd.read_csv(expl_split, index_col=1)
            # Index join; columns present in both frames get the suffix
            # "B" (benchmark) or "S" (strategy under test).
            joined = df_best.join(df_split, lsuffix='B', rsuffix='S')

            F1s = round(joined['AccumulatorF1S'].mean(), 3)
            F1b = round(joined['AccumulatorF1B'].mean(), 3)

            row = {
                'Strategy': strat,
                'Algorithm': meth,
                'Dataset': dataset,
                'F1split': F1s,
                'F1bench': F1b,
            }
            for column, value in row.items():
                stratContainer[column].append(value)

df = pd.DataFrame(stratContainer)

In [None]:
# Scatter plot of F1bench (x) against F1split (y), one facet per strategy,
# with the identity line and an R² annotation on each facet. Expects `df`
# with columns Strategy, Algorithm, Dataset, F1split and F1bench.

# Start from matplotlib's defaults so re-running the cell gives the same style.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)

SMALL_SIZE = 20
MEDIUM_SIZE = 30
BIGGER_SIZE = 35

plt.rc('font', size=MEDIUM_SIZE)
plt.rc('axes', titlesize=MEDIUM_SIZE)
plt.rc('axes', labelsize=MEDIUM_SIZE)
plt.rc('xtick', labelsize=SMALL_SIZE)
plt.rc('ytick', labelsize=SMALL_SIZE)
plt.rc('legend', fontsize=MEDIUM_SIZE)
plt.rc('figure', titlesize=BIGGER_SIZE)

plt.rcParams.update({
    "text.usetex": True,
    "font.family": "Helvetica"
})

# Points of the identity line y = x on [0, 1].
xl = list(np.linspace(0,1,100))
line = {'x':xl, 'y':xl}
dl = pd.DataFrame(line)

# set_palette installs the palette as the default colour cycle;
# color_palette only returns the colours and had no effect here.
sns.set_palette('colorblind')

g = sns.FacetGrid(data=df, col='Strategy', legend_out=True, height=9, aspect=1)
g.map_dataframe(sns.scatterplot, x="F1bench", y="F1split", hue="Algorithm", style="Dataset", s=150, alpha=0.8, legend=True, linewidth=1, edgecolor='black')

for item, ax in g.axes_dict.items():
    sns.lineplot(data=dl, x='x', y='y', alpha=0.3, ax=ax, legend=False)
    facet_rows = df.loc[df['Strategy']==item]
    f1split = facet_rows['F1split']
    f1bench = facet_rows['F1bench']
    # r2_score(y_true, y_pred): F1split is the target and F1bench the
    # prediction, i.e. the R² of the identity line drawn above.
    R2 = round(r2_score(f1split, f1bench),2)
    text = f'R² = {R2}'
    # Annotate the facet itself; plt.text would attach the text to whichever
    # axes happens to be current.
    ax.text(0.2, 0.93, text,
            horizontalalignment='center',
            verticalalignment='center',
            transform=ax.transAxes)

axes = g.axes.flatten()
# Raw strings keep the LaTeX backslashes from being read as (invalid)
# Python escape sequences.
axes[0].set_title(r"$(a) \; \psi_{F}$")
axes[1].set_title(r"$(b) \; \psi_{C}$")
g.set_axis_labels(r"$\mathrm{F}_1^b$", r"$\mathrm{F}_1^r$")
g.set(xlim=(0, 1), ylim=(0, 1), xticks=[0, 0.2, 0.4, 0.6, 0.8, 1], yticks=[0, 0.2, 0.4, 0.6, 0.8, 1])

# Split seaborn's combined legend into an explainer legend and a dataset legend.
# NOTE(review): relies on the private attribute `_legend_data` and on handles
# exposing `set_sizes` — confirm against the installed seaborn version.
k = g._legend_data
# Drop the "Algorithm"/"Dataset" subtitle entries; tolerate their absence.
k.pop('Algorithm', None)
k.pop('Dataset', None)

short_names = {
    'RandomExplainer': 'RExpl',
    'CAM': 'CAM',
    'IntegratedGradients': 'IG',
    'GNNExplainer': 'GNNE',
}
algs = {}
datasets = {}
for label, item in k.items():
    item.set_sizes((200,200))
    if label in short_names:
        if label == 'RandomExplainer':
            item.set_sizes((300,300))
        algs[short_names[label]] = item
    else:
        datasets[label] = item

g.add_legend(datasets, loc=(0.75, 0.29))
g.add_legend(algs, loc=(0.18, -0.01), ncol=4)
g.savefig('2legends.png', format='png', dpi=400)
g.fig

### Computing MSE

In [None]:
# For every (strategy, explainer, dataset), compute the mean and standard
# deviation of the squared difference between the benchmark F1 and the
# strategy F1, stored as a "mean±std" string.
dataset_name = ['BA2grid', 'BA2Motif', 'HouseColors', 'GridHouse', 'AlkaneCarbonyl' , 'Mutagenicity', 'Benzene']
name = 'GIN'
method = ['RandomExplainer', 'CAM', 'IntegratedGradients', 'GNNExplainer']
strategy = ['Kmeans', 'FidelityTh']
cls = '1'

res = defaultdict(list)

for strat in strategy:
    for meth in method:
        for dataset in dataset_name:
            # The "Lst" flag in the file name is True only for these three datasets.
            last = dataset in ['AlkaneCarbonyl', 'Mutagenicity', 'Benzene']

            # Each dataset has its own benchmark strategy folder.
            if last:
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyBestK/Expl_results_list_Lst{last}.csv'
            elif dataset in ['BA2Motif', 'HouseColors']:
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop5/Expl_results_list_Lst{last}.csv'
            elif dataset == 'GridHouse':
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop14/Expl_results_list_Lst{last}.csv'
            elif dataset == 'BA2grid':
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop9/Expl_results_list_Lst{last}.csv'

            expl_split = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/Strategy{strat}/Expl_results_list_Lst{last}.csv'

            df_best = pd.read_csv(expl_best, index_col=1)
            df_split = pd.read_csv(expl_split, index_col=1)
            # Index join; shared columns get suffix "B" (benchmark) or "S" (strategy).
            joined = df_best.join(df_split, lsuffix='B', rsuffix='S')

            error = joined['AccumulatorF1B'] - joined['AccumulatorF1S']
            se = error.pow(2.)
            mse = se.mean()
            stdmse = se.std()

            row = {
                'Algorithm': meth,
                'Dataset': dataset,
                'Strategy': strat,
                'MSE': f'{round(mse, 3)}±{round(stdmse, 3)}',
            }
            for column, value in row.items():
                res[column].append(value)

df = pd.DataFrame(res)
df

In [None]:
# MSE table for the Kmeans strategy: a Dataset column followed by one MSE
# column per explainer, written to mse_kmeans.csv.
is_kmeans = df['Strategy'] == 'Kmeans'

# Dataset labels are taken from the CAM rows.
dataset_column = df.loc[is_kmeans & (df['Algorithm'] == 'CAM'), ['Dataset']].reset_index(drop=True)

table_parts = [dataset_column]
for elem in ['RandomExplainer', 'CAM', 'IntegratedGradients', 'GNNExplainer']:
    myview = (
        df.loc[is_kmeans & (df['Algorithm'] == elem), ['MSE']]
        .reset_index(drop=True)
        .rename(columns={"MSE": elem})
    )
    table_parts.append(myview)

final = pd.concat(table_parts, axis=1)
final.to_csv('mse_kmeans.csv')

In [None]:
# MSE table for the FidelityTh strategy: a Dataset column followed by one MSE
# column per explainer, written to mse_fidelity.csv.
is_fidelity = df['Strategy'] == 'FidelityTh'

# Take the dataset labels from the FidelityTh rows (not the Kmeans rows) so
# the labels and the MSE values come from the same strategy subset.
final = df.loc[is_fidelity & (df['Algorithm'] == 'CAM'), ['Dataset']].reset_index(drop=True)
for elem in ['RandomExplainer', 'CAM', 'IntegratedGradients', 'GNNExplainer']:
    myview = df.loc[is_fidelity & (df['Algorithm'] == elem), ['MSE']].reset_index(drop=True)
    myview = myview.rename(columns={"MSE": elem})
    final = pd.concat([final, myview], axis=1)
final.to_csv('mse_fidelity.csv')

### Computing and comparing retrieved explanation sizes

In [None]:
# Mean ± standard deviation of the AccumulatorK column for each strategy
# (including the per-dataset benchmark, labelled "Best"), one column per dataset.
dataset_name = ['BA2grid', 'BA2Motif', 'HouseColors', 'GridHouse', 'AlkaneCarbonyl' , 'Mutagenicity', 'Benzene']
name = 'GIN'
method = ['CAM']
strategy = ['Kmeans', 'FidelityTh', 'Best']
cls = '1'

res = defaultdict(list)
# One (strategy, explainer) label per output row, so the table rows are
# identifiable instead of being numbered 0, 1, 2.
row_labels = []

for strat in strategy:
    for meth in method:
        row_labels.append((strat, meth))
        for dataset in dataset_name:
            # The "Lst" flag in the file name is True only for these three datasets.
            last = dataset in ['AlkaneCarbonyl', 'Mutagenicity', 'Benzene']

            # Each dataset has its own benchmark strategy folder.
            if last:
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyBestK/Expl_results_list_Lst{last}.csv'
            elif dataset in ['BA2Motif', 'HouseColors']:
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop5/Expl_results_list_Lst{last}.csv'
            elif dataset == 'GridHouse':
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop14/Expl_results_list_Lst{last}.csv'
            elif dataset == 'BA2grid':
                expl_best = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/StrategyTop9/Expl_results_list_Lst{last}.csv'

            # "Best" compares the benchmark file with itself, so the "S"
            # columns then hold the benchmark values.
            if strat == 'Best':
                expl_split = expl_best
            else:
                expl_split = f'../experiments/{dataset}/GIN_{dataset}_CV/Explanations/{meth}/{cls}/Strategy{strat}/Expl_results_list_Lst{last}.csv'

            df_best = pd.read_csv(expl_best, index_col=1)
            df_split = pd.read_csv(expl_split, index_col=1)
            # Index join; shared columns get suffix "B" (benchmark) or "S" (strategy).
            joined = df_best.join(df_split, lsuffix='B', rsuffix='S')

            kavg = joined['AccumulatorKS'].mean()
            kstd = joined['AccumulatorKS'].std()
            res[dataset].append(f'{round(kavg, 0)}±{round(kstd, 0)}')

df = pd.DataFrame(
    res,
    index=pd.MultiIndex.from_tuples(row_labels, names=['Strategy', 'Algorithm']),
)

df