In [59]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as patches
import re

from Utilities.EvaluationMain import *
from Utilities.Utilities import ReadYaml, SerializeObjects, DeserializeObjects
from Models.Caller import *
from BatchMIEvaluation import LoadModelConfigs, LoadParams
from Utilities.Visualization import VisReconGivenZ_FCA, HeatMapFreqZ_FCA, VisReconGivenFC_ZA, VisReconExtractZ_FC

def Normalization(series):
    """Min-max scale a numeric pandas Series into the [0, 1] range.

    Parameters
    ----------
    series : pandas.Series
        Column to rescale. Intended for use with ``DataFrame.apply``, which
        passes one column at a time.

    Returns
    -------
    pandas.Series
        ``(series - min) / (max - min)`` for numeric input. A constant numeric
        column is mapped to 0.0 (missing entries stay NaN) instead of becoming
        all-NaN through a 0 / 0 division. Non-numeric columns are returned
        unchanged.
    """
    if not np.issubdtype(series.dtype, np.number):
        return series  # Non-numeric columns pass through untouched.

    Lower = series.min()
    Span = series.max() - Lower
    if Span == 0:
        # Constant column: the usual formula would evaluate 0 / 0 for every entry.
        return (series - Lower).astype(float)
    return (series - Lower) / Span


### Data Aggregation and Analysis Setup for Model Evaluation 

In [60]:
# Model identifiers to analyse, grouped by signal type (ART / II) and by the
# trailing numeric setting (500 / 800). The ConVAE entries carry no such
# suffix, so the same two names appear in both lists of a signal type.
TabList_ART_500 = [
    'SKZ_ART_30_500', 'SKZ_ART_50_500',
    'ConVAE_ART_30', 'ConVAE_ART_50',
    'SKZFC_ART_30_500', 'SKZFC_ART_50_500',
    'TCMIDKZFC_ART_30_500', 'TCMIDKZFC_ART_50_500',
    'FACFC_ART_30_500', 'FACFC_ART_50_500',
]
TabList_ART_800 = [
    'SKZ_ART_30_800', 'SKZ_ART_50_800',
    'ConVAE_ART_30', 'ConVAE_ART_50',
    'SKZFC_ART_30_800', 'SKZFC_ART_50_800',
    'TCMIDKZFC_ART_30_800', 'TCMIDKZFC_ART_50_800',
    'FACFC_ART_30_800', 'FACFC_ART_50_800',
]

TabList_II_500 = [
    'SKZ_II_30_500', 'SKZ_II_50_500',
    'ConVAE_II_30', 'ConVAE_II_50',
    'SKZFC_II_30_500', 'SKZFC_II_50_500',
    'TCMIDKZFC_II_30_500', 'TCMIDKZFC_II_50_500',
    'FACFC_II_30_500', 'FACFC_II_50_500',
]
TabList_II_800 = [
    'SKZ_II_30_800', 'SKZ_II_50_800',
    'ConVAE_II_30', 'ConVAE_II_50',
    'SKZFC_II_30_800', 'SKZFC_II_50_800',
    'TCMIDKZFC_II_30_800', 'TCMIDKZFC_II_50_800',
    'FACFC_II_30_800', 'FACFC_II_50_800',
]

# Benchmark model identifiers, one list per signal type.
ARTBenchList = [
    'BaseVAE_ART_30', 'TCVAE_ART_30', 'FACVAE_ART_30',
    'BaseVAE_ART_50', 'TCVAE_ART_50', 'FACVAE_ART_50',
]
IIBenchList = [
    'BaseVAE_II_30', 'TCVAE_II_30', 'FACVAE_II_30',
    'BaseVAE_II_50', 'TCVAE_II_50', 'FACVAE_II_50',
]



# Directories holding the result tables of the main runs and of the benchmarks
TableDir = './EvalResults/Tables/'
BMTableDir = './Benchmarks/EvalResults/Tables/'


def _ReadTables(Directory, FileNames):
    """Read each CSV named in `FileNames` from `Directory` and stack them row-wise.

    All files are read first and concatenated once, instead of growing a
    DataFrame inside a loop. The original row index of every file is kept.
    Returns an empty DataFrame when `FileNames` is empty.
    """
    Frames = [pd.read_csv(os.path.join(Directory, name)) for name in FileNames]
    return pd.concat(Frames, axis=0) if Frames else pd.DataFrame()


# Retrieve table names from directories.
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the concatenated tables reproducible between runs.
TableList = sorted(os.listdir(TableDir))
BMtableList = sorted(os.listdir(BMTableDir))

# Filter tables by 'Acc' keyword and specific file pattern
AcctableList = [tab for tab in TableList if 'Acc' in tab and 'Nj1_FC1.0.' in tab]
BMAcctableList = [tab for tab in BMtableList if 'Acc' in tab and 'Nj1.' in tab]

# Load the accuracy tables of both sets
AcctableSet = _ReadTables(TableDir, AcctableList)
BMAcctableSet = _ReadTables(BMTableDir, BMAcctableList)

# Combine accuracy tables from both sets
AcctableSet = pd.concat([AcctableSet, BMAcctableSet])

# Filter tables by 'MI' keyword and specific file pattern
MItableList = [tab for tab in TableList if 'MI' in tab and 'Nj1_FC1.0.' in tab]
BMMItableList = [tab for tab in BMtableList if 'MI' in tab and 'Nj1.' in tab]

# Load the MI tables of both sets
MItableSet = _ReadTables(TableDir, MItableList)
BMMItableSet = _ReadTables(BMTableDir, BMMItableList)

# Combine MI tables from both sets
MItableSet = pd.concat([MItableSet, BMMItableSet])

In [3]:
# Combine different tab lists for analysis
# NOTE(review): the cell that follows repeats every step of this one before
# merging the results, so this cell looks redundant — confirm and drop one.
AnalTabList = TabList_ART_800 + TabList_ART_500 + TabList_II_500 + TabList_II_800 + ARTBenchList + IIBenchList

# Define the list of metrics for analysis
AnalMetricList = ['(i) I(V;Z)', 
                  '(ii) $I(V; \\acute{Z} \\mid Z)$',
                  '(iii) $I(V;\\acute{Z})$',
                  '(iv) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$',
                  '(v) $I(S;\\acute{Z})$',
                  '(vi) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$']

# Accuracy table: keep the analysed models and the two columns of interest,
# divide MAPEnorm by 100 (presumably percent -> fraction; confirm upstream),
# then rename the columns for presentation.
AnalAcctableSet = AcctableSet[AcctableSet['Model'].isin(AnalTabList)].reset_index(drop=True)
AnalAcctableSet = AnalAcctableSet[['Model', 'MeanKldRes', 'MAPEnorm']].copy()
AnalAcctableSet['MAPEnorm'] = AnalAcctableSet['MAPEnorm'] / 100
AnalAcctableSet.columns = ['Model', 'FQI', 'MAPE']

# MI table: keep the analysed models and metrics, average 'Values' per
# (Model, Metrics) pair, then pivot to one row per model and one column per metric.
# The mean is restricted to 'Values' because averaging the whole frame raises
# on non-numeric columns in pandas >= 2.0.
AnalMItableSet = MItableSet[MItableSet['Model'].isin(AnalTabList)].reset_index(drop=True)
AnalMItableSet = AnalMItableSet[AnalMItableSet['Metrics'].isin(AnalMetricList)]
AnalMItableSet = AnalMItableSet.groupby(['Model', 'Metrics'], as_index=False)['Values'].mean()
AnalMItableSet = pd.pivot(AnalMItableSet, index='Model', columns='Metrics', values='Values').reset_index()


### Compilation and Normalization of Model Performance Metrics

In [4]:
# Combine different tab lists for analysis
AnalTabList = TabList_ART_800 + TabList_ART_500 + TabList_II_500 + TabList_II_800 + ARTBenchList + IIBenchList

# Define the list of metrics for analysis
AnalMetricList = ['(i) I(V;Z)', 
                  '(ii) $I(V; \\acute{Z} \\mid Z)$',
                  '(iii) $I(V;\\acute{Z})$',
                  '(iv) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$',
                  '(v) $I(S;\\acute{Z})$',
                  '(vi) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$']

# Accuracy table: keep the analysed models and the two columns of interest,
# divide MAPEnorm by 100 (presumably percent -> fraction; confirm upstream),
# then rename the columns for presentation.
AnalAcctableSet = AcctableSet[AcctableSet['Model'].isin(AnalTabList)].reset_index(drop=True)
AnalAcctableSet = AnalAcctableSet[['Model', 'MeanKldRes', 'MAPEnorm']].copy()
AnalAcctableSet['MAPEnorm'] = AnalAcctableSet['MAPEnorm'] / 100
AnalAcctableSet.columns = ['Model', 'FQI', 'MAPE']

# MI table: keep the analysed models and metrics, average 'Values' per
# (Model, Metrics) pair, then pivot to one row per model and one column per metric.
# The mean is restricted to 'Values' because averaging the whole frame raises
# on non-numeric columns in pandas >= 2.0.
AnalMItableSet = MItableSet[MItableSet['Model'].isin(AnalTabList)].reset_index(drop=True)
AnalMItableSet = AnalMItableSet[AnalMItableSet['Metrics'].isin(AnalMetricList)]
AnalMItableSet = AnalMItableSet.groupby(['Model', 'Metrics'], as_index=False)['Values'].mean()
AnalMItableSet = pd.pivot(AnalMItableSet, index='Model', columns='Metrics', values='Values').reset_index()

# Column-wise min-max scaled copy of the MI table, using the Normalization
# helper defined in the first code cell (the 'Model' column passes through).
# NOTE(review): the merge below uses the raw AnalMItableSet, not this scaled
# copy — confirm that is intended.
NormAnalMItableSet = AnalMItableSet.apply(Normalization)

# One row per model carrying both the accuracy and the MI columns
AnalAccMItable = pd.merge(AnalAcctableSet, AnalMItableSet, on='Model', how='inner').sort_values('Model').reset_index(drop=True)

### Extracting the `J` and `C` integers from model names with regular expressions

In [61]:
# Define a function to extract integers based on the specified patterns using regular expressions
def extract_integers_with_re(model_name):
    """Pull up to two integers out of an underscore-delimited model name.

    Parameters
    ----------
    model_name : str
        Name such as ``'SKZ_ART_30_500'`` or ``'ConVAE_ART_30'``.

    Returns
    -------
    tuple
        ``(between, trailing)``: ``between`` is the first run of digits
        enclosed by underscores on both sides, ``trailing`` is the run of
        digits that follows the last underscore at the very end of the name.
        Either element is ``np.nan`` when its pattern does not occur.
    """
    def _first_int(pattern):
        # First captured group as an int, or NaN when the pattern is absent.
        found = re.search(pattern, model_name)
        if found is None:
            return np.nan
        return int(found.group(1))

    return _first_int(r'_(\d+)_'), _first_int(r'_(\d+)$')


# Parse every model name once and store the two extracted integers as the
# 'J' (between underscores) and 'C' (end of name) columns. The float dtype
# leaves room for NaN where a pattern is absent.
ParsedJC = pd.DataFrame(
    [extract_integers_with_re(name) for name in AnalAccMItable['Model']],
    columns=['J', 'C'],
    index=AnalAccMItable.index,
).astype(float)
AnalAccMItable['J'] = ParsedJC['J']
AnalAccMItable['C'] = ParsedJC['C']

# Names holding a single number (e.g. 'ConVAE_ART_30') only match the
# end-of-name pattern, so that number lands in 'C'. Move it to 'J' and blank 'C'.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
Mask = AnalAccMItable['J'].isna()
AnalAccMItable.loc[Mask, 'J'] = AnalAccMItable.loc[Mask, 'C']
AnalAccMItable.loc[Mask, 'C'] = np.nan

### Model Mapping and Feature Categorization 

In [62]:
# Display label for every model code (only BaseVAE is relabelled)
ModelMap = dict(
    BaseVAE='$\\beta$' + '-VAE',
    ConVAE='ConVAE',
    TCVAE='TCVAE',
    FACVAE='FACVAE',
    SKZ='SKZ',
    SKZFC='SKZFC',
    TCMIDKZFC='TCMIDKZFC',
    FACFC='FACFC',
)

# Feature flags per model, written as one compact row per model. The columns
# follow _FeatureNames; each cell is 'O', 'Δ' or '' exactly as it appears in
# the exported table.
_FeatureNames = ('SKZ', 'DKZ', 'TC', 'MI', 'Theta')
_FeatureFlags = {
    'BaseVAE':   ('O', '',  '',  '',  ''),
    'ConVAE':    ('O', '',  '',  '',  'Δ'),
    'TCVAE':     ('',  'O', 'O', 'O', ''),
    'FACVAE':    ('O', '',  'O', '',  'O'),
    'SKZ':       ('O', '',  '',  '',  'Δ'),
    'SKZFC':     ('O', '',  '',  '',  'O'),
    'TCMIDKZFC': ('',  'O', 'O', 'O', 'O'),
    'FACFC':     ('O', '',  'O', '',  'O'),
}

# Expand each row into the {'Model': ..., 'SKZ': ..., ...} record used downstream
ModelFeature = {
    code: {'Model': code, **dict(zip(_FeatureNames, flags))}
    for code, flags in _FeatureFlags.items()
}

# Presentation order of the models in the final tables
DefinedOrder = ['BaseVAE', 'ConVAE', 'TCVAE', 'FACVAE', 'SKZ', 'SKZFC', 'TCMIDKZFC', 'FACFC']

In [79]:
# Start from an empty feature table; it is filled later in the notebook
AggTab = pd.DataFrame()

# Split 'Model' on underscores: the first token is the model title, the
# second the signal type. Done column-wise instead of one row at a time.
NameParts = AnalAccMItable['Model'].str.split('_')
AnalAccMItable['ModelTitle'] = NameParts.str[0]
AnalAccMItable['SigType'] = NameParts.str[1]

# Raw metric column -> presentation name (the '(i)'...'(vi)' prefix is dropped)
MetricRename = {'(i) I(V;Z)': '$I(V;Z)$',
                '(ii) $I(V; \\acute{Z} \\mid Z)$': '$I(V; \\acute{Z} \\mid Z)$', 
                '(iii) $I(V;\\acute{Z})$': '$I(V;\\acute{Z})$',
                '(iv) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$': '$I(V;\\acute{\\Theta} \\mid \\acute{Z})$', 
                '(v) $I(S;\\acute{Z})$':'$I(S;\\acute{Z})$',
                '(vi) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$': '$I(S;\\acute{\\Theta} \\mid \\acute{Z})$'}

# Select and rename specific columns from AnalAccMItable
SelTable = AnalAccMItable[['ModelTitle', 'J', 'C', 'SigType', 'FQI', 'MAPE', *MetricRename]]
SelTable = SelTable.rename(columns=MetricRename)

# Measurement columns in presentation order
Cols = ['FQI', 'MAPE', *MetricRename.values()]

# One sub-table per signal type, sorted on the merge keys
SortKeys = ['ModelTitle', 'C', 'J']
ARTSelTable = SelTable[SelTable['SigType'] == 'ART'].sort_values(by=SortKeys).drop(columns='SigType')
IISelTable = SelTable[SelTable['SigType'] == 'II'].sort_values(by=SortKeys).drop(columns='SigType')

# Place ART and II measurements side by side; pandas appends '_x' to the ART
# columns and '_y' to the II columns.
AggTable = pd.merge(ARTSelTable, IISelTable, on=['ModelTitle', 'J', 'C'])
AggTable = AggTable.round(3)
AggTable = AggTable.fillna('-')  # '-' marks values that are not available

# Order the rows by the predefined model order, then by C and J
AggTable['ModelTitle'] = pd.Categorical(AggTable['ModelTitle'], categories=DefinedOrder, ordered=True)
AggTable = AggTable.sort_values(by=SortKeys)

# Save the aggregated table to an Excel file
#AggTable.to_excel('./EvalResults/Tables/AggTab.xlsx')


### For LaTeX

In [135]:
# Print one LaTeX table row per AggTable record. The first three columns
# (ModelTitle, J, C) are skipped; the remaining cells are joined with ' & '
# and the line starts with a leading ' & '.
for _, TexRow in AggTable.iterrows():
    Cells = [str(value) for value in TexRow.iloc[3:]]
    # A record with no measurement cells prints an empty line.
    print(' & ' + ' & '.join(Cells) if Cells else '')

 & 0.047 & 0.054 & 1.783 & 4.08 & - & - & - & - & 0.293 & 0.02 & 1.827 & 3.177 & - & - & - & -
 & 0.078 & 0.057 & 1.765 & 4.084 & - & - & - & - & 0.192 & 0.019 & 1.787 & 3.594 & - & - & - & -
 & 0.063 & 0.055 & 0.145 & 0.066 & 0.107 & 0.981 & 0.0 & 0.255 & 0.121 & 0.018 & 0.225 & 0.173 & 0.188 & 2.1 & 0.0 & 0.281
 & 0.063 & 0.057 & 0.119 & 0.062 & 0.113 & 1.245 & 0.0 & 0.163 & 0.11 & 0.02 & 0.322 & 0.392 & 0.436 & 1.047 & 0.0 & 0.271
 & 0.049 & 0.073 & 1.736 & 3.94 & - & - & - & - & 0.07 & 0.022 & 1.907 & 3.368 & - & - & - & -
 & 0.038 & 0.077 & 1.719 & 4.062 & - & - & - & - & 0.129 & 0.022 & 1.943 & 3.571 & - & - & - & -
 & 0.072 & 0.058 & 1.718 & 3.974 & - & - & - & - & 0.186 & 0.019 & 1.884 & 3.833 & - & - & - & -
 & 0.043 & 0.055 & 1.599 & 3.746 & - & - & - & - & 0.255 & 0.019 & 1.891 & 3.631 & - & - & - & -
 & 0.189 & 0.068 & 2.514 & 2.062 & 2.981 & 0.019 & 0.0 & 0.424 & 1.419 & 0.024 & 1.974 & 1.053 & 2.032 & 0.0 & 0.002 & 0.401
 & 0.133 & 0.091 & 1.663 & 1.766 & 1.337 & 0.13 & 0

In [139]:
# Summary statistics (count, mean, std, min, quartiles, max) of AggTable's numeric columns.
AggTable.describe()

Unnamed: 0,J,FQI_x,MAPE_x,$I(V;Z)$_x,$I(V; \acute{Z} \mid Z)$_x,FQI_y,MAPE_y,$I(V;Z)$_y,$I(V; \acute{Z} \mid Z)$_y
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,40.0,0.135958,0.061583,1.805042,3.2265,0.348875,0.020833,1.77075,2.428625
std,10.215078,0.271149,0.009995,0.633894,1.415373,0.425973,0.002514,0.47656,1.057531
min,30.0,0.038,0.049,0.119,0.062,0.07,0.018,0.225,0.173
25%,30.0,0.049,0.054,1.71075,2.8285,0.13425,0.019,1.817,2.061
50%,40.0,0.0645,0.0605,1.774,3.9325,0.156,0.02,1.883,2.6545
75%,50.0,0.0795,0.06625,1.98625,4.08525,0.2625,0.02125,1.9625,3.168
max,50.0,1.366,0.091,3.137,4.432,1.419,0.028,2.306,3.833


### Data Transformation and Aggregation

In [90]:
# Start from an empty feature table; it is filled by a later cell
AggTab = pd.DataFrame()

# Split 'Model' on underscores: the first token is the model title, the
# second the signal type. Done column-wise instead of one row at a time.
NameParts = AnalAccMItable['Model'].str.split('_')
AnalAccMItable['ModelTitle'] = NameParts.str[0]
AnalAccMItable['SigType'] = NameParts.str[1]

# Raw metric column -> presentation name (the '(ii)'/'(iv)'/'(vi)' prefix is dropped)
MetricRename = {'(ii) $I(V; \\acute{Z} \\mid Z)$': '$I(V; \\acute{Z} \\mid Z)$', 
                '(iv) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$': '$I(V;\\acute{\\Theta} \\mid \\acute{Z})$', 
                '(vi) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$': '$I(S;\\acute{\\Theta} \\mid \\acute{Z})$'}

# Select and rename specific columns from AnalAccMItable
SelTable = AnalAccMItable[['ModelTitle', 'SigType', 'FQI', 'MAPE', *MetricRename]]
SelTable = SelTable.rename(columns=MetricRename)

# Group by 'ModelTitle' and 'SigType', calculate mean and round the results
GroupMeanTable = SelTable.groupby(['ModelTitle', 'SigType']).mean().reset_index()
GroupMeanTable = GroupMeanTable.round(3)

# Two-level column header: signal type on top, measurement underneath
Cols = ['FQI', 'MAPE', *MetricRename.values()]
Iterables = [GroupMeanTable['SigType'].unique(), Cols]
MultIndex = pd.MultiIndex.from_product(Iterables, names=['SigType', 'Measurement'])

# Empty frame with one row per model title and the two-level columns
NestGroupMeanTable = pd.DataFrame(index=GroupMeanTable['ModelTitle'].unique(), columns=MultIndex)

# Copy each group mean into its (SigType, Measurement) cell
for _, GroupRow in GroupMeanTable.iterrows():
    for Measure in Cols:
        NestGroupMeanTable.loc[GroupRow['ModelTitle'], (GroupRow['SigType'], Measure)] = GroupRow[Measure]

# Turn the model titles back into a column and call that column 'Model'
NestGroupMeanTable = NestGroupMeanTable.reset_index()
NestGroupMeanTable = NestGroupMeanTable.rename(columns={'index': 'Model'})


### Comprehensive Aggregation and Sorting of Model Features and Performance Metrics for Analysis

In [16]:
# Ensure 'Model' is the index of NestGroupMeanTable for alignment during merging.
# The guard lets the cell be re-run after 'Model' has already become the index.
if 'Model' in NestGroupMeanTable.columns:
    NestGroupMeanTable = NestGroupMeanTable.set_index('Model')

# Build the feature table in one step: one ModelFeature record per model,
# in the row order of NestGroupMeanTable.
AggTab = pd.DataFrame([ModelFeature[name] for name in NestGroupMeanTable.index])

# Set 'Model' as the index of AggTab and give it the same two-level column
# layout as NestGroupMeanTable (empty second level).
# NOTE(review): the level names here are ('Measurement', 'SigType'), the
# reverse of NestGroupMeanTable's ('SigType', 'Measurement') — confirm intent.
AggTab = AggTab.set_index('Model')
AggTab.columns = pd.MultiIndex.from_product([AggTab.columns, ['']], names=['Measurement', 'SigType'])

# Merge AggTab with NestGroupMeanTable on the shared 'Model' index
AggTab = pd.merge(AggTab, NestGroupMeanTable, left_index=True, right_index=True)

# Reset the index to turn 'Model' back into a column
AggTab = AggTab.reset_index()

# Create a categorical type for 'Model' based on a predefined order and sort the DataFrame
AggTab['Model'] = pd.Categorical(AggTab['Model'], categories=DefinedOrder, ordered=True)
AggTab = AggTab.sort_values('Model')

# Save the aggregated table to an Excel file
AggTab.to_excel('./EvalResults/Tables/AggTab.xlsx')

# Display the aggregated table
AggTab

Unnamed: 0_level_0,Model,SKZ,DKZ,TC,MI,Theta,ART,ART,ART,ART,ART,II,II,II,II,II
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,FQI,MAPE,$I(V; \acute{Z} \mid Z)$,$I(V;\acute{\Theta} \mid \acute{Z})$,$I(S;\acute{\Theta} \mid \acute{Z})$,FQI,MAPE,$I(V; \acute{Z} \mid Z)$,$I(V;\acute{\Theta} \mid \acute{Z})$,$I(S;\acute{\Theta} \mid \acute{Z})$
0,BaseVAE,O,,,,,0.062,0.055,4.082,,,0.243,0.019,3.385,,
1,ConVAE,O,,,,Δ,0.063,0.056,0.064,1.113,0.209,0.115,0.019,0.283,1.573,0.276
7,TCVAE,,O,O,O,,0.043,0.075,4.001,,,0.1,0.022,3.47,,
3,FACVAE,O,,O,,O,0.057,0.056,3.86,,,0.22,0.019,3.732,,
4,SKZ,O,,,,Δ,0.515,0.072,1.419,0.038,0.311,1.268,0.026,1.141,0.004,0.337
5,SKZFC,O,,,,O,0.07,0.058,4.15,0.091,0.433,0.166,0.02,2.591,0.175,0.457
6,TCMIDKZFC,,O,O,O,O,0.054,0.058,3.597,0.244,0.552,0.174,0.019,2.78,0.359,0.517
2,FACFC,O,,O,,O,0.063,0.061,4.189,0.096,0.529,0.147,0.02,2.625,0.161,0.569
