# Dissertation Experiment 8a
# Experiment Functions for XAI Metric Storage for Dissertation
Ciaran Finnegan January 2024

In [1]:
import pandas as pd
import os
import time
import random
import openpyxl

### Suppress Warnings to clean up output

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)

## Read File and split into a list of dataframes

In [3]:
def split_TestData_into_nn_Blocks(df, num_splits = 20):
    """Cut ``df`` into ``num_splits`` consecutive blocks of equal length.

    The block length is ``len(df) // num_splits``. Rows past
    ``num_splits * block length`` (the remainder of the integer division)
    are not placed in any block.

    Args:
        df: DataFrame to be partitioned, in its current row order.
        num_splits: Number of blocks to produce (default 20).

    Returns:
        A tuple ``(split_size, list_df)`` where ``split_size`` is the number
        of rows per block and ``list_df`` is the list of block DataFrames.
    """
    total_rows = len(df)
    split_size = total_rows // num_splits

    list_df = []
    for block_no in range(num_splits):
        lower = block_no * split_size
        upper = min(lower + split_size, total_rows)
        list_df.append(df.iloc[lower:upper])

    # Report the dimensions of the split for a quick sanity check
    print(f"Original DF Length: {total_rows}")
    print(f"Split Size: {split_size}")

    return split_size, list_df

## Add a routine to check output values

In [4]:
def startBlockDisplay(df, split_size, rec_cnt = 6):
    """Show the first row of each of the leading blocks of ``df``.

    Walks ``df`` in steps of ``split_size`` and displays the single row at
    each block start, stopping once ``rec_cnt`` block starts have been shown
    or the end of ``df`` is reached.

    Args:
        df: The full (unsplit) DataFrame.
        split_size: Number of rows per block; used as the step between rows.
        rec_cnt: Maximum number of block-start rows to display (default 6).

    Note:
        ``display`` is not defined in this file; it is expected to be
        available in the notebook namespace (IPython/Jupyter).
    """
    # Block starts at or beyond this row offset are not shown
    cutoff = split_size * rec_cnt

    for block_start in range(0, len(df), split_size):
        if block_start >= cutoff:
            break
        # Double brackets keep the result a one-row DataFrame
        display(df.iloc[[block_start]])

## Function to update or create the spreadsheet and determine the 'Sample' number

In [5]:
#def update_xai_spreadsheet(xai_file_path):
def return_xai_spreadsheet_resumePoint(xai_file_path):
    """Return the next 'Sample Number' to process for an XAI metrics workbook.

    If the workbook does not exist yet, an empty one is created at
    ``xai_file_path`` and processing starts at sample 1. Otherwise the next
    sample is one more than the largest value in the 'Sample Number' column.

    Args:
        xai_file_path: Path of the Excel workbook holding the XAI metrics.

    Returns:
        int: The sample number to resume from. This is 1 when the workbook
        is new, has no 'Sample Number' column, or that column holds no
        numeric values.
    """
    if not os.path.exists(xai_file_path):
        # Create an empty spreadsheet so later runs find a file to read
        pd.DataFrame().to_excel(xai_file_path, engine='openpyxl')
        return 1

    xai_df = pd.read_excel(xai_file_path)

    if 'Sample Number' not in xai_df.columns:
        return 1

    # An empty (or non-numeric) column has a NaN maximum; NaN + 1 would be
    # returned as the resume point, so treat that case as "start at 1".
    last_sample = pd.to_numeric(xai_df['Sample Number'], errors='coerce').max()
    if pd.isna(last_sample):
        return 1

    # Return a plain int so that keys built from it read 'df_4', not 'df_4.0'
    return int(last_sample) + 1

## Function to return next sample index to process

In [6]:
def return_next_sample_number_to_process(list_df, xai_file_path, xai_method):
    """Look up, report and return the next sample number to process.

    The sample number is obtained from ``return_xai_spreadsheet_resumePoint``
    (which also creates the workbook when it does not exist) and echoed to
    stdout together with the XAI method name.

    Args:
        list_df: Collection of test-data blocks. It is not consulted; the
            parameter is retained so existing callers keep working.
        xai_file_path: Path of the Excel workbook holding the XAI metrics.
        xai_method: Name of the XAI method, used only in the printed message.

    Returns:
        The next sample number as reported by the workbook.
    """
    # The resume point depends only on the workbook, so it is read exactly
    # once. The former single-pass loop over ``list_df`` left ``sample``
    # unbound (UnboundLocalError) whenever ``list_df`` was empty.
    sample = return_xai_spreadsheet_resumePoint(xai_file_path)

    print(f"Sample Number for current {xai_method} XAI DataFrame: {sample}")

    return sample

## Function to load next block of Test Data

In [7]:
def select_restart_testdata_block(df_Selected_from_List,list_df, xai_file_path):
    """Pick the test-data block that matches the workbook's resume point.

    The resume sample number is read via
    ``return_xai_spreadsheet_resumePoint`` and turned into the key
    ``'df_<sample>'``, which is then looked up in ``list_df``.

    Args:
        df_Selected_from_List: Value returned unchanged when no block matches
            the key (typically the caller's current selection or ``None``).
        list_df: Container of blocks looked up with ``key in list_df`` and
            ``list_df[key]`` using string keys. NOTE(review): this implies a
            dict keyed ``'df_1'``, ``'df_2'``, ... rather than a plain list —
            confirm against the caller.
        xai_file_path: Path of the Excel workbook holding the XAI metrics.

    Returns:
        A tuple ``(selected_block, key)``; ``selected_block`` is
        ``df_Selected_from_List`` when ``key`` is not present in ``list_df``.
    """
    # A single lookup is all that is needed. The former loop always exited on
    # its first pass and left ``key`` unbound (UnboundLocalError at the
    # return) when ``list_df`` was empty.
    sample = return_xai_spreadsheet_resumePoint(xai_file_path)
    key = f'df_{sample}'

    if key in list_df:
        df_Selected_from_List = list_df[key]
        print(f"DataFrame selected for Sample Number {sample} witk key {key}")
    else:
        print(f"No DataFrame found for Sample Number {sample}")   

    return df_Selected_from_List, key

## Function to write XAI metrics for test data block to XL

In [8]:
def append_xai_metrics_to_excel(file_path, 
                    sample, 
                    xai_identity, 
                    xai_stability, 
                    xai_seperability, 
                    xai_similarity, 
                    comp_efficiency, 
                    df_row):
    """Append one row of XAI metrics to the Excel workbook at ``file_path``.

    The workbook is read if it exists (otherwise an empty table is assumed),
    a new row is added, and the whole table is written back, replacing the
    file contents.

    Args:
        file_path: Path of the Excel workbook to update or create.
        sample: Sample number stored in the 'Sample Number' column.
        xai_identity: Value for the 'XAI_Identity' column.
        xai_stability: Value for the 'XAI_Stability' column.
        xai_seperability: Value for the 'XAI_Seperability' column.
        xai_similarity: Value for the 'XAI_Similairity' column.
        comp_efficiency: Value for the 'Comp_Efficiency' column.
        df_row: A row (pandas Series) of the test-data block; its first
            three entries are copied into the new row under their own labels.
    """
    # Check if the file exists and read it; if not, start from an empty table
    if os.path.exists(file_path):
        existing_data = pd.read_excel(file_path, engine='openpyxl')
    else:
        existing_data = pd.DataFrame()

    # Column names are kept exactly as already used in existing workbooks
    new_row = {
        'Sample Number': sample,
        'XAI_Identity'    : xai_identity,
        'XAI_Stability'   : xai_stability,
        'XAI_Seperability': xai_seperability,
        'XAI_Similairity' : xai_similarity,
        'Comp_Efficiency' : comp_efficiency
    }

    # Carry over the first three fields of the supplied test-data row
    for col in df_row.index[:3]:
        new_row[col] = df_row[col]

    # DataFrame.append was removed in pandas 2.0; concatenating a one-row
    # frame is the supported equivalent.
    updated_data = pd.concat([existing_data, pd.DataFrame([new_row])],
                             ignore_index=True)

    # Write the updated table back to the Excel file
    updated_data.to_excel(file_path, index=False, engine='openpyxl')

## Wrapper function to call routine to write XAI metrics to XL

In [9]:
def write_xai_Metrics_to_XL(xai_file_path, 
                            sample, 
                            xai_identity, 
                            xai_stability, 
                            xai_seperability, 
                            xai_similarity, 
                            comp_efficiency,
                            df_Selected_from_List,
                            key=None):
    """Write the XAI metrics for one test-data block to the Excel workbook.

    Thin wrapper around ``append_xai_metrics_to_excel``: the first row of the
    selected block is passed along with the metrics so that its leading
    fields are stored next to them.

    Args:
        xai_file_path: Path of the Excel workbook to update or create.
        sample: Sample number of the block being recorded.
        xai_identity: Identity metric value.
        xai_stability: Stability metric value.
        xai_seperability: Separability metric value.
        xai_similarity: Similarity metric value.
        comp_efficiency: Computational-efficiency metric value.
        df_Selected_from_List: The selected test-data block, or ``None`` when
            no block was selected.
        key: Optional label of the selected block, used only in the
            confirmation message. When omitted, a module-level ``key``
            variable is used if one exists.
    """
    # Nothing to record when no block was selected. An empty block is treated
    # the same way because it has no first row (``.iloc[0]`` would raise
    # IndexError).
    if df_Selected_from_List is None or len(df_Selected_from_List) == 0:
        print("DiCE DataFrame not selected or does not exist.")
        return

    # Extract the first row from df_Selected_from_List
    first_row_of_selected_df = df_Selected_from_List.iloc[0]

    # Append the data to the Excel file
    append_xai_metrics_to_excel(xai_file_path, 
                                sample, 
                                xai_identity, 
                                xai_stability, 
                                xai_seperability, 
                                xai_similarity, 
                                comp_efficiency, 
                                first_row_of_selected_df)

    # ``key`` used to be read from an undeclared global, which raised
    # NameError unless the notebook happened to define it. Fall back to that
    # global for existing callers, and omit the key from the message when
    # none is available.
    if key is None:
        key = globals().get('key')
    key_note = f" witk key {key}" if key is not None else ""
    print(f"DiCE DataFrame selected for Sample Number {sample}{key_note}")