In [None]:
import pandas as pd

## Mean Fold Condition Analysis

In [None]:
# Parse the mean-fold rulebase into one row per rule.
# Each non-blank line is read as "<label> <n>: IF <cond> AND <cond> ... THEN ... <decision>":
# the rule number is the second token before ':', the conditions sit between
# ' IF ' and ' THEN ', and the decision is the last token after ' THEN '.
rule_records = []
max_rule_length = 0
with open("../../Data/explainability/mean_acc_fold/mean_rulebase.txt", "r") as f:
    for raw_rule in f:
        rule = raw_rule.strip()
        if not rule:
            # Blank lines (e.g. a trailing newline at the end of the file) hold no rule.
            continue

        rule_number = int(rule.split(':')[0].split()[1])
        rule_result = int(rule.split(' THEN ')[-1].split()[-1])

        # Split the antecedent on the full ' AND ' separator so that a rule with a
        # single condition yields exactly one piece (without the THEN clause).
        antecedent = rule.split(' IF ', 1)[1].split(' THEN ', 1)[0]
        rule_pieces_list = antecedent.split(' AND ')

        rule_length = len(rule_pieces_list)
        max_rule_length = max(max_rule_length, rule_length)

        record = {
            'Rule': rule_number,
            'Rule Length': rule_length,
            'Rule Decision': rule_result,
        }
        # One column per condition position; shorter rules simply lack the later keys.
        for position, piece in enumerate(rule_pieces_list, start=1):
            record[f'Rule Piece {position}'] = piece
        rule_records.append(record)

column_order = ['Rule', 'Rule Length', 'Rule Decision'] + \
               [f'Rule Piece {i}' for i in range(1, max_rule_length + 1)]

# Build the frame once from all records (missing pieces become NaN) instead of
# concatenating a one-row frame per rule.
mean_rule_elements_df = pd.DataFrame(rule_records).reindex(columns=column_order)

In [2]:
# Load the per-rule results for the mean-accuracy fold. Later cells read the
# 'Rule', 'Rule Decision', 'Accuracy' and '#Times_Used' columns from this table.
mean_rule_acc_df = pd.read_csv('../../Data/explainability/mean_acc_fold/mean_rule_results.csv')

In [3]:
# Join the per-rule results onto the parsed rules, keeping every parsed rule
# (right join). Missing values -- results for unmatched rules and the unused
# trailing 'Rule Piece' slots of short rules -- become 0; the grading cell
# skips that 0 placeholder. Rows end up ordered best-accuracy, most-used first.
mean_rule_analysis_df = (
    pd.merge(
        mean_rule_acc_df,
        mean_rule_elements_df,
        how='right',
        on=['Rule', 'Rule Decision'],
    )
    .fillna(0)
    .astype({'#Times_Used': int})
    .sort_values(
        by=['Accuracy', '#Times_Used'],
        ascending=[False, False],
        ignore_index=True,
    )
)

In [4]:
# Persist the merged per-rule table (results plus parsed conditions) for the mean fold.
mean_rule_analysis_df.to_csv('../../Data/explainability/mean_acc_fold/mean_condition_analysis.csv', index=False)

In [5]:
# Grade every condition at every rule-piece position of the mean fold.
#   Grade            = sum(Accuracy * #Times_Used) / sum(#Times_Used) over the rules
#                      that hold the condition at that position
#   Condition Weight = Grade * (condition's #Times_Used / total #Times_Used)
#   Condition Score  = Condition Weight * Grade

# Select the condition columns by name rather than by position (previously columns[5:]).
condition_cols = [col for col in mean_rule_analysis_df.columns if 'Rule Piece' in col]

# Sum of '#Times_Used' over all rules; replaces the hard-coded 3238, which is
# what this sum equals in the recorded run (359 + 2879 at 'Rule Piece 1').
total_times_used = int(mean_rule_analysis_df['#Times_Used'].sum())

result_columns = [
    "Rule Piece", "Condition", "Grade", "Times Used",
    "Condition Weight", "Times Incorrect", "Condition Score",
]

mean_results = []
for col_name in condition_cols:
    print(col_name)
    for condition in mean_rule_analysis_df[col_name].unique():
        if condition == 0:
            # 0 is the fillna placeholder for rules shorter than this position.
            continue

        condition_df = mean_rule_analysis_df[mean_rule_analysis_df[col_name] == condition]
        condition_total_times_used = sum(condition_df['#Times_Used'])
        condition_grade_numerator = sum(
            accuracy * times_used
            for accuracy, times_used in zip(condition_df['Accuracy'], condition_df['#Times_Used'])
        )

        # A condition whose rules were never used gets grade 0 instead of dividing by zero.
        if condition_total_times_used == 0:
            condition_grade = 0
        else:
            condition_grade = condition_grade_numerator / condition_total_times_used

        # Same guard for the overall total (no rule used at all).
        if total_times_used == 0:
            condition_weight = 0.0
        else:
            condition_weight = condition_grade * (condition_total_times_used / total_times_used)

        print(f"Condition: {condition}, Grade: {condition_grade}, Times Used: {condition_total_times_used}, Condition Weight: {condition_weight}")

        condition_incorrect = round(condition_total_times_used - (condition_grade * condition_total_times_used))
        condition_score = condition_weight * condition_grade
        mean_results.append({
            "Rule Piece": col_name,
            "Condition": condition,
            "Grade": condition_grade,
            "Times Used": condition_total_times_used,
            "Condition Weight": condition_weight,
            "Times Incorrect": condition_incorrect,
            'Condition Score': condition_score
        })

# Passing the column list keeps sort_values working even when no condition was graded.
mean_results_df = pd.DataFrame(mean_results, columns=result_columns).sort_values(by='Condition Score', ascending=False, ignore_index=True)
mean_results_df.to_csv('../../Data/explainability/mean_acc_fold/mean_piece_grades.csv', index=False)
        

Rule Piece 1
Condition: defenders_in_box <= 2.50, Grade: 0.9470752089721448, Times Used: 359, Condition Weight: 0.10500308833261271
Condition: defenders_in_box > 2.50, Grade: 0.5237929837415769, Times Used: 2879, Condition Weight: 0.46571958004694247
Rule Piece 2
Condition: yardline_100 > 34.50, Grade: 0.948000000084, Times Used: 250, Condition Weight: 0.07319332922205066
Condition: yardline_100 <= 34.50, Grade: 0.944954128440367, Times Used: 109, Condition Weight: 0.03180975911056207
Condition: game_seconds_remaining > 2387.50, Grade: 0.5121951220555555, Times Used: 738, Condition Weight: 0.11673872763341568
Condition: game_seconds_remaining <= 2387.50, Grade: 0.527790752038767, Times Used: 2141, Condition Weight: 0.3489808524135269
Rule Piece 3
Condition: offensive_predictability > 0.02, Grade: 0.9389671362488263, Times Used: 213, Condition Weight: 0.06176652255126622
Condition: DefenseTeam <= 22.50, Grade: 1.0, Times Used: 80, Condition Weight: 0.02470660901791229
Condition: AWAY_da

In [6]:
# Total score per rule for the mean fold: the sum of the Condition Scores of the
# conditions the rule holds, each looked up at the position where the rule holds it.
rule_condition_cols = [col for col in mean_rule_analysis_df.columns if 'Rule Piece' in col]

# Melt df to get one (Rule, Rule Piece, Condition) row per condition slot
mean_rule_analysis_df_melted = mean_rule_analysis_df.melt(
    id_vars=['Rule'],
    value_vars=rule_condition_cols,
    var_name='Rule Piece',
    value_name='Condition',
)

# Drop empty slots. Missing pieces were replaced by 0 upstream (fillna), so a
# plain dropna would not remove them; filter both NaN and the 0 placeholder.
is_empty_slot = (
    mean_rule_analysis_df_melted['Condition'].isna()
    | (mean_rule_analysis_df_melted['Condition'] == 0)
)
mean_rule_analysis_df_melted = mean_rule_analysis_df_melted[~is_empty_slot]

# mean_results_df holds one row per (Rule Piece, Condition). Joining on
# 'Condition' alone would duplicate a rule's row for every position at which the
# same condition occurs and inflate its total, so join on both keys.
df_combined = mean_rule_analysis_df_melted.merge(
    mean_results_df[['Rule Piece', 'Condition', 'Condition Score']],
    on=['Rule Piece', 'Condition'],
    how='left',
    validate='many_to_one',
)

# Now group by Rule and sum the condition scores
mean_rule_scores = (
    df_combined
    .groupby('Rule')['Condition Score']
    .sum()
    .reset_index()
    .rename(columns={'Condition Score': 'Total Rule Score'})
)

mean_rule_scores.to_csv('../../Data/explainability/mean_acc_fold/mean_rule_scores.csv', index=False)

In [7]:
# Average the Condition Score of each condition over every rule-piece position
# it appears at, highest first.
mean_condition_rankings = (
    mean_results_df
    .groupby(['Condition'])
    .agg({'Condition Score': 'mean'})
    .reset_index()
    .sort_values(by='Condition Score', ascending=False, ignore_index=True)
)

# A condition is whitespace-separated: first token = feature, second token =
# comparison operator, last token = numeric threshold.
condition_tokens = mean_condition_rankings['Condition'].str.split()
mean_feature_rankings = mean_condition_rankings.assign(**{
    'Feature': condition_tokens.str[0],
    'comparison operator': condition_tokens.str[1],
    'threshold': condition_tokens.str[-1].astype(float),
})[['Feature', 'comparison operator', 'threshold', 'Condition Score']]

# Collapse to one row per feature: the mean of its conditions' average scores.
mean_feature_rankings = (
    mean_feature_rankings
    .groupby('Feature')
    .agg({'Condition Score': 'mean'})
    .reset_index()
    .sort_values(by='Condition Score', ascending=False, ignore_index=True)
    .rename(columns={'Condition Score': 'Mean Fold Feature Score'})
)

# Sum of the feature scores of this fold (not used further in this cell).
feature_scores_total = mean_feature_rankings['Mean Fold Feature Score'].sum()
mean_feature_rankings

Unnamed: 0,Feature,Mean Fold Feature Score
0,defenders_in_box,0.171693
1,play_type,0.050969
2,home_team_on_defense,0.042383
3,epa,0.040204
4,stadium,0.037342
5,run_count,0.030744
6,yardline_100,0.029601
7,offensive_predictability,0.0294
8,HOME_day_since_last_game,0.029176
9,DefenseTeam,0.02894


## Max Fold Condition Analysis

In [8]:
# Parse the max-fold rulebase into one row per rule.
# Each non-blank line is read as "<label> <n>: IF <cond> AND <cond> ... THEN ... <decision>":
# the rule number is the second token before ':', the conditions sit between
# ' IF ' and ' THEN ', and the decision is the last token after ' THEN '.
rule_records = []
max_rule_length = 0
with open("../../Data/explainability/max_acc_fold/max_rulebase.txt", "r") as f:
    for raw_rule in f:
        rule = raw_rule.strip()
        if not rule:
            # Blank lines (e.g. a trailing newline at the end of the file) hold no rule.
            continue

        rule_number = int(rule.split(':')[0].split()[1])
        rule_result = int(rule.split(' THEN ')[-1].split()[-1])

        # Split the antecedent on the full ' AND ' separator so that a rule with a
        # single condition yields exactly one piece (without the THEN clause).
        antecedent = rule.split(' IF ', 1)[1].split(' THEN ', 1)[0]
        rule_pieces_list = antecedent.split(' AND ')

        rule_length = len(rule_pieces_list)
        max_rule_length = max(max_rule_length, rule_length)

        record = {
            'Rule': rule_number,
            'Rule Length': rule_length,
            'Rule Decision': rule_result,
        }
        # One column per condition position; shorter rules simply lack the later keys.
        for position, piece in enumerate(rule_pieces_list, start=1):
            record[f'Rule Piece {position}'] = piece
        rule_records.append(record)

column_order = ['Rule', 'Rule Length', 'Rule Decision'] + \
               [f'Rule Piece {i}' for i in range(1, max_rule_length + 1)]

# Build the frame once from all records (missing pieces become NaN) instead of
# concatenating a one-row frame per rule.
max_rule_elements_df = pd.DataFrame(rule_records).reindex(columns=column_order)

In [9]:
# Load the per-rule results for the max-accuracy fold. Later cells read the
# 'Rule', 'Rule Decision', 'Accuracy' and '#Times_Used' columns from this table.
max_rule_acc_df = pd.read_csv('../../Data/explainability/max_acc_fold/max_rule_results.csv')

In [10]:
# Join the per-rule results onto the parsed rules, keeping every parsed rule
# (right join). Missing values -- results for unmatched rules and the unused
# trailing 'Rule Piece' slots of short rules -- become 0; the grading cell
# skips that 0 placeholder. Rows end up ordered best-accuracy, most-used first.
max_rule_analysis_df = (
    pd.merge(
        max_rule_acc_df,
        max_rule_elements_df,
        how='right',
        on=['Rule', 'Rule Decision'],
    )
    .fillna(0)
    .astype({'#Times_Used': int})
    .sort_values(
        by=['Accuracy', '#Times_Used'],
        ascending=[False, False],
        ignore_index=True,
    )
)

In [11]:
# Persist the merged per-rule table (results plus parsed conditions) for the max fold.
max_rule_analysis_df.to_csv('../../Data/explainability/max_acc_fold/max_condition_analysis.csv', index=False)

In [12]:
# Grade every condition at every rule-piece position of the max fold.
#   Grade            = sum(Accuracy * #Times_Used) / sum(#Times_Used) over the rules
#                      that hold the condition at that position
#   Condition Weight = Grade * (condition's #Times_Used / total #Times_Used)
#   Condition Score  = Condition Weight * Grade

# Select the condition columns by name rather than by position (previously columns[5:]).
condition_cols = [col for col in max_rule_analysis_df.columns if 'Rule Piece' in col]

# Sum of '#Times_Used' over all rules; replaces the hard-coded 3238, which is
# what this sum equals in the recorded run (2898 + 340 at 'Rule Piece 1').
total_times_used = int(max_rule_analysis_df['#Times_Used'].sum())

result_columns = [
    "Rule Piece", "Condition", "Grade", "Times Used",
    "Condition Weight", "Times Incorrect", "Condition Score",
]

max_results = []
for col_name in condition_cols:
    print(col_name)
    for condition in max_rule_analysis_df[col_name].unique():
        if condition == 0:
            # 0 is the fillna placeholder for rules shorter than this position.
            continue

        condition_df = max_rule_analysis_df[max_rule_analysis_df[col_name] == condition]
        condition_total_times_used = sum(condition_df['#Times_Used'])
        condition_grade_numerator = sum(
            accuracy * times_used
            for accuracy, times_used in zip(condition_df['Accuracy'], condition_df['#Times_Used'])
        )

        # A condition whose rules were never used gets grade 0 instead of dividing by zero.
        if condition_total_times_used == 0:
            condition_grade = 0
        else:
            condition_grade = condition_grade_numerator / condition_total_times_used

        # Same guard for the overall total (no rule used at all).
        if total_times_used == 0:
            condition_weight = 0.0
        else:
            condition_weight = condition_grade * (condition_total_times_used / total_times_used)

        print(f"Condition: {condition}, Grade: {condition_grade}, Times Used: {condition_total_times_used}, Condition Weight: {condition_weight}")

        condition_incorrect = round(condition_total_times_used - (condition_grade * condition_total_times_used))
        condition_score = condition_weight * condition_grade
        max_results.append({
            "Rule Piece": col_name,
            "Condition": condition,
            "Grade": condition_grade,
            "Times Used": condition_total_times_used,
            "Condition Weight": condition_weight,
            "Times Incorrect": condition_incorrect,
            'Condition Score': condition_score
        })

# Passing the column list keeps sort_values working even when no condition was graded.
max_results_df = pd.DataFrame(max_results, columns=result_columns).sort_values(by='Condition Score', ascending=False, ignore_index=True)
max_results_df.to_csv('../../Data/explainability/max_acc_fold/max_piece_grades.csv', index=False)
        

Rule Piece 1
Condition: defenders_in_box > 2.50, Grade: 0.6494133882774327, Times Used: 2898, Condition Weight: 0.5812229769079679
Condition: defenders_in_box <= 2.50, Grade: 0.7735294115823529, Times Used: 340, Condition Weight: 0.08122297712723904
Rule Piece 2
Condition: game_seconds_remaining > 1510.50, Grade: 0.6811397555488469, Times Used: 1474, Condition Weight: 0.3100679430756641
Condition: yardline_100 > 33.50, Grade: 0.9073170731707317, Times Used: 205, Condition Weight: 0.057442865966646085
Condition: game_seconds_remaining <= 1510.50, Grade: 0.6165730333911518, Times Used: 1424, Condition Weight: 0.27115503383230394
Condition: yardline_100 <= 33.50, Grade: 0.5703703699111111, Times Used: 135, Condition Weight: 0.02378011116059296
Rule Piece 3
Condition: epa <= 0.98, Grade: 0.6647992528450047, Times Used: 1071, Condition Weight: 0.2198888201967264
Condition: current_defence_rank <= 29.50, Grade: 0.9308510638297872, Times Used: 188, Condition Weight: 0.05404570722668314
Condit

In [13]:
# Total score per rule for the max fold: the sum of the Condition Scores of the
# conditions the rule holds, each looked up at the position where the rule holds it.
rule_condition_cols = [col for col in max_rule_analysis_df.columns if 'Rule Piece' in col]

# Melt df to get one (Rule, Rule Piece, Condition) row per condition slot
max_rule_analysis_df_melted = max_rule_analysis_df.melt(
    id_vars=['Rule'],
    value_vars=rule_condition_cols,
    var_name='Rule Piece',
    value_name='Condition',
)

# Drop empty slots. Missing pieces were replaced by 0 upstream (fillna), so a
# plain dropna would not remove them; filter both NaN and the 0 placeholder.
is_empty_slot = (
    max_rule_analysis_df_melted['Condition'].isna()
    | (max_rule_analysis_df_melted['Condition'] == 0)
)
max_rule_analysis_df_melted = max_rule_analysis_df_melted[~is_empty_slot]

# max_results_df holds one row per (Rule Piece, Condition). Joining on
# 'Condition' alone would duplicate a rule's row for every position at which the
# same condition occurs and inflate its total, so join on both keys.
df_combined = max_rule_analysis_df_melted.merge(
    max_results_df[['Rule Piece', 'Condition', 'Condition Score']],
    on=['Rule Piece', 'Condition'],
    how='left',
    validate='many_to_one',
)

# Now group by Rule and sum the condition scores
max_rule_scores = (
    df_combined
    .groupby('Rule')['Condition Score']
    .sum()
    .reset_index()
    .rename(columns={'Condition Score': 'Total Rule Score'})
)

max_rule_scores.to_csv('../../Data/explainability/max_acc_fold/max_rule_scores.csv', index=False)

In [14]:
# Average the Condition Score of each condition over every rule-piece position
# it appears at, highest first.
max_condition_rankings = (
    max_results_df
    .groupby(['Condition'])
    .agg({'Condition Score': 'mean'})
    .reset_index()
    .sort_values(by='Condition Score', ascending=False, ignore_index=True)
)

# A condition is whitespace-separated: first token = feature, second token =
# comparison operator, last token = numeric threshold.
condition_tokens = max_condition_rankings['Condition'].str.split()
max_feature_rankings = max_condition_rankings.assign(**{
    'Feature': condition_tokens.str[0],
    'comparison operator': condition_tokens.str[1],
    'threshold': condition_tokens.str[-1].astype(float),
})[['Feature', 'comparison operator', 'threshold', 'Condition Score']]

# Collapse to one row per feature: the mean of its conditions' average scores.
max_feature_rankings = (
    max_feature_rankings
    .groupby('Feature')
    .agg({'Condition Score': 'mean'})
    .reset_index()
    .sort_values(by='Condition Score', ascending=False, ignore_index=True)
    .rename(columns={'Condition Score': 'Max Fold Feature Score'})
)

# Sum of the feature scores of this fold (not used further in this cell).
feature_scores_total = max_feature_rankings['Max Fold Feature Score'].sum()
max_feature_rankings

Unnamed: 0,Feature,Max Fold Feature Score
0,defenders_in_box,0.220141
1,game_seconds_remaining,0.108418
2,epa,0.091511
3,offense_formation,0.083984
4,HOME_day_since_last_game,0.074784
5,home_team_on_defense,0.046812
6,goal_line_situation,0.033766
7,yardline_100,0.032756
8,run_count,0.031639
9,total_play_count,0.015661


## Min Fold Condition Analysis

In [15]:
# Parse the min-fold rulebase into one row per rule.
# Each non-blank line is read as "<label> <n>: IF <cond> AND <cond> ... THEN ... <decision>":
# the rule number is the second token before ':', the conditions sit between
# ' IF ' and ' THEN ', and the decision is the last token after ' THEN '.
rule_records = []
max_rule_length = 0
with open("../../Data/explainability/min_acc_fold/min_rulebase.txt", "r") as f:
    for raw_rule in f:
        rule = raw_rule.strip()
        if not rule:
            # Blank lines (e.g. a trailing newline at the end of the file) hold no rule.
            continue

        rule_number = int(rule.split(':')[0].split()[1])
        rule_result = int(rule.split(' THEN ')[-1].split()[-1])

        # Split the antecedent on the full ' AND ' separator so that a rule with a
        # single condition yields exactly one piece (without the THEN clause).
        antecedent = rule.split(' IF ', 1)[1].split(' THEN ', 1)[0]
        rule_pieces_list = antecedent.split(' AND ')

        rule_length = len(rule_pieces_list)
        max_rule_length = max(max_rule_length, rule_length)

        record = {
            'Rule': rule_number,
            'Rule Length': rule_length,
            'Rule Decision': rule_result,
        }
        # One column per condition position; shorter rules simply lack the later keys.
        for position, piece in enumerate(rule_pieces_list, start=1):
            record[f'Rule Piece {position}'] = piece
        rule_records.append(record)

column_order = ['Rule', 'Rule Length', 'Rule Decision'] + \
               [f'Rule Piece {i}' for i in range(1, max_rule_length + 1)]

# Build the frame once from all records (missing pieces become NaN) instead of
# concatenating a one-row frame per rule.
min_rule_elements_df = pd.DataFrame(rule_records).reindex(columns=column_order)

In [16]:
# Load the per-rule results for the min-accuracy fold. Later cells read the
# 'Rule', 'Rule Decision', 'Accuracy' and '#Times_Used' columns from this table.
min_rule_acc_df = pd.read_csv('../../Data/explainability/min_acc_fold/min_rule_results.csv')

In [17]:
# Join the per-rule results onto the parsed rules, keeping every parsed rule
# (right join). Missing values -- results for unmatched rules and the unused
# trailing 'Rule Piece' slots of short rules -- become 0; the grading cell
# skips that 0 placeholder. Rows end up ordered best-accuracy, most-used first.
min_rule_analysis_df = (
    pd.merge(
        min_rule_acc_df,
        min_rule_elements_df,
        how='right',
        on=['Rule', 'Rule Decision'],
    )
    .fillna(0)
    .astype({'#Times_Used': int})
    .sort_values(
        by=['Accuracy', '#Times_Used'],
        ascending=[False, False],
        ignore_index=True,
    )
)

In [18]:
# Persist the merged per-rule table (results plus parsed conditions) for the min fold.
min_rule_analysis_df.to_csv('../../Data/explainability/min_acc_fold/min_condition_analysis.csv', index=False)

In [19]:
# Grade every condition at every rule-piece position of the min fold.
#   Grade            = sum(Accuracy * #Times_Used) / sum(#Times_Used) over the rules
#                      that hold the condition at that position
#   Condition Weight = Grade * (condition's #Times_Used / total #Times_Used)
#   Condition Score  = Condition Weight * Grade

# Select the condition columns by name rather than by position (previously columns[5:]).
condition_cols = [col for col in min_rule_analysis_df.columns if 'Rule Piece' in col]

# Sum of '#Times_Used' over all rules; replaces the hard-coded 3238, which is
# what this sum equals in the recorded run (357 + 2881 at 'Rule Piece 1').
total_times_used = int(min_rule_analysis_df['#Times_Used'].sum())

result_columns = [
    "Rule Piece", "Condition", "Grade", "Times Used",
    "Condition Weight", "Times Incorrect", "Condition Score",
]

min_results = []
for col_name in condition_cols:
    print(col_name)
    for condition in min_rule_analysis_df[col_name].unique():
        if condition == 0:
            # 0 is the fillna placeholder for rules shorter than this position.
            continue

        condition_df = min_rule_analysis_df[min_rule_analysis_df[col_name] == condition]
        condition_total_times_used = sum(condition_df['#Times_Used'])
        condition_grade_numerator = sum(
            accuracy * times_used
            for accuracy, times_used in zip(condition_df['Accuracy'], condition_df['#Times_Used'])
        )

        # A condition whose rules were never used gets grade 0 instead of dividing by zero.
        if condition_total_times_used == 0:
            condition_grade = 0
        else:
            condition_grade = condition_grade_numerator / condition_total_times_used

        # Same guard for the overall total (no rule used at all).
        if total_times_used == 0:
            condition_weight = 0.0
        else:
            condition_weight = condition_grade * (condition_total_times_used / total_times_used)

        print(f"Condition: {condition}, Grade: {condition_grade}, Times Used: {condition_total_times_used}, Condition Weight: {condition_weight}")

        condition_incorrect = round(condition_total_times_used - (condition_grade * condition_total_times_used))
        condition_score = condition_weight * condition_grade
        min_results.append({
            "Rule Piece": col_name,
            "Condition": condition,
            "Grade": condition_grade,
            "Times Used": condition_total_times_used,
            "Condition Weight": condition_weight,
            "Times Incorrect": condition_incorrect,
            'Condition Score': condition_score
        })

# Passing the column list keeps sort_values working even when no condition was graded.
min_results_df = pd.DataFrame(min_results, columns=result_columns).sort_values(by='Condition Score', ascending=False, ignore_index=True)
min_results_df.to_csv('../../Data/explainability/min_acc_fold/min_piece_grades.csv', index=False)
        

Rule Piece 1
Condition: defenders_in_box <= 2.50, Grade: 0.8095238094677871, Times Used: 357, Condition Weight: 0.0892526250710315
Condition: defenders_in_box > 2.50, Grade: 0.4140923291426588, Times Used: 2881, Condition Weight: 0.3684373070599135
Rule Piece 2
Condition: yardline_100 > 34.50, Grade: 0.7442748090839695, Times Used: 262, Condition Weight: 0.06022235947498456
Condition: yardline_100 <= 34.50, Grade: 0.9894736842105263, Times Used: 95, Condition Weight: 0.02903026559604694
Condition: game_seconds_remaining > 2371.00, Grade: 0.3353584444471446, Times Used: 823, Condition Weight: 0.08523780104385421
Condition: game_seconds_remaining <= 2371.00, Grade: 0.4455782315257531, Times Used: 2058, Condition Weight: 0.28319950601605925
Rule Piece 3
Condition: current_defence_rank <= 27.50, Grade: 0.77083333325, Times Used: 240, Condition Weight: 0.05713403334774553
Condition: Precipitation <= 0.00, Grade: 1.0, Times Used: 81, Condition Weight: 0.025015441630636195
Condition: OffenseT

In [20]:
# Total score per rule for the min fold: the sum of the Condition Scores of the
# conditions the rule holds, each looked up at the position where the rule holds it.
rule_condition_cols = [col for col in min_rule_analysis_df.columns if 'Rule Piece' in col]

# Melt df to get one (Rule, Rule Piece, Condition) row per condition slot
min_rule_analysis_df_melted = min_rule_analysis_df.melt(
    id_vars=['Rule'],
    value_vars=rule_condition_cols,
    var_name='Rule Piece',
    value_name='Condition',
)

# Drop empty slots. Missing pieces were replaced by 0 upstream (fillna), so a
# plain dropna would not remove them; filter both NaN and the 0 placeholder.
is_empty_slot = (
    min_rule_analysis_df_melted['Condition'].isna()
    | (min_rule_analysis_df_melted['Condition'] == 0)
)
min_rule_analysis_df_melted = min_rule_analysis_df_melted[~is_empty_slot]

# min_results_df holds one row per (Rule Piece, Condition). Joining on
# 'Condition' alone would duplicate a rule's row for every position at which the
# same condition occurs and inflate its total, so join on both keys.
df_combined = min_rule_analysis_df_melted.merge(
    min_results_df[['Rule Piece', 'Condition', 'Condition Score']],
    on=['Rule Piece', 'Condition'],
    how='left',
    validate='many_to_one',
)

# Now group by Rule and sum the condition scores
min_rule_scores = (
    df_combined
    .groupby('Rule')['Condition Score']
    .sum()
    .reset_index()
    .rename(columns={'Condition Score': 'Total Rule Score'})
)

min_rule_scores.to_csv('../../Data/explainability/min_acc_fold/min_rule_scores.csv', index=False)

In [21]:
# Average the Condition Score of each condition over every rule-piece position
# it appears at, highest first.
min_condition_rankings = (
    min_results_df
    .groupby(['Condition'])
    .agg({'Condition Score': 'mean'})
    .reset_index()
    .sort_values(by='Condition Score', ascending=False, ignore_index=True)
)

# A condition is whitespace-separated: first token = feature, second token =
# comparison operator, last token = numeric threshold.
condition_tokens = min_condition_rankings['Condition'].str.split()
min_feature_rankings = min_condition_rankings.assign(**{
    'Feature': condition_tokens.str[0],
    'comparison operator': condition_tokens.str[1],
    'threshold': condition_tokens.str[-1].astype(float),
})[['Feature', 'comparison operator', 'threshold', 'Condition Score']]

# Collapse to one row per feature: the mean of its conditions' average scores.
min_feature_rankings = (
    min_feature_rankings
    .groupby('Feature')
    .agg({'Condition Score': 'mean'})
    .reset_index()
    .sort_values(by='Condition Score', ascending=False, ignore_index=True)
    .rename(columns={'Condition Score': 'Min Fold Feature Score'})
)

# Sum of the feature scores of this fold (not used further in this cell).
feature_scores_total = min_feature_rankings['Min Fold Feature Score'].sum()
min_feature_rankings

Unnamed: 0,Feature,Min Fold Feature Score
0,defenders_in_box,0.11241
1,offense_formation,0.054375
2,game_seconds_remaining,0.041103
3,surface_type,0.026096
4,epa,0.02229
5,yardline_100,0.02107
6,score_differential,0.015366
7,Precipitation,0.014372
8,home_team_on_defense,0.011145
9,OffenseTeam,0.009537


# Feature Importance Merging #

In [22]:
# Combine the per-fold feature scores into one ranking. Outer joins keep a
# feature that appears in only some folds; its score in the other folds is 0.
mean_max_merge = pd.merge(mean_feature_rankings, max_feature_rankings, on='Feature', how='outer').fillna(0)
overall_feature_rankings = pd.merge(mean_max_merge, min_feature_rankings, on='Feature', how='outer').fillna(0)

# Total = sum of the three fold scores; Normalised = share of the grand total.
overall_feature_rankings['Total Feature Score'] = (
    overall_feature_rankings['Mean Fold Feature Score']
    + overall_feature_rankings['Max Fold Feature Score']
    + overall_feature_rankings['Min Fold Feature Score']
)
total = overall_feature_rankings['Total Feature Score'].sum()
overall_feature_rankings['Normalised Feature Score'] = overall_feature_rankings['Total Feature Score'] / total
overall_feature_rankings = overall_feature_rankings.sort_values(by='Normalised Feature Score', ascending=False, ignore_index=True)

overall_feature_rankings.to_csv('../../Data/explainability/overall_feature_rankings.csv', index=False)

# Keep the frame as the cell's final expression so the notebook displays it
# (a bare expression followed by another statement shows nothing).
overall_feature_rankings