# Pareto optimization of event chains
Program for analyzing the DataFrame of simulated event chains and selecting the Pareto-optimal solutions.

In [40]:
import pandas as pd
import pickle

def load_simulation_output_df(path='simulation_output_df.pkl'):
    ''' Load the event chain simulation output.

    Args:
        path: Location of the pickled simulation DataFrame. Defaults to
            'simulation_output_df.pkl' in the current working directory.

    Returns:
        The unpickled DataFrame with the 'aggr_attack_type',
        'aggr_attacking_nodes' and 'event_probs' columns cast to str.

    Raises:
        FileNotFoundError: If no file exists at `path`.
        KeyError: If one of the three columns above is missing.
    '''
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # were produced by a trusted simulation run.
    simulation_output_df = pd.read_pickle(path)

    # Convert columns to str for hashing.
    for column in ('aggr_attack_type', 'aggr_attacking_nodes', 'event_probs'):
        simulation_output_df[column] = simulation_output_df[column].astype(str)

    return simulation_output_df

### Process the successful paths

In [41]:
def process_successful_paths(simulation_output_df):
    ''' Select successful event chains, group by decisions and calculate metrics.

    Every event chain whose node path conquered the graph at least once is
    kept (including the unsuccessful chains on that path). The chains are
    grouped by node path, attack type and attacking nodes, and for each group
    the mean aggregated probability and the probability-weighted expected
    number of final 'Player1' troops are calculated.

    Args:
        simulation_output_df: DataFrame with the columns 'graph_conquered',
            'aggr_node_path', 'aggr_attack_type', 'aggr_attacking_nodes',
            'aggr_prob' and 'final_troop_numbers' (a mapping that has a
            'Player1' entry).

    Returns:
        DataFrame with the columns 'Node path', 'Attack type',
        'Attacking nodes', 'Success coefficient' and 'Expected troop number',
        sorted by 'Success coefficient' in descending order. The frame is
        empty (but has these columns) when no chain conquered the graph.
    '''
    group_keys = ['aggr_node_path', 'aggr_attack_type', 'aggr_attacking_nodes']
    output_columns = ['Node path', 'Attack type', 'Attacking nodes',
                      'Success coefficient', 'Expected troop number']

    def calculate_weighted_final_troops(group):
        '''Calculate expected final troop numbers weighted by success weights.'''
        player1_troops = group['final_troop_numbers'].apply(lambda troops: troops['Player1'])
        weighted_sum = sum(group['success_weight'] * player1_troops)
        total_weight = group['success_weight'].sum()
        return weighted_sum / total_weight if total_weight > 0 else 0

    # Keep all chains that follow a node path which succeeded at least once.
    successful_outcomes = simulation_output_df[simulation_output_df['graph_conquered'] == True]
    successful_paths = successful_outcomes['aggr_node_path'].unique()
    chains = simulation_output_df[simulation_output_df['aggr_node_path'].isin(successful_paths)].copy()

    # Nothing succeeded: hand the caller an empty result instead of failing in
    # the group-wise calculations below.
    if chains.empty:
        return pd.DataFrame(columns=output_columns)

    # Mean probability per (node path, attack type, attacking nodes) group.
    grouped_prob = chains.groupby(group_keys)['aggr_prob']
    path_and_attack_prob_mean = grouped_prob.mean().reset_index()

    # Success weight: chain probability relative to its group's mean. Computed
    # column-wise, so a zero group mean yields inf/NaN instead of raising.
    chains['success_weight'] = chains['aggr_prob'] / grouped_prob.transform('mean')

    # Calculate expected troop numbers for the groups.
    path_and_attack_exp_troops = (
        chains.groupby(group_keys)[['success_weight', 'final_troop_numbers']]
        .apply(calculate_weighted_final_troops)
        .reset_index(name='expected_final_troops'))

    # Merge both metrics on the grouping keys and rename for presentation.
    path_and_attack_properties_df = pd.merge(
        path_and_attack_prob_mean, path_and_attack_exp_troops, on=group_keys, how='inner')
    path_and_attack_properties_df.columns = output_columns

    return path_and_attack_properties_df.sort_values(by='Success coefficient', ascending=False)

### Pareto optimize
Functions for calculating Pareto fronts and collecting a specified number of solutions.

In [42]:
def pareto_optimal(df, criteria1, criteria2):
    ''' Identify pareto optimal rows.

    Both criteria are maximized. A row is dominated when another row is at
    least as good on both criteria and strictly better on at least one. Rows
    with identical criteria values do not dominate each other, and a row with
    a missing (NaN) criterion neither dominates nor is dominated.

    Args:
        df: DataFrame holding the candidate solutions.
        criteria1: Name of the first column to maximize.
        criteria2: Name of the second column to maximize.

    Returns:
        DataFrame with the non-dominated rows of `df`, in their original
        order and with their original index labels and dtypes.
    '''
    if df.empty:
        return df.copy()

    # Work on positional copies of the two criteria so that duplicate index
    # labels in `df` cannot interfere with the look-ups below.
    scores = pd.DataFrame({'first': df[criteria1].to_numpy(),
                           'second': df[criteria2].to_numpy()})

    # Best second-criterion value for each distinct first-criterion value,
    # ordered from the highest first-criterion value downwards.
    best_second = (scores.dropna()
                   .groupby('first')['second'].max()
                   .sort_index(ascending=False))
    # Best second-criterion value among rows that are strictly better on the
    # first criterion (NaN for the top value, which has no such rows).
    best_second_above = best_second.cummax().shift(1)

    # Dominated either by a row that is strictly better on the first criterion
    # and no worse on the second, or by a row that ties on the first criterion
    # and is strictly better on the second. Comparisons with NaN are False.
    beaten_from_above = scores['second'] <= scores['first'].map(best_second_above)
    beaten_within_tie = scores['second'] < scores['first'].map(best_second)
    dominated = (beaten_from_above | beaten_within_tie).to_numpy()

    return df[~dominated]

def collect_pareto_solutions(df, criteria1, criteria2, top_n):
    ''' Collect n number of solutions starting from the top.

    Successive Pareto fronts are peeled off `df` (the first front, then the
    front of what remains, and so on) until at least `top_n` rows have been
    gathered or no rows remain.

    Args:
        df: DataFrame holding the candidate solutions. It is not modified.
        criteria1: Name of the first column to maximize.
        criteria2: Name of the second column to maximize.
        top_n: Maximum number of solutions to return.

    Returns:
        DataFrame with at most `top_n` rows, ordered front by front and
        re-indexed from 0. An empty DataFrame when nothing was collected.
    '''
    fronts = []
    collected = 0
    remaining_df = df.copy()

    while collected < top_n and not remaining_df.empty:
        pareto_front = pareto_optimal(remaining_df, criteria1, criteria2)
        if pareto_front.empty:
            # Without progress the loop would never terminate.
            break
        fronts.append(pareto_front)
        collected += len(pareto_front)
        remaining_df = remaining_df.drop(pareto_front.index)

    if not fronts:
        return pd.DataFrame()

    # Concatenate once at the end rather than growing a frame per iteration.
    return pd.concat(fronts).head(top_n).reset_index(drop=True)

In [43]:
def get_pareto_fronts(path_and_attack_properties_df, k=10):
    ''' Fetches a specified number of Pareto fronts for plotting.

    Up to `k` fronts are calculated on the 'Success coefficient' and
    'Expected troop number' columns. Each front is removed from the working
    copy before the next one is calculated, and progress is printed along
    the way. The result is a list of DataFrames, one per front, which is
    shorter than `k` when the rows run out.
    '''
    criteria = ('Success coefficient', 'Expected troop number')
    unassigned = path_and_attack_properties_df.copy()
    fronts = []

    print('Might wanna grab a coffe here...')
    front_number = 1
    while front_number <= k:
        print(f'Calculating Pareto front {front_number} of {k}...')
        if unassigned.empty:
            print(f'Remaining DataFrame is empty. Stopping at front {front_number}.')
            break

        current_front = pareto_optimal(unassigned, *criteria)
        fronts.append(current_front)

        # Rows on this front are no longer candidates for later fronts.
        unassigned = unassigned.drop(current_front.index)
        print(f'Pareto front {front_number} calculated. Remaining rows: {len(unassigned)}')
        front_number += 1

    return fronts

In [44]:
def generate_pareto_solutions():
    ''' Function for executing the program generating Pareto solutions and printing progress.

    Loads the simulation output, processes the successful event chains,
    collects the top 10 Pareto solutions and the Pareto fronts, and pickles
    all of it as a dict to 'pareto_info_dict.pkl' in the working directory.
    The dict has the keys 'path_and_attack_properties_df', 'pareto_optimals'
    and 'pareto_fronts'.

    Returns:
        None. The function stops early, after printing a message, when the
        loaded data, the processed chains or the collected solutions are
        empty; nothing is written in that case.
    '''
    criteria1 = 'Success coefficient'
    criteria2 = 'Expected troop number'

    simulation_output_df = load_simulation_output_df()
    if simulation_output_df.empty:
        print('Could not load event chains dataframe.')
        return
    print('Event chains dataframe has been loaded for Pareto optimization.')

    path_and_attack_properties_df = process_successful_paths(simulation_output_df)
    if path_and_attack_properties_df.empty:
        print('Could not process successful event chains.')
        return
    print('Successful event chains have been processed.')

    # In this case equal to the first Pareto front.
    pareto_optimals = collect_pareto_solutions(path_and_attack_properties_df,
                                               criteria1, criteria2, 10)
    if pareto_optimals.empty:
        print('Could not calculate Pareto optimal solutions.')
        return
    print('Pareto optimal solutions have been calculated.')

    pareto_fronts = get_pareto_fronts(path_and_attack_properties_df)
    print('Pareto fronts have been calculated')

    # Create dict with info to be used for plotting.
    pareto_info_dict = {'path_and_attack_properties_df': path_and_attack_properties_df,
                        'pareto_optimals': pareto_optimals,
                        'pareto_fronts': pareto_fronts}
    print('Pareto solutions information dictionary has been created.')

    # Save to pickle.
    with open('pareto_info_dict.pkl', 'wb') as f:
        pickle.dump(pareto_info_dict, f)
    print('Pareto solutions information dictionary has been saved to "pareto_info_dict.pkl".')