#### This notebook plots the loss by Equal Shares across different impact areas in terms of 3 proposed impact metrics, each with 3 varieties: cost share, project share, popularity share; cost representation, project representation, popularity representation; cost proportionality, project proportionality, popularity proportionality

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import csv

In [2]:
# Notebook display settings: show every column at full width, cap the number
# of printed rows, and render floats with four decimal places.
_display_settings = {
    'display.max_columns': None,   # or 1000
    'display.max_rows': 10,        # or 1000
    'display.max_colwidth': None,  # or 199
    'display.float_format': '{:.4f}'.format,
}
for _option_name, _option_value in _display_settings.items():
    pd.set_option(_option_name, _option_value)

In [3]:
# Read the election-level metadata CSV (semicolon-separated) and drop exact
# duplicate rows.
pbsummary_df = pd.read_csv('../metadata.csv', delimiter=';')
pbsummary_df = pbsummary_df.drop_duplicates()

# Append the rows from the separate metadata_aarau.csv file.
pbsummary_aarau = pd.read_csv('../metadata_aarau.csv', delimiter=';')
pbsummary_df = pd.concat([pbsummary_df, pbsummary_aarau], ignore_index=True)

# Elections without a subunit are labelled 'all'. The result is assigned back
# to the column: calling fillna(inplace=True) on the selected column acts on an
# intermediate object and does not update the frame under Copy-on-Write.
pbsummary_df['subunit'] = pbsummary_df['subunit'].fillna(value='all')

In [None]:
# Keep only the election id and its vote type, so that approval and score
# votings can be segregated later on.
pbsummary_with_vote_type = pbsummary_df.loc[:, ['election_id', 'vote_type']]
print(pbsummary_with_vote_type.head())

# How many elections use each vote type.
_vote_type_counts = pbsummary_with_vote_type['vote_type'].value_counts()
print(_vote_type_counts)

In [None]:
# Load the projects CSV (semicolon-separated) into a dataframe and discard
# exact duplicate rows.
pbprojects_df = pd.read_csv('../projects.csv', delimiter=';').drop_duplicates()
print(pbprojects_df.shape)

# Append the rows from the separate projects_aarau.csv file.
pbprojects_aarau = pd.read_csv('../projects_aarau.csv', delimiter=';')
pbprojects_df = pd.concat([pbprojects_df, pbprojects_aarau], ignore_index=True)

# Attach the vote_type of the owning election to every project row; projects
# whose election_id has no metadata row are dropped by the inner join.
pbprojects_df = pbprojects_df.merge(pbsummary_with_vote_type, how='inner', on='election_id')
print(pbprojects_df.shape)

pbprojects_df.head()

In [None]:
# Check for projects where the cost of a given project is zero
zero_cost_mask = pbprojects_df['cost'] == 0

print("Projects with zero costs: ", pbprojects_df[zero_cost_mask])

# Currently returns a single project ID which has been commented as been removed by City Council
invalid_projects = pbprojects_df.loc[zero_cost_mask, ['project_id', 'election_id']]
print(invalid_projects)

# Exclude exactly the (election_id, project_id) pairs found above. Testing the
# two ids independently with isin() and AND-ing the results would also drop
# unrelated projects as soon as more than one invalid project exists (any
# invalid project_id combined with any invalid election_id would match).
invalid_keys = pd.MultiIndex.from_frame(invalid_projects[['election_id', 'project_id']])
row_keys = pd.MultiIndex.from_frame(pbprojects_df[['election_id', 'project_id']])
valid_pbprojects_df = pbprojects_df[~row_keys.isin(invalid_keys)]
print(pbprojects_df.shape)
print(valid_pbprojects_df.shape)

In [None]:
# Add a vote_percent column: each project's votes as a percentage of all
# votes cast on projects of the same election.
print(valid_pbprojects_df.shape)

# Total votes per election.
valid_pb_projects_total_selections = (
    valid_pbprojects_df
    .groupby(['election_id'])['votes']
    .sum()
    .reset_index()
)
print(valid_pb_projects_total_selections.shape)
valid_pb_projects_total_selections = valid_pb_projects_total_selections.rename(
    columns={'votes': 'all_project_votes'}
)

# Broadcast the election total back onto every project row.
valid_pbprojects_df = pd.merge(
    valid_pbprojects_df,
    valid_pb_projects_total_selections,
    how='inner',
    on='election_id',
)
_vote_fraction = valid_pbprojects_df['votes'] / valid_pbprojects_df['all_project_votes']
valid_pbprojects_df['vote_percent'] = (_vote_fraction * 100).round(3)
print(valid_pbprojects_df.shape)

In [8]:
# Election ids (one entry per election) of projects whose country is Switzerland.
_swiss_rows = valid_pbprojects_df.loc[valid_pbprojects_df['country'].eq('Switzerland')]
aarau_election_id = _swiss_rows.groupby(['election_id']).first().reset_index()['election_id']

# Election ids (one entry per election) of projects whose unit is Wieliczka.
_wieliczka_rows = valid_pbprojects_df.loc[valid_pbprojects_df['unit'].eq('Wieliczka')]
green_budget_election_id = _wieliczka_rows.groupby(['election_id']).first().reset_index()['election_id']


In [9]:
# print("Distinct election IDs are: ", valid_pbprojects_df['election_id'].nunique())
# valid_pbprojects_df_grouped_election = valid_pbprojects_df.groupby(['election_id','is_mes_winner'])['cost'].agg(['sum']).reset_index()
# print(valid_pbprojects_df_grouped_election.head())
# print(valid_pbprojects_df_grouped_election.shape)
# print("Unique election IDs after grouping total costs: ", valid_pbprojects_df_grouped_election['election_id'].nunique())
# mes_winners_grouped_project_count = valid_pbprojects_df_grouped_election[valid_pbprojects_df_grouped_election['is_mes_winner'] == True]
# print(mes_winners_grouped_project_count.shape)

In [10]:
# setting additional column for used budget with MES aggregation
# print(valid_pbprojects_df.shape)

# elections_with_mes_winners = valid_pbprojects_df[valid_pbprojects_df['is_mes_winner'] == True]
# elections_with_greedy_winners = valid_pbprojects_df[valid_pbprojects_df['is_greedy_winner'] == True]
# print("Elections with MES winners: ", elections_with_mes_winners['election_id'].nunique())
# print("Elections with Greedy Winners: ", elections_with_greedy_winners['election_id'].nunique())

# # Getting the total budget usage for MES winning projects of each election_id (grouped) and adding a new column to denote that value
# valid_pb_projects_mes_budget_usage = valid_pbprojects_df[valid_pbprojects_df['is_mes_winner'] == True].groupby(['election_id'])['cost'].sum().reset_index()
# valid_pb_projects_mes_budget_usage.rename(columns={'cost': 'mes_total_budget_usage'}, inplace=True)
# valid_pbprojects_df = valid_pbprojects_df.merge(valid_pb_projects_mes_budget_usage, on='election_id', how='inner')
# valid_pbprojects_df['mes_budget_usage_percent'] = round((valid_pbprojects_df['mes_total_budget_usage'] / valid_pbprojects_df['total_budget'] * 100),3)
# print(valid_pbprojects_df.shape)

# # Getting the total budget usage for utilitarian greedy winning projects of each election_id (grouped) and adding a new column to denote that value
# valid_pb_projects_greedy_budget_usage = valid_pbprojects_df[valid_pbprojects_df['is_greedy_winner']].groupby(['election_id'])['cost'].sum().reset_index()
# valid_pb_projects_greedy_budget_usage.rename(columns={'cost': 'greedy_total_budget_usage'}, inplace=True)
# valid_pbprojects_df = valid_pbprojects_df.merge(valid_pb_projects_greedy_budget_usage, on='election_id', how='inner')
# valid_pbprojects_df['greedy_budget_usage_percent'] = round((valid_pbprojects_df['greedy_total_budget_usage'] / valid_pbprojects_df['total_budget'] * 100),3)
# print(valid_pbprojects_df.shape)

## Fill empty values for category with the label 'uncategorized' to aid in further data analysis

In [None]:
# Frequency of each raw category string (the value is not printed here).
valid_pbprojects_df['category'].value_counts()

# Checking to see if there are empty values for category in the entire project dataset
na_category_count = valid_pbprojects_df['category'].isna().sum()
print("Empty category values for PB projects are: ", na_category_count)

# Fill such empty values of category with the label uncategorized, so that it can aid in further data preprocessing.
# The filled column is assigned back explicitly: fillna(inplace=True) on the
# selected column acts on an intermediate object and does not update the
# frame under Copy-on-Write.
valid_pbprojects_df['category'] = valid_pbprojects_df['category'].fillna('uncategorized')


print("Emtpy category values after filling na: ", valid_pbprojects_df['category'].isna().sum())
print("`uncategorized` category count for valid pb projects: ", valid_pbprojects_df[valid_pbprojects_df['category'] == 'uncategorized'].shape[0])

## Create additional columns for each category label

In [None]:
# Split the comma-separated category string of every project once, keeping
# both the raw pieces and their whitespace-stripped versions.
_raw_label_lists = [categories.split(',') for categories in valid_pbprojects_df['category']]
_label_lists = [[label.strip() for label in labels] for labels in _raw_label_lists]

# Set of unique (stripped) category labels across all projects
categories_set = set(label for labels in _label_lists for label in labels)

# One 0/1 indicator column per category label. The columns are built
# column-wise instead of writing cell by cell through iterrows()/.at, and the
# labels are visited in sorted order so that the column order is reproducible
# between runs (iteration order of a set of strings is not).
for category in sorted(categories_set):
    valid_pbprojects_df[f'category_{category}'] = [
        int(category in labels) for labels in _label_lists
    ]

# Number of category labels per project; a project whose only label is
# 'uncategorized' counts as having 0 labels. Stored as float, which is the
# dtype the previous cell-by-cell construction produced.
valid_pbprojects_df['category_labels_count'] = [
    0.0 if (len(labels) == 1 and labels[0] == 'uncategorized') else float(len(labels))
    for labels in _raw_label_lists
]

valid_pbprojects_df.tail()

In [13]:
# Total cost of all (valid) projects per election, broadcast back onto every
# project row as 'election_total_projects_cost'.
election_total_projects_cost_df = (
    valid_pbprojects_df
    .groupby(['election_id'])['cost']
    .sum()
    .reset_index()
    .rename(columns={'cost': 'election_total_projects_cost'})
)
valid_pbprojects_df = pd.merge(
    valid_pbprojects_df,
    election_total_projects_cost_df,
    how='inner',
    on='election_id',
)

In [None]:
# Build the working frame for the category analysis: keep only the columns
# needed below and give the category indicator columns short names.
# rename() is used in its non-inplace form so that categorization_df is an
# independent frame; renaming and assigning columns in place on a selection of
# valid_pbprojects_df would be a write to a slice of another frame
# (SettingWithCopyWarning).
categorization_df = valid_pbprojects_df[[
       'election_id', 'unit', 'subunit', 'instance', 'project_id', 'vote_type', 'cost', 'election_total_projects_cost', 'votes', 'score', 'is_mes_winner', 'is_greedy_winner', 'is_phragmen_winner', 'category_public transit and roads',
       'category_health', 'category_welfare', 'category_uncategorized',
       'category_public space', 'category_urban greenery', 'category_culture',
       'category_education', 'category_sport',
       'category_environmental protection', 'category_labels_count'
]].rename(columns={'category_education': 'education', 'category_public transit and roads': 'public_transit_and_roads', 'category_health': 'health', 'category_welfare': 'welfare', 'category_uncategorized':'uncategorized', 'category_public space': 'public_space', 'category_urban greenery': 'urban_greenery', 'category_culture': 'culture', 'category_sport': 'sport', 'category_environmental protection': 'env_protection', 'category_labels_count': 'total_tags' })

# update votes column to have score values for cumulative voting instances
categorization_df['votes'] = np.where(categorization_df['vote_type'] == 'cumulative', categorization_df['score'], categorization_df['votes'])

# Projects without any category were labelled 'uncategorized' earlier, and that
# label does not count towards the total number of tags. Hence a row must never
# have both uncategorized > 0 and total_tags > 0; sanity check:
empty_df = categorization_df[(categorization_df['uncategorized'] > 0) & (categorization_df['total_tags'] > 0)]
print("Size of returned df must be 0: ", empty_df.shape)

# Apply an additional filter to remove uncategorized values from the categorization_df
categorization_df = categorization_df[categorization_df['uncategorized'] == 0]

## Create sorted ordering for the 344 PB instances with their % values of winnings in each category for MES and Greedy

In [15]:
# Shared constants for the cells below.

# Impact-area column names, in the order used throughout the analysis.
categories_set = [
    'education',
    'public_transit_and_roads',
    'health',
    'welfare',
    'public_space',
    'urban_greenery',
    'culture',
    'sport',
    'env_protection',
]

# Human-readable title for each impact area (same order as categories_set).
_category_titles = [
    'Education',
    'Public Transit',
    'Health',
    'Welfare',
    'Public Space',
    'Urban Greenery',
    'Culture',
    'Sport',
    'Env. Protection',
]
category_title_map = dict(zip(categories_set, _category_titles))

# Flat index 0..8 -> [row, column] position in a 3x3 grid (row-major).
oneD_to_twoD_map = {flat_index: [flat_index // 3, flat_index % 3] for flat_index in range(9)}

## Use the categorization_df dataframe as a base and add new metrics necessary for calculating cost utilization and relative winners for each category of each PB instance

In [16]:
# Per-election totals over the winning projects of each rule (ES = rows with
# is_mes_winner, UG = rows with is_greedy_winner): cost, project count, votes.
def _winner_total(winner_flag, value_column, aggregate, total_name):
    """Aggregate `value_column` per election over rows where `winner_flag` is True.

    Returns a frame with columns 'election_id' and `total_name`.
    """
    winners = categorization_df[(categorization_df[winner_flag] == True)]
    per_election = winners.groupby(['election_id'])[value_column].agg(aggregate).reset_index()
    return per_election.rename(columns={value_column: total_name})


es_total_cost = _winner_total('is_mes_winner', 'cost', 'sum', 'es_total_cost')
ug_total_cost = _winner_total('is_greedy_winner', 'cost', 'sum', 'ug_total_cost')

es_total_projects = _winner_total('is_mes_winner', 'project_id', 'count', 'es_total_projects')
ug_total_count = _winner_total('is_greedy_winner', 'project_id', 'count', 'ug_total_projects')

es_total_popularity = _winner_total('is_mes_winner', 'votes', 'sum', 'es_total_popularity')
ug_total_popularity = _winner_total('is_greedy_winner', 'votes', 'sum', 'ug_total_popularity')

# Attach all six totals to categorization_df. The joins are inner joins, so
# elections missing from any of the totals frames are dropped.
for _totals in (
    es_total_cost,
    ug_total_cost,
    es_total_projects,
    ug_total_count,
    es_total_popularity,
    ug_total_popularity,
):
    categorization_df = categorization_df.merge(_totals, on='election_id', how='inner')

In [None]:
print("Shape of categorization_df : ", categorization_df.shape)

# For each category, add per-election columns holding the total cost, the
# number of projects and the total votes of all projects tagged with that
# category: '<category>_total_cost', '<category>_total_projects',
# '<category>_total_popularity'.
for category in categories_set:
    category_rows = categorization_df[categorization_df[category] == 1]

    # category total cost
    category_total_cost = category_rows.groupby(['election_id'])['cost'].sum().reset_index()
    category_total_cost = category_total_cost.rename(columns={'cost': f'{category}_total_cost'})

    # category total projects
    category_total_projects = category_rows.groupby(['election_id'])['project_id'].count().reset_index()
    category_total_projects = category_total_projects.rename(columns={'project_id': f'{category}_total_projects'})

    # category total popularity
    category_total_popularity = category_rows.groupby(['election_id'])['votes'].sum().reset_index()
    category_total_popularity = category_total_popularity.rename(columns={'votes': f'{category}_total_popularity'})

    # Left joins: elections without any project in this category keep their
    # rows and get NaN in the new columns.
    categorization_df = categorization_df.merge(category_total_cost, how='left', on='election_id')
    categorization_df = categorization_df.merge(category_total_projects, how='left', on='election_id')
    categorization_df = categorization_df.merge(category_total_popularity, how='left', on='election_id')

# Replace the NaN values introduced by the left joins with zeros. The filled
# columns are assigned back explicitly: fillna(inplace=True) on a selected
# column acts on an intermediate object and does not update the frame under
# Copy-on-Write.
category_total_columns = [
    f'{category}_total_{metric}'
    for category in categories_set
    for metric in ('cost', 'projects', 'popularity')
]
categorization_df[category_total_columns] = categorization_df[category_total_columns].fillna(0)

In [None]:
# Attach each election's 'num_projects' value from the metadata to every row
# of categorization_df (inner join on election_id).
pbsummary_num_votes = pbsummary_df.loc[:, ['election_id', 'num_projects']]
categorization_df = pd.merge(categorization_df, pbsummary_num_votes, how='inner', on='election_id')
categorization_df

In [19]:
# Adding total cost of each category in each election for greedy winners:
# '<category>_ug_total_cost'.
for category in categories_set:
    # per-election cost of greedy-winning projects tagged with this category
    greedy_winners_category_grouped_df = categorization_df[(categorization_df[category] == 1) & (categorization_df['is_greedy_winner'] == True)].groupby(['election_id'])['cost'].sum().reset_index()
    greedy_winners_category_grouped_df = greedy_winners_category_grouped_df.rename(columns={'cost': f'{category}_ug_total_cost'})

    # merge() returns a new frame, so the result has to be assigned back
    categorization_df = categorization_df.merge(greedy_winners_category_grouped_df, how='left', on='election_id')

# The left joins leave NaN for elections without a greedy winner in a
# category; replace them with zeros. The filled columns are assigned back
# explicitly because fillna(inplace=True) on a selected column acts on an
# intermediate object and does not update the frame under Copy-on-Write.
ug_cost_columns = [f'{category}_ug_total_cost' for category in categories_set]
categorization_df[ug_cost_columns] = categorization_df[ug_cost_columns].fillna(0)

# Adding total cost of each category in each election for MES winners:
# '<category>_es_total_cost'.
for category in categories_set:
    # per-election cost of MES-winning projects tagged with this category
    mes_winners_category_grouped_df = categorization_df[(categorization_df[category] == 1) & (categorization_df['is_mes_winner'] == True)].groupby(['election_id'])['cost'].sum().reset_index()
    mes_winners_category_grouped_df = mes_winners_category_grouped_df.rename(columns={'cost': f'{category}_es_total_cost'})

    # merge() returns a new frame, so the result has to be assigned back
    categorization_df = categorization_df.merge(mes_winners_category_grouped_df, how='left', on='election_id')

# Same NaN -> 0 replacement for the MES columns.
es_cost_columns = [f'{category}_es_total_cost' for category in categories_set]
categorization_df[es_cost_columns] = categorization_df[es_cost_columns].fillna(0)


In [20]:
# Project counts in each category that are Greedy winners:
# '<category>_ug_total_projects'.
for category in categories_set:
    # per-election number of greedy-winning projects tagged with this category
    temp_grouped_df = categorization_df[(categorization_df[category] == 1) & (categorization_df['is_greedy_winner'] == True)].groupby(['election_id'])['project_id'].count().reset_index()
    temp_grouped_df = temp_grouped_df.rename(columns={'project_id': f'{category}_ug_total_projects'})

    # merge() returns a new frame, so the result has to be assigned back
    categorization_df = categorization_df.merge(temp_grouped_df, how='left', on='election_id')

# The left joins leave NaN for elections without a greedy winner in a
# category; replace them with zeros. The filled columns are assigned back
# explicitly because fillna(inplace=True) on a selected column acts on an
# intermediate object and does not update the frame under Copy-on-Write.
ug_project_columns = [f'{category}_ug_total_projects' for category in categories_set]
categorization_df[ug_project_columns] = categorization_df[ug_project_columns].fillna(0)


# Project counts in each category that are MES Winners:
# '<category>_es_total_projects'.
for category in categories_set:
    # per-election number of MES-winning projects tagged with this category
    temp_grouped_df = categorization_df[(categorization_df[category] == 1) & (categorization_df['is_mes_winner'] == True)].groupby(['election_id'])['project_id'].count().reset_index()
    temp_grouped_df = temp_grouped_df.rename(columns={'project_id': f'{category}_es_total_projects'})

    # merge() returns a new frame, so the result has to be assigned back
    categorization_df = categorization_df.merge(temp_grouped_df, how='left', on='election_id')

# Same NaN -> 0 replacement for the MES columns.
es_project_columns = [f'{category}_es_total_projects' for category in categories_set]
categorization_df[es_project_columns] = categorization_df[es_project_columns].fillna(0)

##  Metrics for Budget Share and Winning Rate

In [None]:
# Share metrics: for every category, the fraction of a rule's winning cost /
# winning project count that falls into the category, for UG and ES, plus the
# difference UG - ES stored as '<category>_loss_<metric>_share'.
print("Current shape of categorization df is: ", categorization_df.shape)

for category in categories_set:
    for metric, total_suffix in (('cost', 'total_cost'), ('project', 'total_projects')):
        ug_share = categorization_df[f'{category}_ug_{total_suffix}'] / categorization_df[f'ug_{total_suffix}']
        es_share = categorization_df[f'{category}_es_{total_suffix}'] / categorization_df[f'es_{total_suffix}']

        categorization_df[f'{category}_ug_{metric}_share'] = ug_share
        categorization_df[f'{category}_es_{metric}_share'] = es_share
        categorization_df[f'{category}_loss_{metric}_share'] = ug_share - es_share
    

In [22]:
# Hex colour used for each impact area in the plots.
categories_color_map = dict(
    education='#d53e4f',
    public_transit_and_roads='#f46d43',
    health='#fdae61',
    welfare='#fee08b',
    public_space='#ffffbf',
    urban_greenery='#e6f598',
    culture='#abdda4',
    sport='#66c2a5',
    env_protection='#3288bd',
)

## Metrics for Cost Representation and Project Representation

In [None]:
# Representation metrics: for every category, the fraction of the category's
# total cost / total project count that is won under UG and under ES, plus the
# difference UG - ES stored as '<category>_loss_<metric>_rep'.
print("Before adding new metrics, shape was: ", categorization_df.shape)
for category in categories_set:
    for metric, total_suffix in (('cost', 'total_cost'), ('project', 'total_projects')):
        category_total = categorization_df[f'{category}_{total_suffix}']
        ug_rep = categorization_df[f'{category}_ug_{total_suffix}'] / category_total
        es_rep = categorization_df[f'{category}_es_{total_suffix}'] / category_total

        categorization_df[f'{category}_ug_{metric}_rep'] = ug_rep
        categorization_df[f'{category}_es_{metric}_rep'] = es_rep
        categorization_df[f'{category}_loss_{metric}_rep'] = ug_rep - es_rep
    


### Plot for Impact loss by Equal Shares for impact areas in terms of budget share, winning rate, cost representation and project representation

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(12, 12), sharex=True, sharey=True)

## first plot; budget share
# Panel A (top-left): for every category, the per-election values of
# '<category>_loss_cost_share', sorted in descending order and plotted against
# their rank.
bs_ax = axes[0][0]

# category -> rank of the first election (descending order) whose loss is not
# strictly positive, i.e. the "flipping point"
bsflippingPoints = {}

# category -> number of elections with a non-NaN loss value
bs_num_election_map = {}


# First loop through all categories to determine the order of flipping points
for category in categories_set:
    loss_column = f'{category}_loss_cost_share'
    # drop_duplicates() is used in its non-inplace form: the selection is a
    # slice of categorization_df and must not be modified in place
    temp_df = categorization_df[['election_id', loss_column]].drop_duplicates()
    bs_num_election_map[category] = temp_df[loss_column].count()
    diff_relative_winners_cost_pct_category = temp_df.sort_values(by=loss_column, ascending=False).reset_index()

    for i, value in diff_relative_winners_cost_pct_category[loss_column].items():
        # `not` instead of `~`: on a plain Python bool `~` is an integer
        # bit-inversion (~True == -2) and would always be truthy
        if not (value > 0):
            bsflippingPoints[category] = i
            break
    # NOTE: a category in which every value is strictly positive gets no
    # flipping point and is therefore not plotted below.

bsflippingPointsSorted = sorted(bsflippingPoints, key=bsflippingPoints.get)
overall_bs_positive = []
overall_bs_negative = []

# Second loop across all categories set to actually plot the points in the flipping order
for category in bsflippingPointsSorted:
    loss_column = f'{category}_loss_cost_share'

    # flipping point as a percentage of the elections with a value, for the legend
    flippingPointVal = bsflippingPoints[category]
    flippingPointPct = 100 * flippingPointVal / bs_num_election_map[category]

    cur_color = categories_color_map[category]

    temp_df = categorization_df[['election_id', loss_column]].drop_duplicates()
    diff_relative_winners_cost_pct_category = temp_df.sort_values(by=loss_column, ascending=False).reset_index()
    loss_values = diff_relative_winners_cost_pct_category[loss_column]

    # overall mean, mean of the strictly positive and of the strictly negative values
    diff_relative_winners_cost_pct_category_avg = np.mean(loss_values)
    diff_relative_winners_cost_pct_category_positive_avg = np.mean(loss_values[loss_values > 0])
    diff_relative_winners_cost_pct_category_negative_avg = np.mean(loss_values[loss_values < 0])

    overall_bs_positive.append(diff_relative_winners_cost_pct_category_positive_avg)
    overall_bs_negative.append(diff_relative_winners_cost_pct_category_negative_avg)

    cur_category_label = f'{category_title_map[category]}: {flippingPointPct:.0f}%, [~{diff_relative_winners_cost_pct_category_avg:.2f}; +{diff_relative_winners_cost_pct_category_positive_avg:.2f}; {diff_relative_winners_cost_pct_category_negative_avg:.2f}]'

    for i, value in loss_values.items():
        # circles for non-negative values, stars for negative ones
        point_style = dict(
            marker='o' if value >= 0 else '*',
            markerfacecolor=cur_color,
            markeredgewidth=1,
            markeredgecolor=cur_color,
            markersize=4,
            alpha=0.8,
            linestyle='none',
        )
        if i == 0:
            # only the first point carries the legend label; as before, it is
            # drawn without an edge when its value is negative
            point_style['label'] = cur_category_label
            if not (value >= 0):
                point_style['markeredgecolor'] = 'none'
        bs_ax.plot(i, value, **point_style)

# Third loop across to present the flipping point value on the outermost layer
for category in bsflippingPointsSorted:
    loss_column = f'{category}_loss_cost_share'
    cur_color = categories_color_map[category]
    temp_df = categorization_df[['election_id', loss_column]].drop_duplicates()
    diff_relative_winners_cost_pct_category = temp_df.sort_values(by=loss_column, ascending=False).reset_index()

    # the flipping point is a row label of the re-indexed frame, so the point
    # can be looked up directly instead of scanning every row
    flippingPointVal = bsflippingPoints[category]

    # mark such observation with outer black edge color
    bs_ax.plot(
        flippingPointVal, diff_relative_winners_cost_pct_category.loc[flippingPointVal, loss_column],
        marker='o',
        markerfacecolor=cur_color,
        markeredgecolor='black',
    )

# Average of the per-category positive / negative means. nanmean ignores
# categories that have no strictly positive (or no strictly negative) value,
# whose mean is NaN and would otherwise turn the whole average into NaN.
overall_bs_positive_avg = np.nanmean(overall_bs_positive)
overall_bs_negative_avg = np.nanmean(overall_bs_negative)

bs_ax.annotate(f'+{overall_bs_positive_avg:.2f}', xy=(0, 0), xytext=(50, 0.5), fontsize=20)
bs_ax.annotate(f'{overall_bs_negative_avg:.2f}', xy=(0, 0), xytext=(285, -0.7), fontsize=20)

bs_ax.legend(frameon=False, handlelength=1.0, handletextpad=0.5, fontsize=12)
bs_ax.set_title("A           Budget Share", loc="left", fontsize=22, fontdict={'fontweight': 'bold'})
bs_ax.grid(axis='both', which='major', color='gray', alpha=0.1)
bs_ax.tick_params(axis='both', labelsize=16)
## end of first plot

## second plot; winning rate
# Panel B (top-right): for every category, the per-election values of
# '<category>_loss_project_share', sorted in descending order and plotted
# against their rank.
wr_ax = axes[0][1]

# category -> rank of the first election (descending order) whose loss is not
# strictly positive, i.e. the "flipping point"
wrflippingPoints = {}

# category -> number of elections with a non-NaN loss value
wr_num_election_map = {}

# First loop through all categories to determine the order of flipping points
for category in categories_set:
    loss_column = f'{category}_loss_project_share'
    # drop_duplicates() is used in its non-inplace form: the selection is a
    # slice of categorization_df and must not be modified in place
    temp_df = categorization_df[['election_id', loss_column]].drop_duplicates()
    wr_num_election_map[category] = temp_df[loss_column].count()
    diff_relative_winners_count_pct_category = temp_df.sort_values(by=loss_column, ascending=False).reset_index()

    for i, value in diff_relative_winners_count_pct_category[loss_column].items():
        # `not` instead of `~`: on a plain Python bool `~` is an integer
        # bit-inversion (~True == -2) and would always be truthy
        if not (value > 0):
            wrflippingPoints[category] = i
            break
    # NOTE: a category in which every value is strictly positive gets no
    # flipping point and is therefore not plotted below.

wrflippingPointsSorted = sorted(wrflippingPoints, key=wrflippingPoints.get)

overall_wr_positive = []
overall_wr_negative = []

# Second loop across all categories set to actually plot the points in the flipping order
for category in wrflippingPointsSorted:
    loss_column = f'{category}_loss_project_share'

    # flipping point as a percentage of the elections with a value, for the legend
    flippingPointVal = wrflippingPoints[category]
    flippingPointPct = 100 * flippingPointVal / wr_num_election_map[category]

    cur_color = categories_color_map[category]

    temp_df = categorization_df[['election_id', loss_column]].drop_duplicates()
    diff_relative_winners_count_pct_category = temp_df.sort_values(by=loss_column, ascending=False).reset_index()
    loss_values = diff_relative_winners_count_pct_category[loss_column]

    # overall mean, mean of the strictly positive and of the strictly negative values
    diff_relative_winners_count_pct_category_avg = np.mean(loss_values)
    diff_relative_winners_count_pct_category_positive_avg = np.mean(loss_values[loss_values > 0])
    diff_relative_winners_count_pct_category_negative_avg = np.mean(loss_values[loss_values < 0])

    overall_wr_positive.append(diff_relative_winners_count_pct_category_positive_avg)
    overall_wr_negative.append(diff_relative_winners_count_pct_category_negative_avg)

    cur_category_label = f'{category_title_map[category]}: {flippingPointPct:.0f}%, [~{diff_relative_winners_count_pct_category_avg:.2f}; +{diff_relative_winners_count_pct_category_positive_avg:.2f}; {diff_relative_winners_count_pct_category_negative_avg:.2f}]'

    for i, value in loss_values.items():
        # circles for non-negative values, stars for negative ones
        point_style = dict(
            marker='o' if value >= 0 else '*',
            markerfacecolor=cur_color,
            markeredgewidth=1,
            markeredgecolor=cur_color,
            markersize=4,
            alpha=0.8,
            linestyle='none',
        )
        if i == 0:
            # only the first point carries the legend label; as before, it is
            # drawn without an edge when its value is negative
            point_style['label'] = cur_category_label
            if not (value >= 0):
                point_style['markeredgecolor'] = 'none'
        wr_ax.plot(i, value, **point_style)

# Third loop across to present the flipping point value on the outermost layer
for category in wrflippingPointsSorted:
    loss_column = f'{category}_loss_project_share'
    cur_color = categories_color_map[category]
    temp_df = categorization_df[['election_id', loss_column]].drop_duplicates()
    diff_relative_winners_count_pct_category = temp_df.sort_values(by=loss_column, ascending=False).reset_index()

    # the flipping point is a row label of the re-indexed frame, so the point
    # can be looked up directly instead of scanning every row
    flippingPointVal = wrflippingPoints[category]

    # mark such observation with outer black edge color
    wr_ax.plot(
        flippingPointVal, diff_relative_winners_count_pct_category.loc[flippingPointVal, loss_column],
        marker='o',
        markerfacecolor=cur_color,
        markeredgecolor='black',
    )

# Average of the per-category positive / negative means. nanmean ignores
# categories that have no strictly positive (or no strictly negative) value,
# whose mean is NaN and would otherwise turn the whole average into NaN.
overall_wr_positive_avg = np.nanmean(overall_wr_positive)
overall_wr_negative_avg = np.nanmean(overall_wr_negative)

wr_ax.annotate(f'+{overall_wr_positive_avg:.2f}', xy=(0, 0), xytext=(10, 0.5), fontsize=20)
wr_ax.annotate(f'{overall_wr_negative_avg:.2f}', xy=(0, 0), xytext=(250, -0.5), fontsize=20)

wr_ax.legend(frameon=False, handlelength=1.0, handletextpad=0.5, fontsize=12)
wr_ax.set_title("B           Winning Rate", loc="left", fontsize=22, fontdict={'fontweight': 'bold'})
wr_ax.grid(axis='both', which='major', color='gray', alpha=0.1)
# tick styling belongs to this panel (it previously targeted axes[0][0] again)
wr_ax.tick_params(axis='both', labelsize=16)
## end of second plot

## third plot; cost representation
# Draws, on axes[1][0], each category's `<category>_loss_cost_rep` values sorted
# from largest to smallest, and highlights the position where the loss stops
# being strictly positive (the "flipping point").
crflippingPoints = {}

cr_num_election_map = {}

# Sorted per-category frames, built once in the first loop and reused afterwards
cr_sorted_by_category = {}

# First loop through all categories to determine the order of flipping points
for category in categories_set:
    loss_col = f'{category}_loss_cost_rep'
    # drop_duplicates() returns a new frame; the in-place variant operated on a
    # column slice of categorization_df and can raise SettingWithCopyWarning.
    temp_df = categorization_df[['election_id', loss_col]].drop_duplicates()
    cr_num_election_map[category] = temp_df[loss_col].count()  # non-NaN rows only
    diff_relative_proposals_cost_pct_category = temp_df.sort_values(by=loss_col, ascending=False).reset_index()
    cr_sorted_by_category[category] = diff_relative_proposals_cost_pct_category

    # Flipping point = first sorted position whose loss is NOT strictly positive
    # (zero, negative or NaN). Computed on the whole column: the previous
    # per-row `~(value > 0)` is a bitwise NOT, which on a plain Python bool
    # yields -2/-1 (always truthy) and is deprecated since Python 3.12.
    is_positive = (diff_relative_proposals_cost_pct_category[loss_col] > 0).to_numpy()
    flip_positions = np.flatnonzero(~is_positive)
    # Categories whose losses are all strictly positive get no entry and are
    # therefore not drawn below (same as before).
    if flip_positions.size:
        crflippingPoints[category] = int(flip_positions[0])

crflippingPointsSorted = sorted(crflippingPoints, key=crflippingPoints.get)

overall_cr_positive = []
overall_cr_negative = []

# Second loop across all categories set to actually plot the lines in the flipping order
for category in crflippingPointsSorted:
    loss_col = f'{category}_loss_cost_rep'

    # additional metrics for percentage representation for categories in legends
    flippingPointVal = crflippingPoints[category]
    flippingPointPct = 100 * flippingPointVal / cr_num_election_map[category]

    cur_color = categories_color_map[category]

    diff_relative_proposals_cost_pct_category = cr_sorted_by_category[category]
    loss_series = diff_relative_proposals_cost_pct_category[loss_col]

    diff_relative_proposals_cost_pct_category_avg = np.mean(loss_series)
    diff_relative_proposals_cost_pct_category_positive_avg = np.mean(loss_series[loss_series > 0])
    diff_relative_proposals_cost_pct_category_negative_avg = np.mean(loss_series[loss_series < 0])

    overall_cr_positive.append(diff_relative_proposals_cost_pct_category_positive_avg)
    overall_cr_negative.append(diff_relative_proposals_cost_pct_category_negative_avg)

    cur_category_label = f'{category_title_map[category]}: {flippingPointPct:.0f}%, [~{diff_relative_proposals_cost_pct_category_avg:.2f}; +{diff_relative_proposals_cost_pct_category_positive_avg:.2f}; {diff_relative_proposals_cost_pct_category_negative_avg:.2f}]'

    x_positions = diff_relative_proposals_cost_pct_category.index.to_numpy()
    loss_values = loss_series.to_numpy()
    is_non_negative = (loss_series >= 0).to_numpy()  # NaN counts as "not non-negative"

    shared_style = dict(
        markerfacecolor=cur_color,
        markeredgewidth=1,
        markersize=4,
        alpha=0.8,
        linestyle='none',
    )

    # The first (largest) observation is plotted on its own because it carries
    # the legend label and, when negative, has no marker edge.
    first_is_non_negative = bool(is_non_negative[0])
    axes[1][0].plot(
        x_positions[0], loss_values[0],
        marker='o' if first_is_non_negative else '*',
        markeredgecolor=cur_color if first_is_non_negative else 'none',
        label=cur_category_label,
        **shared_style,
    )

    # Remaining observations: one call per marker shape instead of one call per
    # point. Values are sorted descending, so drawing the non-negative group
    # first keeps the original draw order.
    rest_x = x_positions[1:]
    rest_y = loss_values[1:]
    rest_non_negative = is_non_negative[1:]
    for marker, mask in (('o', rest_non_negative), ('*', ~rest_non_negative)):
        if mask.any():
            axes[1][0].plot(
                rest_x[mask], rest_y[mask],
                marker=marker,
                markeredgecolor=cur_color,
                **shared_style,
            )

# Third loop across to present the flipping point value on the outermost layer
for category in crflippingPointsSorted:
    flippingPointVal = crflippingPoints[category]
    # mark such observation with outer black edge color
    axes[1][0].plot(
        flippingPointVal,
        cr_sorted_by_category[category].at[flippingPointVal, f'{category}_loss_cost_rep'],
        marker='o',
        markerfacecolor=categories_color_map[category],
        markeredgecolor='black',
    )

# nanmean instead of mean: a category with no strictly positive (or no strictly
# negative) losses contributes NaN (mean of an empty selection), which would
# otherwise turn the annotated overall average into "nan".
overall_cr_positive_avg = np.nanmean(overall_cr_positive)
overall_cr_negative_avg = np.nanmean(overall_cr_negative)

# No arrowprops are given, so these render as plain text at the xytext position.
axes[1][0].annotate(f'+{overall_cr_positive_avg:.2f}', xy=(0, 0), xytext=(10, 0.75), fontsize=20)
axes[1][0].annotate(f'{overall_cr_negative_avg:.2f}', xy=(0, 0), xytext=(50, -0.75), fontsize=20)

axes[1][0].legend(frameon=False, handlelength=1.0, handletextpad=0.5, fontsize=12)
axes[1][0].set_title("C      Cost Representation", loc="left", fontsize=22, fontdict={'fontweight': 'bold'})
axes[1][0].grid(axis='both', which='major', color='gray', alpha=0.1)
axes[1][0].tick_params(axis='both', labelsize=16)
## end of third plot

## fourth plot; project representation
# Draws, on axes[1][1], each category's `<category>_loss_project_rep` values
# sorted from largest to smallest, and highlights the position where the loss
# stops being strictly positive (the "flipping point").
rrflippingPoints = {}

rr_num_election_map = {}

# Sorted per-category frames, built once in the first loop and reused afterwards
rr_sorted_by_category = {}

# First loop through all categories to determine the order of flipping points
for category in categories_set:
    loss_col = f'{category}_loss_project_rep'
    # drop_duplicates() returns a new frame; the in-place variant operated on a
    # column slice of categorization_df and can raise SettingWithCopyWarning.
    temp_df = categorization_df[['election_id', loss_col]].drop_duplicates()
    rr_num_election_map[category] = temp_df[loss_col].count()  # non-NaN rows only
    diff_relative_proposals_count_pct_category = temp_df.sort_values(by=loss_col, ascending=False).reset_index()
    rr_sorted_by_category[category] = diff_relative_proposals_count_pct_category

    # Flipping point = first sorted position whose loss is NOT strictly positive
    # (zero, negative or NaN). Computed on the whole column: the previous
    # per-row `~(value > 0)` is a bitwise NOT, which on a plain Python bool
    # yields -2/-1 (always truthy) and is deprecated since Python 3.12.
    is_positive = (diff_relative_proposals_count_pct_category[loss_col] > 0).to_numpy()
    flip_positions = np.flatnonzero(~is_positive)
    # Categories whose losses are all strictly positive get no entry and are
    # therefore not drawn below (same as before).
    if flip_positions.size:
        rrflippingPoints[category] = int(flip_positions[0])

rrflippingPointsSorted = sorted(rrflippingPoints, key=rrflippingPoints.get)

overall_rr_positive = []
overall_rr_negative = []

# Second loop across all categories set to actually plot the lines in the flipping order
for category in rrflippingPointsSorted:
    loss_col = f'{category}_loss_project_rep'

    # additional metrics for percentage representation for categories in legends
    flippingPointVal = rrflippingPoints[category]
    flippingPointPct = 100 * flippingPointVal / rr_num_election_map[category]

    cur_color = categories_color_map[category]

    diff_relative_proposals_count_pct_category = rr_sorted_by_category[category]
    loss_series = diff_relative_proposals_count_pct_category[loss_col]

    diff_relative_proposals_count_pct_category_avg = np.mean(loss_series)
    diff_relative_proposals_count_pct_category_positive_avg = np.mean(loss_series[loss_series > 0])
    diff_relative_proposals_count_pct_category_negative_avg = np.mean(loss_series[loss_series < 0])

    overall_rr_positive.append(diff_relative_proposals_count_pct_category_positive_avg)
    overall_rr_negative.append(diff_relative_proposals_count_pct_category_negative_avg)

    cur_category_label = f'{category_title_map[category]}: {flippingPointPct:.0f}%, [~{diff_relative_proposals_count_pct_category_avg:.2f}; +{diff_relative_proposals_count_pct_category_positive_avg:.2f}; {diff_relative_proposals_count_pct_category_negative_avg:.2f}]'

    x_positions = diff_relative_proposals_count_pct_category.index.to_numpy()
    loss_values = loss_series.to_numpy()
    is_non_negative = (loss_series >= 0).to_numpy()  # NaN counts as "not non-negative"

    shared_style = dict(
        markerfacecolor=cur_color,
        markeredgewidth=1,
        markersize=4,
        alpha=0.8,
        linestyle='none',
    )

    # The first (largest) observation is plotted on its own because it carries
    # the legend label and, when negative, has no marker edge.
    first_is_non_negative = bool(is_non_negative[0])
    axes[1][1].plot(
        x_positions[0], loss_values[0],
        marker='o' if first_is_non_negative else '*',
        markeredgecolor=cur_color if first_is_non_negative else 'none',
        label=cur_category_label,
        **shared_style,
    )

    # Remaining observations: one call per marker shape instead of one call per
    # point. Values are sorted descending, so drawing the non-negative group
    # first keeps the original draw order.
    rest_x = x_positions[1:]
    rest_y = loss_values[1:]
    rest_non_negative = is_non_negative[1:]
    for marker, mask in (('o', rest_non_negative), ('*', ~rest_non_negative)):
        if mask.any():
            axes[1][1].plot(
                rest_x[mask], rest_y[mask],
                marker=marker,
                markeredgecolor=cur_color,
                **shared_style,
            )

# Third loop across to present the flipping point value on the outermost layer
for category in rrflippingPointsSorted:
    flippingPointVal = rrflippingPoints[category]
    # mark such observation with outer black edge color
    axes[1][1].plot(
        flippingPointVal,
        rr_sorted_by_category[category].at[flippingPointVal, f'{category}_loss_project_rep'],
        marker='o',
        markerfacecolor=categories_color_map[category],
        markeredgecolor='black',
    )

# nanmean instead of mean: a category with no strictly positive (or no strictly
# negative) losses contributes NaN (mean of an empty selection), which would
# otherwise turn the annotated overall average into "nan".
overall_rr_positive_avg = np.nanmean(overall_rr_positive)
overall_rr_negative_avg = np.nanmean(overall_rr_negative)

# No arrowprops are given, so these render as plain text at the xytext position.
axes[1][1].annotate(f'+{overall_rr_positive_avg:.2f}', xy=(0, 0), xytext=(10, 0.75), fontsize=20)
axes[1][1].annotate(f'{overall_rr_negative_avg:.2f}', xy=(0, 0), xytext=(50, -0.75), fontsize=20)

axes[1][1].legend(frameon=False, handlelength=1.0, handletextpad=0.5, fontsize=12)
axes[1][1].set_title("D   Project Representation", loc="left", fontsize=22, fontdict={'fontweight': 'bold'})
axes[1][1].grid(axis='both', which='major', color='gray', alpha=0.1)
axes[1][1].tick_params(axis='both', labelsize=16)
## end of fourth plot

# Figure-level captions shared by all panels: one centred below the figure
# (x axis) and one rotated along the left edge (y axis).
shared_caption_style = {'fontsize': 20, 'ha': 'center', 'va': 'center'}
fig.text(0.5, -0.02, 'Voting Instances (Sorted)', **shared_caption_style)
fig.text(
    -0.02, 0.5,
    'Impact Loss by Equal Shares (UG - ES)',
    rotation='vertical',
    **shared_caption_style,
)

# Tighten the panel layout, then render the figure.
plt.tight_layout()
plt.show()