In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd 
import numpy as np
import os 
import glob
from government_functions import * 
import ast 
import warnings
# Print only the warning message followed by a newline; the remaining arguments that
# `warnings` passes to the formatter (category, file name, line number, ...) are ignored.
warnings.formatwarning = lambda msg, *args, **kwargs: f'{msg}\n'

In [None]:
# Load the government CSV of a single country (Australia) to try the matching on one case.
# NOTE(review): process_gov_csv is not defined in this notebook - presumably it is provided
# by the `government_functions` star import. Later cells filter the result on a 'Start' column.
file_name='Australia.csv'
df=process_gov_csv(f'government/{file_name}')

Now, on to matching ministries with elections :D

In [2]:
# Election-level results table; the first spreadsheet column becomes the index.
# Its column labels are later parsed as election dates with the format '<country>-%b-%y'.
final_df=pd.read_excel('results/complete_dataframe.xlsx',index_col=0)

In [None]:
# Keep only the Australian election columns. Column labels look like '<country>-<Mon>-<yy>',
# so the whole country part is compared (case-sensitively) instead of a bare prefix; a prefix
# test would also pick up any country whose name merely starts with 'Australia'.
Australia_df=final_df[[col for col in final_df.columns if col.rsplit('-',2)[0]=='Australia']]
# Array of the election column labels for Australia
Australia_elections=Australia_df.columns.to_numpy()

In [None]:
def match_ministries_and_elections(countryname,election_dates,df):
    """Split ministry records into one DataFrame per election period.

    Parameters
    ----------
    countryname : str
        Country prefix of the election labels.
    election_dates : iterable of str
        Election labels of the form '<countryname>-<Mon>-<yy>' (parsed with '%b-%y').
    df : pandas.DataFrame
        Ministry records; must contain a datetime-like 'Start' column.

    Returns
    -------
    dict
        Maps '-<Mon>-<yy>' (the unshifted election date) to the rows of `df` whose
        'Start' falls into that election's period.

    Notes
    -----
    Both ends of every period are moved one month earlier to tolerate date errors in the
    political yearbook data (e.g. ministers of the Germany 09/2013 election are listed as
    appointed on 31.08.2013). The last period is open-ended and runs up to today.
    """
    one_month = pd.DateOffset(months=1)
    ordered = sorted(pd.to_datetime(election_dates, format=f'{countryname}-%b-%y'))
    # Each period is closed by the following election; the final one by the current date.
    closing_dates = ordered[1:] + [pd.Timestamp.today()]

    periods = {}
    for election_day, closing_day in zip(ordered, closing_dates):
        window_open = election_day - one_month
        window_close = closing_day - one_month
        in_window = (df['Start'] >= window_open) & (df['Start'] < window_close)
        # The key keeps the real election date, not the shifted window start.
        periods[election_day.strftime('-%b-%y')] = df[in_window]

    return periods

In [None]:
# Split the Australian ministry records into one DataFrame per election period
test=match_ministries_and_elections('Australia',Australia_elections,df)

In [None]:
# Show the election keys of the resulting dict (formatted as '-Mon-yy')
test.keys()

In [None]:
# Inspect the ministry records assigned to the August 2010 election period
test.get('-Aug-10')

In [None]:
# NOTE(review): starting_gov_dict returns a (governments_dict, edge_cases) tuple, so test_2
# is a tuple, not a dict. The function is defined in a later cell of this notebook; on a
# fresh kernel that cell has to be run first (unless the star import already provides it).
test_2=starting_gov_dict(test,'Australia')

In [3]:
def starting_gov_dict(election_period_dict,countryname): 
    """Pick the initial cabinet for every election period.

    The start date that occurs most often within a period is assumed to be the
    inauguration date of the new cabinet; all records starting on that date form the
    starting government.

    Parameters
    ----------
    election_period_dict : dict
        Output of match_ministries_and_elections: maps '-<Mon>-<yy>' election keys to
        DataFrames with 'Start', 'Position' and 'Party' columns.
    countryname : str
        Only used to label warnings and edge cases.

    Returns
    -------
    tuple
        (governments_dict, edge_cases). governments_dict maps each election key to a
        DataFrame with the columns 'Position' and 'Party' (empty if the period has no
        records). edge_cases lists '<countryname><election>' labels for which no
        government was found within a year after the election and the late one was kept.

    Warns
    -----
    UserWarning
        When an earlier government replaces a suspiciously late one, when the late one
        has to be kept, when 8 or fewer ministers were found, or when the chosen
        government starts before the election date.
    """
    governments_dict = {}
    edge_cases = []
    one_month = pd.DateOffset(months=1)
    one_year = pd.DateOffset(years=1)

    for election, df_for_election_period in election_period_dict.items():
        # `election` already starts with '-', so no extra separator is added; this keeps
        # every warning label identical to the corresponding column name.
        label = f"{countryname}{election}"

        # Most frequent start date in this period
        mode = df_for_election_period['Start'].mode()
        if mode.empty:
            # No government information for this period: store an empty frame
            governments_dict[election] = pd.DataFrame(columns=['Position', 'Party'])
            continue

        start_date = mode[0]  # taken as the beginning of the new government
        starting_gov = df_for_election_period.loc[df_for_election_period['Start'] == start_date, ['Position', 'Party']]

        # A government found very late in the election period is likely an error:
        # retry within (election - 1 month, election + 1 year].
        election_date = pd.to_datetime(election, format='-%b-%y')
        if not (election_date.year <= start_date.year <= election_date.year + 1):
            starts = df_for_election_period['Start']
            shorter_df = df_for_election_period[(starts > election_date - one_month) & (starts <= election_date + one_year)]
            new_mode = shorter_df['Start'].mode()
            if not new_mode.empty:
                start_date = new_mode[0]  # earlier date becomes the start of the government
                starting_gov = shorter_df.loc[shorter_df['Start'] == start_date, ['Position', 'Party']]
                warnings.warn(f"{label}: Use government from {new_mode[0]}")
            else:
                warnings.warn(f"{label}: No suitable government found within a year after the election. Fall back to late government at {mode[0]}.")
                edge_cases.append(label)

        # Warn if the found government has few ministers
        if starting_gov.shape[0] <= 8:
            warnings.warn(f"{label}: Only found {starting_gov.shape[0]} ministers.")

        # Warn if the found government started before the actual election
        if start_date < election_date:
            warnings.warn(f"{label}: Careful, check db error. Found Government at {start_date}")

        governments_dict[election] = starting_gov
    return governments_dict,edge_cases


Create a loop over all countries:

In [4]:

# Labels ('<country>-<Mon>-<yy>') of elections for which only a late fallback government was found
special_cases= []
# sorted() makes the processing order (and the order of the warnings) independent of the filesystem
for csv_file in sorted(glob.glob(os.path.join('government/','*.csv'))): 
    file= os.path.basename(csv_file)
    countryname,_=os.path.splitext(file)
    # create country government dataframe
    df=process_gov_csv(csv_file)
    # Grab the part of final_df that belongs to this country. Column labels look like
    # '<country>-<Mon>-<yy>'; the whole country part is compared (case-sensitively) because a
    # plain prefix test would also select countries whose name merely starts with `countryname`,
    # and those labels would then fail to parse as '<countryname>-%b-%y'.
    country_df=final_df[[col for col in final_df.columns if col.rsplit('-',2)[0]==countryname]]
    elections=country_df.columns.to_numpy() # election labels of the country
    # All records per election period (only the initial government is used below, but the full
    # periods are needed if stability or changes in government become relevant)
    election_period_dict=match_ministries_and_elections(countryname,elections,df)
    # Reduce every period to the initial government
    government_dict,edge_cases=starting_gov_dict(election_period_dict,countryname)
    special_cases.extend(edge_cases)

    ## loop over all election periods ##
    for date,dataframe in government_dict.items():
        if dataframe.empty:
            continue
        column=f'{countryname}{date}'
        party_str=country_df.at['parties',column]
        parties=ast.literal_eval(party_str) # parses the stored string into a list
        # create dicts
        ministry_dict,unweighted_dict,weighted_dict=get_ministry_dicts(dataframe,parties)
        # Translate the dicts into a list / arrays corresponding to the party list.
        # NOTE(review): assumes the dicts are ordered like `parties` - confirm against get_ministry_dicts.
        ministry_list=list(ministry_dict.values())
        unweighted_array=np.zeros(len(parties))
        weighted_array=np.zeros(len(parties))
        for i,value in enumerate(unweighted_dict.values()):
            unweighted_array[i]=value
        for i,value in enumerate(weighted_dict.values()):
            weighted_array[i]=value

        # Create the result rows the first time they are needed, then write the values
        for row_label in ('Ministers','unweighted','weighted'):
            if row_label not in final_df.index:
                final_df.loc[row_label]=[np.nan] * len(final_df.columns)
        final_df.at['Ministers',column]=ministry_list
        final_df.at['unweighted',column]=unweighted_array
        final_df.at['weighted',column]=weighted_array



    



Australia-Aug-10: Use government from 2010-09-14 00:00:00
Australia--Sep-13: Careful, check db error. Found Government at 2013-08-31 00:00:00
Australia-Jul-16: Use government from 2016-07-19 00:00:00
Austria-Sep-19: Use government from 2020-01-07 00:00:00
Belgium--Nov-91: Careful, check db error. Found Government at 1991-10-03 00:00:00
Bulgaria-Oct-14: Use government from 2014-11-07 00:00:00
Croatia-Nov-07: No suitable government found within a year after the election. Fall back to late government at 2009-07-01 00:00:00.
Cyprus-May-01: No suitable government found within a year after the election. Fall back to late government at 2003-02-28 00:00:00.
Cyprus-May-06: Use government from 2006-07-16 00:00:00
Cyprus--May-06: Only found 4 ministers.
Cyprus-May-11: Use government from 2011-08-05 00:00:00
Cyprus--May-11: Only found 7 ministers.
Cyprus-May-16: No suitable government found within a year after the election. Fall back to late government at 2018-03-01 00:00:00.
Czech-Jun-02: Use gov

In [None]:
# Save the enriched table (now including the 'Ministers', 'unweighted' and 'weighted' rows)
# as a new workbook, using the xlsxwriter engine.
final_df.to_excel('combined_dataframe.xlsx', engine='xlsxwriter')