In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.lines import Line2D

In [None]:
crises_df = pd.read_csv('data/african_crises.csv')
# Recode the textual banking_crisis labels as 1 (crisis) / 0 (no crisis) so the
# column can be summed together with the other crisis columns later on
crises_df = crises_df.replace({'banking_crisis': {'crisis': 1, 'no_crisis': 0}})

# DataFrame.info() prints its summary itself and returns None, so it must not
# be wrapped in print() (that would emit an extra "None" line)
crises_df.info()

In [None]:
# Maps a country's 3 letter code to the 3 letter code of its former colonizer
def get_colonist(cc3):
    """Return 'PRT', 'GBR' or 'FRA' for a listed country code, otherwise None."""
    # (colonizer, colonies) pairs, checked in this order
    colonies_by_colonist = (
        ('PRT', ['AGO']),
        ('GBR', ['EGY', 'KEN', 'NGA', 'ZAF', 'ZMB', 'ZWE', 'MUS']),
        ('FRA', ['DZA', 'CIV', 'CAF', 'MAR', 'TUN']),
    )

    for colonist, colonies in colonies_by_colonist:
        if cc3 in colonies:
            return colonist

    # Country code not present in any of the lists
    return None
    
# Color codes of former colonists of the african countries
def get_color(cc3):
    """Return the matplotlib colour code for a country's former colonist.

    cc3: 3 letter country code, resolved through get_colonist()

    Returns 'r' for GBR, 'b' for FRA and 'g' for PRT.
    Raises KeyError (naming the offending code) when the country has no
    colonist with a colour assigned.
    """
    cmap = {'GBR': 'r', 'FRA': 'b', 'PRT': 'g'}
    colonist = get_colonist(cc3)

    # Without this check an unknown country fails with the opaque "KeyError: None"
    if colonist not in cmap:
        raise KeyError(f'no colour defined for country code {cc3!r}')
    return cmap[colonist]


def plot_column_time_series(dataframe, column, start_year=1800, colour='colonial',
                            excluded_countries=None, scale='linear', exclude_zeros=False):
    """Plots a column as a time series with years on the x-axis and the column on the y-axis,
    one line per country.

        dataframe: frame to plot from; needs the 'year' and 'cc3' columns plus `column`
        column: name of the column we wish to plot
        start_year: first year included in the time-series (per the original note,
            1957 is the first year we have data from all countries)
        colour: how to colour our lines; 'colonial' colours them according to fmr.
            colonists and adds a legend, any other value leaves the colours to
            matplotlib's default colour cycle
        excluded_countries: country codes of countries we wish to exclude (default: none)
        scale: the scale of our y-axis (linear is default)
        exclude_zeros: exclude countries that only have zeroes in the column within
            the plotted period
    """
    # None stands for "exclude nothing"; avoids a mutable default argument
    if excluded_countries is None:
        excluded_countries = ()

    # Restrict the frame that was passed in (not a global) to start_year onwards
    df_from_start = dataframe.loc[dataframe['year'] >= start_year]

    # Walk over every country code of the full frame, in order of first appearance,
    # so the default colour cycle does not depend on start_year
    for cc in dataframe['cc3'].unique():
        # Don't plot excluded countries
        if cc in excluded_countries:
            continue

        # Mask built from the filtered frame itself, so the indexes always match
        country_rows = df_from_start.loc[df_from_start['cc3'] == cc]
        column_values = country_rows[column]

        # An all-zero (or empty) series carries no information, skip it on request
        if exclude_zeros and (column_values == 0).all():
            continue

        plot_colour = get_color(cc) if colour == 'colonial' else None
        plt.plot(country_rows['year'], column_values, color=plot_colour)

    # Only mention removed countries in the title when there are any
    title = f'{column} after {start_year}'
    removed = " ".join(excluded_countries)
    if removed:
        title += f' ({removed} removed)'
    plt.title(title)
    plt.xlabel('Years')
    plt.ylabel(f'{column}')
    plt.yscale(scale)
    if colour == 'colonial':
        # Create custom legend to only signify the colonists
        # (colours mirror the ones get_color hands out)
        custom_lines = [Line2D([0], [0], color='r'),
                        Line2D([0], [0], color='b'),
                        Line2D([0], [0], color='g')]
        plt.legend(custom_lines, ['GBR', 'FRA', 'PRT'])
    plt.show()

In [None]:
# inflation_annual_cpi from the default start year, on a log y-axis, with
# matplotlib's default colours instead of the colonial colouring
plot_column_time_series(
    dataframe=crises_df,
    column='inflation_annual_cpi',
    colour='own',
    scale='log',
)

In [None]:
# inflation_annual_cpi from 1975 onwards with ZWE and AGO left out,
# coloured by former colonist (the default)
plot_column_time_series(
    dataframe=crises_df,
    column='inflation_annual_cpi',
    start_year=1975,
    excluded_countries=['ZWE', 'AGO'],
)

In [None]:
# gdp_weighted_default from the default start year, with matplotlib's default colours
plot_column_time_series(
    dataframe=crises_df,
    column='gdp_weighted_default',
    colour='own',
)

In [None]:
# gdp_weighted_default from 1980 onwards, coloured by former colonist
# (the default), with the exclude_zeros flag switched on
plot_column_time_series(
    dataframe=crises_df,
    column='gdp_weighted_default',
    start_year=1980,
    exclude_zeros=True,
)

In [None]:
# Columns that signify what rows/years had crises
crises_cols = ['systemic_crisis', 'domestic_debt_in_default',
               'currency_crises', 'inflation_crises',
               'banking_crisis']

ccs = crises_df['cc3'].unique()
crises_df_after_1957 = crises_df.loc[crises_df['year'] > 1957]

# Gather all crisis columns (plus the country code) from after 1957.
# Built from crises_cols so the two column lists cannot drift apart.
bool_crises_df = crises_df_after_1957[crises_cols + ['cc3']]

# One bar chart per crisis column
for crisis in crises_cols:
    # For each country take the sum (amount of crisis years); reindexing on ccs
    # keeps a country without any rows after 1957 in the chart with a count of 0
    counts_by_country = (
        bool_crises_df.groupby('cc3')[crisis].sum()
        .reindex(ccs, fill_value=0)
        .rename_axis('cc3')
        .reset_index(name='count')
        .sort_values(['count', 'cc3'])  # ascending by count, ties by country code
    )
    cc_sorted = counts_by_country['cc3'].tolist()
    counts_sorted = counts_by_country['count'].tolist()

    # Give each country the color of its former colonist
    colors = [get_color(cc) for cc in cc_sorted]
    plt.title(crisis)
    plt.xlabel('Country code')
    plt.ylabel('No. of crisis years')
    plt.bar(cc_sorted, counts_sorted, color=colors)
    plt.show()

In [None]:
# Dictionary with keys being country codes and their values the year of independence,
# taken as the earliest year whose row has independence == 1. Countries without such
# a row are simply absent from the dictionary.
independent_rows = crises_df.loc[crises_df.independence == 1]
y_of_independence = independent_rows.groupby('cc3')['year'].min().to_dict()


for cc in ccs:

    country_crises_df = crises_df.loc[crises_df.cc3 == cc]
    years = country_crises_df.year

    # Sum up amount of crises in the country, per row/year
    total_crises_count_yearly = country_crises_df[crises_cols].sum(axis=1)
    country_name = country_crises_df['country'].iloc[0]

    plt.title(f'No. of crises in {country_name} with year of independence labeled')
    plt.xlabel('Years')
    plt.ylabel('No. of crises')

    plt.plot(years, total_crises_count_yearly, color=get_color(cc))

    # Only draw (and label) the marker when an independence year is known
    independence_year = y_of_independence.get(cc)
    if independence_year is not None:
        plt.axvline(independence_year, color='c', label='year of independence')
        plt.legend(loc='upper right')

    # Set y-ticks since we don't need intermediate values: one tick per crisis
    # column plus zero (assumes 0/1 indicators - TODO confirm). Passing only the
    # locations avoids a locations/labels length mismatch, which matplotlib rejects.
    plt.yticks(list(range(len(crises_cols) + 1)))

    plt.show()