In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.lines import Line2D

In [None]:
# Load the crises data and print the number of missing values per column
crises_df = pd.read_csv('data/african_crises.csv')
missing_per_column = crises_df.isna().sum()
print(missing_per_column)

In [None]:
def get_colonist(cc3):
    """Return the 3-letter country code of the colonizer of a country.

    Parameters
    ----------
    cc3 : str
        3-letter country code of the country to look up.

    Returns
    -------
    str or None
        'PRT', 'GBR' or 'FRA'; None when the code appears in none of the
        colony lists below.
    """
    # Lookup order: Portuguese, English, French colonies
    colonies_by_colonizer = (
        ('PRT', ('AGO',)),
        ('GBR', ('EGY', 'KEN', 'NGA', 'ZAF', 'ZMB', 'ZWE', 'MUS')),
        ('FRA', ('DZA', 'CIV', 'CAF', 'MAR', 'TUN')),
    )

    for colonizer, colonies in colonies_by_colonizer:
        if cc3 in colonies:
            return colonizer

    # Unknown country code
    return None
    
def get_color(cc3):
    """Return the plot color of the former colonizer of a country.

    Parameters
    ----------
    cc3 : str
        3-letter country code of the country to look up.

    Returns
    -------
    str
        Matplotlib color code: 'r' for GBR, 'b' for FRA, 'g' for PRT.

    Raises
    ------
    KeyError
        If get_colonist returns no colonizer with a color for cc3.
    """
    # Color codes of former colonists of the african countries
    cmap = {'GBR': 'r', 'FRA': 'b', 'PRT': 'g'}
    colonist = get_colonist(cc3)

    if colonist not in cmap:
        # KeyError is what a plain dict lookup raises too, but this one names the country code
        raise KeyError(f'no colonizer color known for country code {cc3!r}')

    return cmap[colonist]

In [None]:
# Columns that signify crises
# NOTE(review): `crises` is not used in the cells visible here; kept in case a later cell reads it
crises = crises_df[['systemic_crisis', 'domestic_debt_in_default',
                    'currency_crises', 'inflation_crises',
                    'banking_crisis']]

# All distinct country codes in the data
ccs = crises_df['cc3'].unique()

# Plot one inflation time series per country
for cc in ccs:
    # Select the country's rows once and take both columns from that selection
    country_rows = crises_df.loc[crises_df['cc3'] == cc]
    years = country_rows['year']
    inflation = country_rows['inflation_annual_cpi']

    plt.plot(years, inflation, label=cc)

plt.title('Inflation per country')
plt.xlabel('Years')
plt.ylabel('Inflation (log scaled)')
plt.yscale('log')
# The label=cc passed to plt.plot only shows up once a legend is drawn
plt.legend(ncol=2, fontsize='small')
plt.show()

In [None]:
# Keep the rows from 1957 onward as this is the minimum year we have for each country
crises_df_after_1957 = crises_df.loc[crises_df['year'] >= 1957]

# Gather all unique country codes we have
ccs = crises_df['cc3'].unique()

# Iterate and plot the inflation time-series, leaving out ZWE and AGO
for cc in ccs:
    if cc in ('ZWE', 'AGO'):
        continue

    # Build the mask from the filtered frame itself; a mask taken from the
    # unfiltered crises_df only works through implicit index alignment
    country_rows = crises_df_after_1957.loc[crises_df_after_1957['cc3'] == cc]
    years = country_rows['year']
    inflation = country_rows['inflation_annual_cpi']

    plt.plot(years, inflation, color=get_color(cc))

plt.title('Inflation after 1957 (ZWE/AGO removed)')
plt.xlabel('Years')
# The y axis is linear in this figure, so the label must not claim a log scale
plt.ylabel('Inflation (annual CPI)')

# Create custom legend to only signify the colonists
custom_lines = [Line2D([0], [0], color='r'),
                Line2D([0], [0], color='b'),
                Line2D([0], [0], color='g')]
plt.legend(custom_lines, ['GBR', 'FRA', 'PRT'])
plt.show()

In [None]:
# Columns that signify what rows/years had crises
crises_cols = ['systemic_crisis', 'domestic_debt_in_default',
               'currency_crises', 'inflation_crises',
               'banking_crisis']

# Crisis columns plus the country code, from the 1957-onward frame.
# Reusing crises_cols keeps this selection and the loop below in sync.
bool_crises_df = crises_df_after_1957[crises_cols + ['cc3']]

# Replace the 'crisis'/'no_crisis' labels in banking_crisis with 1/0 so the column can be summed
bool_crises_df = bool_crises_df.replace({'banking_crisis': {'crisis': 1, 'no_crisis': 0}})

# One bar chart per crisis column
for crisis in crises_cols:
    # For each country save the sum of the column (amount of crisis years)
    crisis_counts = [
        bool_crises_df.loc[bool_crises_df['cc3'] == cc, crisis].sum()
        for cc in ccs
    ]

    # Sort ascending by count; ties are ordered by country code
    counts_sorted, cc_sorted = zip(*sorted(zip(crisis_counts, ccs)))

    # Give each country the color of its former colonizer
    colors = [get_color(cc) for cc in cc_sorted]
    plt.title(crisis)
    plt.xlabel('Country')
    plt.ylabel('No. of crisis years')
    plt.bar(cc_sorted, counts_sorted, color=colors)
    plt.show()

In [None]:
# Dictionary with keys being country codes and their values the year of independence
y_of_independence = {}

for cc in ccs:
    country_crises_df = crises_df.loc[crises_df.cc3 == cc]
    # Years in which this country is flagged as independent
    independent_years = country_crises_df.loc[country_crises_df.independence == 1, 'year']

    if independent_years.empty:
        # Fail with the country code instead of a bare positional-index error
        raise ValueError(f'no row with independence == 1 found for {cc}')

    # Earliest flagged year; min() does not depend on the rows being sorted by year
    y_of_independence[cc] = independent_years.min()


# # Plots years of independence on a number line (not useful)

# countries, years = list(y_of_independence.keys()), list(y_of_independence.values())
# colors = [get_color(c) for c in countries]

# plt.title('Years of independence of our data-set')
# plt.scatter(years, np.zeros_like(years), c=colors)

# # Hide y scale as we do not need it
# plt.yticks(color='w')
# plt.show()

In [None]:
# One figure per country: yearly number of crises with the year of independence marked
for cc in ccs:

    country_crises_df = crises_df.loc[crises_df.cc3 == cc]
    years = country_crises_df.year

    # crises_df itself is never recoded, so banking_crisis may still hold the
    # 'crisis'/'no_crisis' labels here; map them to 1/0 and force numeric columns
    # so that every crisis column takes part in the row sum
    crisis_flags = (
        country_crises_df[crises_cols]
        .replace({'banking_crisis': {'crisis': 1, 'no_crisis': 0}})
        .apply(pd.to_numeric)
    )

    # Sum up amount of crises in the country per row (year)
    total_crises_count_yearly = crisis_flags.sum(axis=1)

    plt.title(f'No. of crises in {cc} with year of independence labeled')
    plt.xlabel('Years')
    plt.ylabel('No. of crises')

    plt.plot(years, total_crises_count_yearly, color=get_color(cc))
    # Black dashed marker: green is already the line color of PRT colonies
    plt.axvline(y_of_independence[cc], color='k', linestyle='--', label='year of independence')

    # Integer ticks only: 0 up to the number of crisis columns
    plt.yticks(list(range(len(crises_cols) + 1)))

    plt.legend(loc='upper right')
    plt.show()