In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as sts
# from factor_analyzer import FactorAnalyzer
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso

In [2]:
# Load the project dataset; the CSV is expected in the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="MultiStat_project.csv")

In [3]:
# Distinct countries in the dataset, in first-seen order.
# Note: despite the name this is a pandas Series, not a Python list.
country_lst = data["country"].drop_duplicates()
len(country_lst)

43

## Wilcoxon Test

In [4]:
from itertools import groupby
from operator import itemgetter

# get index of all the +2 and -2 neighbors of the stress years 
def get_index_before_after_stress_yr(df=None, countries=None):
    """Yield the row labels 1 and 2 steps either side of every stress period.

    A row is "flagged" when ``crisis_next_year == 1``. Consecutive flagged rows
    of one country are merged into a single run ``[first, last]``. For every
    run one dict is yielded with the labels

    * ``first - 1`` -> "before 2 yr"
    * ``first``     -> "before 1 yr"
    * ``last + 2``  -> "after 1 yr"
    * ``last + 3``  -> "after 2 yr"

    A label that is not a row of the same country is replaced by ``np.nan``.

    NOTE(review): the +/- offsets are applied to the row labels, so this
    assumes each country's rows are stored consecutively (presumably one row
    per year, in chronological order) - TODO confirm against the CSV.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with ``country`` and ``crisis_next_year`` columns and integer row
        labels. Defaults to the notebook-level ``data``.
    countries : iterable, optional
        Countries to scan. Defaults to the notebook-level ``country_lst`` when
        ``df`` is omitted, otherwise to the distinct countries found in ``df``.

    Yields
    ------
    dict
        Keys "before 2 yr", "before 1 yr", "after 1 yr", "after 2 yr"; values
        are int row labels or ``np.nan``.
    """
    # Fall back to the notebook globals so existing zero-argument calls keep working.
    frame = data if df is None else df
    if countries is None:
        countries = country_lst if df is None else frame["country"].drop_duplicates()

    for country in countries:
        in_country = frame["country"] == country
        # A set gives O(1) membership checks for the neighbour lookups below.
        country_labels = set(frame.loc[in_country].index.tolist())
        flagged = frame.loc[in_country & (frame["crisis_next_year"] == 1)].index.tolist()

        # Consecutive labels share a constant (position - label) offset, so
        # grouping on that offset splits `flagged` into runs of adjacent rows.
        for _, run in groupby(enumerate(flagged), key=lambda pair: pair[0] - pair[1]):
            labels = [int(label) for _, label in run]
            first, last = labels[0], labels[-1]  # identical for a one-row run

            before_2, before_1, after_1, after_2 = (
                label if label in country_labels else np.nan
                for label in (first - 1, first, last + 2, last + 3)
            )

            yield {
                "before 2 yr": before_2,
                "before 1 yr": before_1,
                "after 1 yr": after_1,
                "after 2 yr": after_2,
            }
            
            
# Materialise the generator: a bare generator is exhausted after one pass, so
# re-running the cell that builds the DataFrame from `A` would get no rows.
A = list(get_index_before_after_stress_yr())

In [5]:
# Gather every before/after window into a single frame (one row per stress period).
neigh_stress_yrs = pd.DataFrame(A)

# Separate the windows into the first-lag and second-lag years.
# NaNs are dropped per lag rather than on the whole frame, so that a stress
# period missing only one lag still contributes to the other lag's sample.
stress_1 = neigh_stress_yrs.loc[:, ["before 1 yr", "after 1 yr"]].dropna(axis=0, how="any")
stress_2 = neigh_stress_yrs.loc[:, ["before 2 yr", "after 2 yr"]].dropna(axis=0, how="any")


Wilcoxon test for 1 yr before and after stress periods

In [6]:
# Names of all covariate columns, collected once so later cells can loop over them.
varables = [
    'cpi',
    'dyn_gdp',
    'dyn_gdp_china',
    'dyn_GDP_US',
    'interest_rate_US',
    'oil_yoy',
    'dyn_consum',
    'dyn_fx_rate',
    'diff_priv_credit_gdp',
    'net_lending',
    'public_debt',
    'interest_on_debt',
    'overvaluation',
    'ca_balance',
    'dyn_fix_cap_form',
    'dyn_export_share',
    'diff_unempl',
    'dyn_prod_dol',
    'VIX',
    'GDP_per_cap',
]

In [7]:
# Generator that yields the Wilcoxon signed-rank p-value for each variable
def wilcoxon_test(variables, period, df, source=None):
    """Yield the paired Wilcoxon signed-rank p-value of each variable.

    For every variable the values at the "before" rows are paired, in order,
    with the values at the "after" rows and passed to ``scipy.stats.wilcoxon``.

    Parameters
    ----------
    variables : iterable of str
        Column names of ``source`` to test.
    period : int
        Lag used to pick the ``"before {period} yr"`` / ``"after {period} yr"``
        columns of ``df``.
    df : pandas.DataFrame
        Row labels of ``source`` for the paired observations. It must not
        contain NaN (the integer cast below raises otherwise).
    source : pandas.DataFrame, optional
        Frame holding the variables. Defaults to the notebook-level ``data``.

    Yields
    ------
    dict
        ``{"Wilcoxon p values {period}": p_value, "Variable {period}": name}``.
    """
    # Fall back to the notebook global so existing three-argument calls keep working.
    frame = data if source is None else source

    # The label columns turn float once NaN padding has been mixed in upstream,
    # so cast back to int explicitly instead of relying on implicit coercion.
    before_labels = df[f"before {period} yr"].astype(int).to_numpy()
    after_labels = df[f"after {period} yr"].astype(int).to_numpy()

    for var in variables:
        # The stored values are row *labels*, so select with .loc; positional
        # .iloc only agrees with them on a default RangeIndex.
        # Plain arrays keep the pairing purely order-based.
        before = frame.loc[before_labels, var].to_numpy()
        after = frame.loc[after_labels, var].to_numpy()

        yield {
            f"Wilcoxon p values {period}": sts.wilcoxon(before, y=after)[1],
            f"Variable {period}": var,
        }
        

In [8]:
# Run the Wilcoxon tests for both lags (1 and 2 years around each stress period).
wil_1, wil_2 = (
    pd.DataFrame(wilcoxon_test(varables, lag, windows))
    for lag, windows in ((1, stress_1), (2, stress_2))
)




In [15]:
# Put the two lag tables side by side, then keep a single variable-name column
# followed by the p-values of each lag.
wilcoxon = pd.concat([wil_1, wil_2], axis="columns").loc[
    :, ["Variable 1", "Wilcoxon p values 1", "Wilcoxon p values 2"]
]
# wilcoxon.to_latex()
wilcoxon

Unnamed: 0,Variable 1,Wilcoxon p values 1,Wilcoxon p values 2
0,cpi,0.00369,0.015583
1,dyn_gdp,0.573546,0.424489
2,dyn_gdp_china,6.5e-05,0.002505
3,dyn_GDP_US,0.837178,0.50284
4,interest_rate_US,5.6e-05,7e-06
5,oil_yoy,0.295841,0.003671
6,dyn_consum,0.109783,0.482591
7,dyn_fx_rate,0.906399,0.520182
8,diff_priv_credit_gdp,0.220972,0.032894
9,net_lending,0.447624,0.26152


In [None]:
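# Scratch note, kept as comments because bare number pairs are not valid Python.
# Presumably (year, crisis flag) pairs used to reason about the before / run /
# after grouping - TODO confirm or delete this cell.
# 2003 1
# 2004 0
#
# 2005 1
# 2006 1
# 2007 1
#
# 2008 0
# 2009 0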
