In [1]:
from collections import Counter
import os
from pprint import pprint

import pandas as pd
from scipy.stats import norm

from utils import BASE_DIR, negative_outcomes, democracies, costs, out2cat, code_dir

In [2]:
# Number of the Stata run to analyse; it selects the output folder below and
# is embedded in the name of the pivot CSV written later in the notebook.
testno = 9

# Folder containing the Stata exports for that run.
STATA_OUTPUT_DIR = os.path.join(
    BASE_DIR,
    'Stata',
    f'output_{testno}',
)

In [23]:
# Read the long-format comparison table exported by Stata.
m3obs = pd.read_csv(os.path.join(STATA_OUTPUT_DIR, 'M3StatsComparisons.csv'))
considered_models = set(m3obs['Model'])

# Rank used to order the statistics within each outcome
# (coefficients, then t-statistics, then information criteria, then log-likelihood).
stat_rank = {
    'betaw': 1, 'betap': 2,
    'tstatw': 3, 'tstatp': 4,
    'AIC': 10, 'BIC': 11,
    'loglike': 21,
}

# Wide view: one row per (Outcome, OutcomeCategory, Stat), one column per Model.
# `StatN` is a temporary sort key and is dropped again after sorting.
m3obs_pivot = (
    m3obs
    .pivot(index=['Outcome', 'OutcomeCategory', 'Stat'], columns='Model', values='Result')
    .reset_index()
    .assign(StatN=lambda frame: frame['Stat'].replace(stat_rank))
    .sort_values(['OutcomeCategory', 'Outcome', 'StatN'])
    .drop(columns=['StatN'])
)

# Everything after the three identifier columns is a per-model result column.
model_columns = m3obs_pivot.columns[3:]
for model_name in model_columns:
    m3obs_pivot[model_name] = pd.to_numeric(m3obs_pivot[model_name])

# Index the long table so single statistics can be looked up by
# (Outcome, Model, Stat) in later cells.
m3obs = m3obs.set_index(['Outcome', 'Model', 'Stat'])

In [24]:
# Save the pivoted table next to the notebook (relative path, so it lands in
# the current working directory); the run number is part of the file name.
pivot_csv_name = f'M3-stats-{testno}-pivot.csv'
m3obs_pivot.to_csv(pivot_csv_name, index=False)

In [34]:
# Model identifiers used for the rest of the notebook.
# NOTE: these two assignments replace the `democracies` and `costs` values
# imported from `utils` in the first cell.
democracies = [
    'W4',
]
costs = [
    'p1', 'p2', 'p3',
    'p4', 'p5', 'p6',
]

In [35]:
"""
    'model', 'outcome',
    'absbetap', 'bhp', 'pstatp',
    'absbetaw', 'bhw', 'pstatw',
    'm3_win_aic', 'm3_win_bic'
"""

def t2p(t: float):
    """Convert a test statistic into a two-sided p-value.

    The statistic is compared against the standard normal distribution.

    The tail probability is taken from the survival function ``norm.sf``
    instead of ``1 - norm.cdf``: for large ``|t|`` the CDF rounds to exactly
    1.0 and the subtraction would return 0.0, whereas ``sf`` keeps the tiny
    but non-zero tail value.

    Args:
        t: Test statistic; only its absolute value matters.

    Returns:
        ``2 * P(Z > |t|)`` for a standard normal ``Z``.
    """
    return 2 * norm.sf(abs(t))

# Accumulator for the result rows: the loop further down in this cell appends
# one dict per (model, outcome) and the next cell turns the list into a DataFrame.
m3result_rows = []

def getrow(outcome, model, *factors, neg=None):
    """Summarise sign and significance of the requested coefficients.

    For every suffix in ``factors`` the statistics ``beta<f>`` and
    ``tstat<f>`` of ``(outcome, model)`` are read from the module-level
    ``m3obs`` frame (indexed by Outcome, Model, Stat) and checked against
    the expected sign.

    Args:
        outcome: Outcome name, first level of the ``m3obs`` index.
        model: Model name, second level of the ``m3obs`` index.
        *factors: Suffixes selecting which ``beta``/``tstat`` pair to read
            (the callers in this notebook pass ``'w'`` or ``'p'``).
        neg: ``True`` when a negative coefficient is the expected direction,
            ``False`` when a positive one is. When omitted it is derived as
            ``outcome in negative_outcomes``, so the function no longer
            depends on a global ``neg`` having been set by the caller.

    Returns:
        A dict with three entries per factor:
        ``absbeta<f>`` - ``abs(beta)`` if the sign is as expected, else ``0``;
        ``bh<f>`` - ``1`` if the sign is as expected, else ``0``;
        ``pstat<f>`` - ``t2p(tstat)`` if the sign is as expected, else ``1``.

    Raises:
        KeyError: if a requested statistic is missing from ``m3obs``.
    """
    if neg is None:
        neg = outcome in negative_outcomes

    row = {}
    for f in factors:
        beta = m3obs.loc[(outcome, model, f'beta{f}')]['Result'] # type: ignore
        tstat = m3obs.loc[(outcome, model, f'tstat{f}')]['Result'] # type: ignore

        # Strict comparisons: a coefficient of exactly zero never counts as
        # having the expected sign, whichever direction is expected.
        bh = int(beta < 0 if neg else beta > 0)

        # Entries from earlier factors are spread last so they keep priority
        # over the ones computed in this iteration.
        row = {
            f'absbeta{f}': abs(beta) if bh else 0,
            f'bh{f}': bh,
            f'pstat{f}': t2p(tstat) if bh else 1,
            **row,
        }

    return row

# Build one result row per (model, outcome): the single-factor models first,
# then every combined `<w>+<p>` model together with its AIC/BIC comparison.
#
# Outcomes are visited in order of first appearance in `m3obs_pivot` instead
# of through a `set`: the iteration order of a set of strings changes between
# interpreter runs, which made the row order of the results non-reproducible.
for outcome in m3obs_pivot['Outcome'].unique():
    outcome: str
    # For outcomes listed in `negative_outcomes` a negative coefficient is the
    # expected direction. Kept as a module-level name for code that reads `neg`.
    neg = outcome in negative_outcomes

    # Models with only a `w` term.
    for w in democracies:
        m3result_rows.append({
            'model': w,
            'outcome': outcome,
            **getrow(outcome, w, 'w'),
        })

    # Models with only a `p` term.
    for p in costs:
        m3result_rows.append({
            'model': p,
            'outcome': outcome,
            **getrow(outcome, p, 'p'),
        })

    # Combined models, compared against the model labelled `<w>+<p>/<w>`.
    for w in democracies:
        for p in costs:
            m3 = f'{w}+{p}'
            m1 = f'{m3}/{w}'
            m2 = f'{m3}/{p}'  # not used in this cell; kept in case later cells read it

            row = getrow(outcome, m3, 'p')
            # NOTE(review): 'e_peaveduc' is not one of the `costs` assigned
            # earlier in this notebook (p1..p6), so with that list `wmodel`
            # is always `m3` - confirm whether this special case is still needed.
            wmodel = m1 if p == 'e_peaveduc' and 'Endog' in out2cat[outcome] else m3
            row.update(getrow(outcome, wmodel, 'w'))

            m3bic = m3obs.loc[(outcome, m3, 'BIC')]['Result'] # type: ignore
            m3aic = m3obs.loc[(outcome, m3, 'AIC')]['Result'] # type: ignore
            m1bic = m3obs.loc[(outcome, m1, 'BIC')]['Result'] # type: ignore
            m1aic = m3obs.loc[(outcome, m1, 'AIC')]['Result'] # type: ignore

            # A "win" requires the p coefficient to have the expected sign
            # (`bhp`) and the criterion of m3 to be lower than that of m1.
            m3result_rows.append({
                'model': m3,
                'outcome': outcome,
                **row,
                'm3_win_bic': row['bhp'] and int(m3bic < m1bic),
                'm3_win_aic': row['bhp'] and int(m3aic < m1aic),
            })


In [36]:
# Fixed column layout for the results table. Keys that a row does not carry
# (e.g. the `w` columns of a `p`-only model) are left as missing values.
result_columns = (
    'model', 'outcome',
    'absbetap', 'bhp', 'pstatp',
    'absbetaw', 'bhw', 'pstatw',
    'm3_win_bic', 'm3_win_aic',
)
m3_results_db = pd.DataFrame(m3result_rows, columns=result_columns)

# Show the assembled table as the cell output.
m3_results_db

Unnamed: 0,model,outcome,absbetap,bhp,pstatp,absbetaw,bhw,pstatw,m3_win_bic,m3_win_aic
0,W4,RuleofLaw,,,,0.53,1.0,0.000000,,
1,p1,RuleofLaw,0.02,1.0,0.696537,,,,,
2,p2,RuleofLaw,0.00,0.0,1.000000,,,,,
3,p3,RuleofLaw,0.04,1.0,0.342112,,,,,
4,p4,RuleofLaw,0.00,0.0,1.000000,,,,,
...,...,...,...,...,...,...,...,...,...,...
450,W4+p2,ReligiousFreedom,0.03,1.0,0.031555,2.98,1.0,0.000000,1.0,1.0
451,W4+p3,ReligiousFreedom,0.00,0.0,1.000000,1.64,1.0,0.000650,0.0,0.0
452,W4+p4,ReligiousFreedom,0.03,1.0,0.030007,2.97,1.0,0.000000,1.0,1.0
453,W4+p5,ReligiousFreedom,0.02,1.0,0.465390,1.64,1.0,0.000674,0.0,1.0


In [37]:
# Boolean significance flags for both p-value columns at the 10%, 5%, 1% and
# 0.1% levels. Rows where the p-value is missing compare as False.
threshold_flags = {
    'pstatp': {'pp100': 0.100, 'pp050': 0.050, 'pp010': 0.010, 'pp001': 0.001},
    'pstatw': {'pw100': 0.100, 'pw050': 0.050, 'pw010': 0.010, 'pw001': 0.001},
}
for pvalue_column, flags in threshold_flags.items():
    for flag_column, alpha in flags.items():
        m3_results_db[flag_column] = m3_results_db[pvalue_column] < alpha

In [38]:
# Columns that are summed per model: the expected-sign indicators, the
# significance flags and the AIC/BIC win indicators.
count_columns = [
    'bhp',
    'pp100', 'pp050', 'pp010', 'pp001',
    'bhw',
    'pw100', 'pw050', 'pw010', 'pw001',
    'm3_win_bic', 'm3_win_aic',
]

by_model = m3_results_db.groupby('model')

# One row per model: counts across outcomes plus the mean of the
# `absbetap` / `absbetaw` columns.
per_model_r = by_model[count_columns].sum()
per_model_r['meanbetap'] = by_model['absbetap'].mean()
per_model_r['meanbetaw'] = by_model['absbetaw'].mean()
per_model_r = per_model_r.reset_index()

In [39]:
# Sort
# 1-based position of every model identifier within its list.
wloc = {name: rank for rank, name in enumerate(democracies, start=1)}
ploc = {name: rank for rank, name in enumerate(costs, start=1)}

# Radix for the sort key: one more than the longer of the two lists, so a
# position can never reach the next "digit".
base = 1 + max(len(wloc), len(ploc))

def custom_sort(m: str):
    """Return an integer sort key for a model name.

    Uses the module-level ``wloc``, ``ploc`` and ``base``. Names found in
    ``wloc`` sort first, names found in ``ploc`` second, and combined
    ``'<w>+<p>'`` names last, each group ordered by list position.

    Raises:
        Exception: if ``m`` is neither a combined name nor a key of
            ``wloc`` or ``ploc``.
    """
    if '+' in m:
        democracy, cost = m.split('+')
        # Offset by base**2 so combined models come after all single ones.
        return base ** 2 + base * wloc[democracy] + ploc[cost]

    if m in wloc:
        return wloc[m]

    if m in ploc:
        return base * ploc[m]

    raise Exception('Not sure what to do')

# Order the summary rows with `custom_sort`; the key column is only needed
# for sorting and is removed again afterwards.
per_model_r = (
    per_model_r
    .assign(custom_sort=per_model_r['model'].apply(custom_sort))
    .sort_values('custom_sort')
    .drop(columns='custom_sort')
)

In [40]:
# Write the per-model summary into `code_dir`, without the row index.
summary_csv_path = os.path.join(code_dir, 'Data_Analysis.csv')
per_model_r.to_csv(summary_csv_path, index=False)

In [41]:
# Display the sorted per-model summary (the same table that was written to CSV above).
per_model_r

Unnamed: 0,model,bhp,pp100,pp050,pp010,pp001,bhw,pw100,pw050,pw010,pw001,m3_win_bic,m3_win_aic,meanbetap,meanbetaw
0,W4,0.0,0,0,0,0,31.0,26,25,25,24,0.0,0.0,,4.732
7,p1,25.0,5,3,2,2,0.0,0,0,0,0,0.0,0.0,2.430857,
8,p2,24.0,16,15,5,4,0.0,0,0,0,0,0.0,0.0,0.180857,
9,p3,31.0,12,7,3,3,0.0,0,0,0,0,0.0,0.0,2.572857,
10,p4,25.0,18,17,7,4,0.0,0,0,0,0,0.0,0.0,0.182,
11,p5,12.0,4,2,2,1,0.0,0,0,0,0,0.0,0.0,0.285429,
12,p6,12.0,4,2,2,1,0.0,0,0,0,0,0.0,0.0,0.384,
1,W4+p1,24.0,6,4,2,2,31.0,27,27,23,21,10.0,18.0,2.413143,3.916286
2,W4+p2,15.0,6,6,3,3,31.0,26,25,25,24,13.0,14.0,0.167714,4.641143
3,W4+p3,27.0,7,5,2,2,31.0,27,27,23,21,15.0,23.0,2.414,3.806
