In [1]:
import pandas as pd
import numpy as np

# Allow up to 500 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [2]:
# Lookup table for the 21 provision categories ("reasons").
# The three lists are positional: entry i of each list describes the same category.
#   reason      - display label
#   id          - short identifier; these are the names the provision columns are
#                 renamed to, and therefore the values of the melted 'reason' column
#   explanation - one-sentence description
df_reasons = pd.DataFrame({
    "reason": [
        'Children', 'Race', 'Religion', 'Indigenous', 'Refugees', 'Sexual orientation', 'Media governance',
        'Media roles', 'Natural resources', 'Nomadism', 'Culture (tangible)', 'Culture (intangible)', 'Drugs', 'Crime',
        'State definition', 'Life', 'Torture', 'Slavery', 'Freedom of speech', 'Freedom of movement', 'Vote',
    ],

    # 'crime' was previously misspelled as 'cri dme', which matched no data column.
    "id": [
        'children', 'racial', 'religion', 'indigenous', 'refugees', 'sexual', 'media_gov', 'media_roles',
        'natural', 'nomadism', 'cult_tang', 'cult_intang', 'drugs', 'crime',
        'state', 'life', 'torture', 'slavery', 'speech', 'movement', 'vote',
    ],

    "explanation": [
        'Peace agreements that contain provision(s) on children, child rights, youth, young people, or similar',
        'Peace agreements that contain provision(s) related to race, national minorities, ethnic nationalism or similar',
        'Peace agreements that contain provision(s) on religious groups',
        'Peace agreements that contain provision(s) on indigenous groups',
        'Peace agreements that contain provision(s) that address refugee issues, such as return, repatriation and land claims',
        'Peace agreements that contain provision(s) related to gay, lesbian, transgender or transsexual people',
        'Peace agreements that contain provision(s) related to governance of media, such as where power over media and communication are assigned',

        'Peace agreements that contain provision(s) dealing with particular roles or access for the media',
        'Peace agreements that contain provision(s) related with natural resources, such as hydrocarbons, pasture, forestry, or hydroelectric power generation',
        'Peace agreements that contain provision(s) for nomadic or pastoralist peoples or communities, including grazing rights',

        'Peace agreements that contain provision(s) related to tangible aspects of cultural heritage, such as museums, antiques, monuments or handicrafts',
        'Peace agreements that contain provision(s) related to intangible aspects of cultural heritage, such as national languages, dances, songs or traditions',
        'Peace agreements that contain provision(s) related to illicit drugs, including drug trading and trafficking',
        'Peace agreements that contain provision(s) related to crime or organised crime',

        'Peace agreements that contain provision(s) that refer to the self determination, referendum or secession of the state',
        'Peace agreements that contain provision(s) related to the right to life',
        'Peace agreements that contain provision(s) related to freedom from torture',
        'Peace agreements that contain provision(s) related to the ban of slavery',
        'Peace agreements that contain provision(s) related to freedom of speech',
        'Peace agreements that contain provision(s) related to freedom of movement',
        'Peace agreements that contain provision(s) related to the right to vote and participate in public life',
    ],
})

In [3]:
# Load the source CSV; the path is relative to the current working directory.
df = pd.read_csv('Data/pax_data.csv')

In [4]:
# Columns carried forward from the raw file, in the order they should appear.
vars_to_keep = [
    # agreement metadata and the first two country slots
    'Con', 'Dat', 'Contp', 'Agtp', 'Status', 'Stage', 'AgtId',
    'PPName', 'Agt', 'Part', 'Loc1ISO', 'Loc2ISO',
    # provision columns
    'GCh', 'GRa', 'GRe', 'GInd', 'GRef', 'GeSo', 'MedGov', 'MedSubs', 'NatRes', 'LaNom',
    'LaCHTa', 'LaCHIt', 'SsrDrugs', 'SsrCrOcr', 'StInd', 'StRef', 'StSd',
    'CprLife', 'CprTort', 'CprSlav', 'CprFspe', 'CprFmov', 'CprVote',
]

# Raw column code -> short reason id.
raw_to_reason_id = {
    'GCh': 'children',
    'GRa': 'racial',
    'GRe': 'religion',
    'GInd': 'indigenous',
    'GRef': 'refugees',
    'GeSo': 'sexual',
    'MedGov': 'media_gov',
    'MedSubs': 'media_roles',
    'NatRes': 'natural',
    'LaNom': 'nomadism',
    'LaCHTa': 'cult_tang',
    'LaCHIt': 'cult_intang',
    'SsrDrugs': 'drugs',
    'SsrCrOcr': 'crime',
    # 'StDef' is not in vars_to_keep, so this entry never matches a column here;
    # rename() ignores unknown keys.
    'StDef': 'state',
    'CprLife': 'life',
    'CprTort': 'torture',
    'CprSlav': 'slavery',
    'CprFspe': 'speech',
    'CprFmov': 'movement',
    'CprVote': 'vote',
}

# reindex() selects and orders the columns; a name absent from the file would
# come back as an all-NaN column rather than raising.
df = df.reindex(columns=vars_to_keep).rename(columns=raw_to_reason_id)

# Parse the 'Dat' column into pandas datetimes (no explicit format is given, so it is inferred).
df['Dat']= pd.to_datetime(df['Dat'])

# Collapse these columns to a 0/1 flag: values 0 and 1 become 0, every other
# value (missing values included) becomes 1.
# NOTE(review): presumably the raw columns are graded scores where only levels
# above 1 should count as "present" - confirm against the data codebook.
cols_to_binarize = ['children', 'crime', 'drugs', 'indigenous', 'racial', 'refugees', 'religion']
for col in cols_to_binarize:
    counts_as_present = ~df[col].isin([0, 1])
    df[col] = np.where(counts_as_present, 1, 0)
    
    
# Fold the three state-related source columns into one 'state' column with an
# element-wise bitwise OR, then remove the source columns.
state_flag = df['StInd']
for source_col in ('StRef', 'StSd'):
    state_flag = state_flag | df[source_col]
df['state'] = state_flag

df = df.drop(columns=['StInd', 'StRef', 'StSd'])


# Add empty slots for the 3rd to 11th country of an agreement (Loc3ISO..Loc11ISO).
# They are created with object dtype rather than the float64 that a bare np.nan
# would give: ISO code strings are written into these columns later, and
# assigning a string into a float64 column is an incompatible-dtype upcast that
# newer pandas versions warn about or reject.
for slot in range(3, 12):
    df['Loc' + str(slot) + 'ISO'] = pd.Series(np.nan, index=df.index, dtype=object)


# Replace the short stage codes with their display labels; any value that is
# not one of these codes is left untouched.
stage_labels = {
    'Cea': 'Ceasefire related',
    'Imp': 'Implementation/Renegotiation',
    'Oth': 'Other',
    'Pre': 'Pre-negotiation/Process',
    'Ren': 'Renewal',
    'SubComp': 'Substantive-Comprehensive',
    'SubPar': 'Substantive-Partial',
}
for stage_code, stage_label in stage_labels.items():
    df['Stage'] = np.where(df['Stage'] == stage_code, stage_label, df['Stage'])


# The 'Agt' text of the row(s) with AgtId 1796 is too long: keep its first
# 350 characters and append an ellipsis. All other rows keep their text.
too_long = df['AgtId'] == 1796
clipped_text = df['Agt'].str.slice(stop=350) + '...'
df['Agt'] = np.where(too_long, clipped_text, df['Agt'])

In [5]:
# Provision columns other than 'children' (which is added to the check below).
reasons = [
        'racial', 'religion', 'indigenous', 'refugees', 'sexual', 'media_gov',
        'media_roles', 'natural', 'nomadism', 'cult_tang', 'cult_intang', 'drugs',
        'crime', 'life', 'torture', 'slavery', 'speech', 'movement', 'vote', 'state']

# Keep a row when at least one provision column is different from 0.
# A missing value compares as "not equal to 0", so it also keeps the row.
mask = df[['children'] + reasons].ne(0).any(axis=1)

# .copy() makes the filtered frame independent of the unfiltered one, so the
# later in-place writes (df.loc[...] = ..., df[col] = ...) do not raise
# SettingWithCopyWarning.
df = df[mask].copy()
df.shape

(866, 42)

In [8]:
def update_location(indexes, locs, frame=None):
    """Write a list of ISO country codes into the Loc<N>ISO columns of given rows.

    For every row label in ``indexes`` the k-th code of ``locs`` is stored in
    column ``'Loc<k>ISO'`` (1-based). Only the first ``len(locs)`` slots are
    written; later slots keep whatever value they already had.

    Parameters
    ----------
    indexes : iterable
        Row labels (index values) to update.
    locs : sequence of str
        Country codes, in slot order.
    frame : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the notebook-level ``df``.

    Notes
    -----
    Labels that are not present in the frame's index are skipped. Assigning
    through ``.loc`` to a missing label would otherwise append a new row whose
    remaining columns are all NaN.
    """
    target = df if frame is None else frame
    for num in indexes:
        if num not in target.index:
            # Row was filtered out earlier (or never existed): do not create it.
            continue
        for slot, iso_code in enumerate(locs, start=1):
            target.loc[num, 'Loc' + str(slot) + 'ISO'] = iso_code
            

# Manual corrections of the country slots. Statement order matters: later
# steps overwrite earlier ones where they touch the same rows.
# update_location() writes only as many slots as codes are given.

# Step 1: row-specific country lists.
for row_labels, iso_codes in [
    ([39, 40], ['AGO', 'BDI', 'CAF', 'COD', 'KEN', 'COG', 'RWA', 'SDN', 'TZA', 'UGA', 'ZMB']),
    ([41], ['AGO', 'BDI', 'CAF', 'COD', 'COG', 'RWA', 'ZAF', 'SSD', 'TZA', 'UGA', 'ZMB']),
    ([43], ['AGO', 'CAF', 'COD', 'KEN', 'COG', 'RWA', 'SDN', 'TZA', 'UGA', 'ZMB']),
    ([69], ['AZE', 'ARM', 'RUS']),
    ([263], ['CAF', 'TCD', 'SDN']),
    ([283], ['CHN', 'KAZ', 'KGZ', 'RUS', 'TJK']),
    ([450], ['HRV', 'SVN']),
    ([568], ['ERI', 'ETH', 'SOM']),
    ([577], ['FRA', 'DEU', 'RUS', 'GBR', 'USA']),
    ([767, 768, 769], ['ISR', 'JOR', 'PSE']),
]:
    update_location(row_labels, iso_codes)

# Step 2: every row of these conflicts gets ISR / PSE in the first two slots.
# NOTE(review): if rows 767-769 belong to one of these conflicts, the 'JOR'
# written to Loc2ISO above is replaced by 'PSE' here, leaving 'PSE' in both
# slot 2 and slot 3 - confirm this is intended.
isr_pal = df.Con.isin(['Israel/Palestine', 'Israel/(Palestine)', 'Palestine'])
for slot_col, iso_code in (('Loc1ISO', 'ISR'), ('Loc2ISO', 'PSE')):
    df[slot_col] = np.where(isr_pal, iso_code, df[slot_col])

# Step 3: same treatment for the Kosovo/Serbia conflict (RKS / SRB).
kos_ser = df.Con.isin(['Kosovo/Serbia/Yugoslavia (former)'])
for slot_col, iso_code in (('Loc1ISO', 'RKS'), ('Loc2ISO', 'SRB')):
    df[slot_col] = np.where(kos_ser, iso_code, df[slot_col])

# Step 4: more row-specific lists.
for row_labels, iso_codes in [
    ([853], ['TUR', 'SYR', 'IRQ', 'IRN']),
    ([857], ['LBN', 'SYR']),
    ([895], ['LBY']),
]:
    update_location(row_labels, iso_codes)

# Step 5: row 976 keeps only its first country.
df.loc[976, 'Loc2ISO'] = np.nan

# Step 6: remaining row-specific lists.
for row_labels, iso_codes in [
    ([1370, 1371], ['SRB']),
    ([1533, 1536], ['SSD', 'SDN']),
]:
    update_location(row_labels, iso_codes)

# Step 7: every row of this conflict gets SSD / SDN in the first two slots.
sud = df.Con == 'South Sudan/Sudan/Southern Kordofan - Blue Nile - Abyei'
for slot_col, iso_code in (('Loc1ISO', 'SSD'), ('Loc2ISO', 'SDN')):
    df[slot_col] = np.where(sud, iso_code, df[slot_col])

In [9]:
# Independent copy of df; df_chart gets its own columns below without affecting df.
df_chart = df.copy()

def create_country_list(row):
    """Collect the non-missing country codes of one row into a list.

    Reads ``row['Loc1ISO']`` through ``row['Loc11ISO']`` in slot order and
    returns the values that are not missing.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the eleven ``'Loc<N>ISO'`` keys, e.g. a
        DataFrame row passed by ``DataFrame.apply(..., axis='columns')``.

    Returns
    -------
    list
        The present codes, in slot order; empty when all slots are missing.
    """
    slot_values = (row['Loc' + str(slot) + 'ISO'] for slot in range(1, 12))
    # pd.notna treats NaN, None and pd.NA uniformly as missing. The previous
    # `value == value` test kept None and raised on pd.NA.
    return [code for code in slot_values if pd.notna(code)]


# Row-wise apply: 'LocISO' holds, per row, the list returned by create_country_list
# (the row's non-missing Loc1ISO..Loc11ISO values).
df_chart['LocISO'] = df_chart.apply(create_country_list, axis='columns')

In [10]:
# Reshape df to one row per (agreement, country): the eleven country slots are
# melted into a single 'LocISO' column, and empty slots are discarded.
id_vars = [
    'Con', 'Dat', 'Contp', 'Agtp', 'Status', 'Stage', 'PPName', 'Agt', 'AgtId', 'Part',
    'children', 'racial', 'religion', 'indigenous', 'refugees', 'sexual',
    'media_gov', 'media_roles', 'natural', 'nomadism', 'cult_tang', 'cult_intang',
    'drugs', 'crime', 'life', 'torture', 'slavery', 'speech', 'movement', 'vote', 'state',
]

# 'Loc1ISO' ... 'Loc11ISO'
value_vars = ['Loc' + str(slot) + 'ISO' for slot in range(1, 12)]

df_full = (
    df.melt(id_vars=id_vars, value_vars=value_vars, var_name='numCountry', value_name='LocISO')
      .dropna(subset=['LocISO'])
)



# Final column set shared by both output tables. Columns not listed here
# (for example 'Con', 'Contp', 'Part' and 'numCountry') are dropped.
vars_to_keep = [
    'Dat', 'Agtp', 'Status', 'Stage', 'PPName', 'Agt', 'AgtId',
    'children', 'racial', 'religion', 'indigenous', 'refugees',
    'sexual', 'media_gov', 'media_roles', 'natural', 'nomadism',
    'cult_tang', 'cult_intang', 'drugs', 'crime', 'life', 'torture',
    'slavery', 'speech', 'movement', 'vote', 'state',
    'LocISO',
]

# filter(items=...) keeps the listed columns that exist, in the listed order.
df_full, df_chart = (frame.filter(items=vars_to_keep) for frame in (df_full, df_chart))

In [11]:
# Second reshape: one row per (agreement[, country], reason). The 21 provision
# columns become a 'reason' name column and a 'reasonValue' value column.
id_vars = ['Dat', 'Agtp', 'Status', 'Stage', 'PPName', 'Agt', 'AgtId', 'LocISO']

value_vars = [
    'children', 'racial', 'religion', 'indigenous', 'refugees',
    'sexual', 'media_gov', 'media_roles', 'natural', 'nomadism',
    'cult_tang', 'cult_intang', 'drugs', 'crime', 'life', 'torture',
    'slavery', 'speech', 'movement', 'vote', 'state',
]

# Both tables are melted with exactly the same arguments.
melt_options = dict(id_vars=id_vars, value_vars=value_vars,
                    var_name='reason', value_name='reasonValue')
df_full = df_full.melt(**melt_options)
df_chart = df_chart.melt(**melt_options)

In [12]:
# Keep only the (row, reason) pairs whose value is exactly 1, then drop the
# value column, which is constant after the filter.
# NOTE(review): only the columns in cols_to_binarize were forced to 0/1 earlier;
# any other reason column holding a value other than 0 or 1 would be removed by
# this filter - confirm those columns are binary in the source data.
df_full = df_full.loc[df_full['reasonValue'] == 1].drop(columns='reasonValue')
df_chart = df_chart.loc[df_chart['reasonValue'] == 1].drop(columns='reasonValue')

df_chart.shape

(2310, 9)

In [16]:
# Write the three tables as JSON arrays of row objects (orient='records') into
# the current working directory.
for table, out_path in (
    (df_full, 'pax_data.json'),
    (df_chart, 'pax_data_chart.json'),
    (df_reasons, '21reasons.json'),
):
    table.to_json(out_path, orient='records')