In [1]:
import pandas as pd

## Warning - This block contains a somewhat expensive 'Groupby' operation that will take around 10 minutes

In [2]:
exsup = pd.read_csv('./sources/exsup.csv')
# Remove all non-coldwar entries
mask = exsup['ywp_year'] < 1992
exsup = exsup[mask]
#remove non US / SU interventions
dummies = exsup['external_name'].str.get_dummies(sep=';')
# 'Netherlands', 'Yugoslavia', 'Spain' and 'Portugal' are excluded because they are not in the Dataset as interveners.
dummies = dummies[[
    'United States', 'Soviet Union', 'United Kingdom', 'France', 
    'Belgium', 'Cuba', 'China']]
exsup = pd.concat([exsup, dummies], axis=1)
# Drop duplicates
exsup.drop_duplicates(inplace=True)

exsup.rename(columns={
    'external_type_X': 'troops',
    'external_type_Y': 'joint_ops',
    'external_type_L': 'terr',
    'external_type_W': 'weapons',
    'external_type_M': 'logistics',
    'external_type_T': 'training',
    'external_type_$': 'eco',
    'external_type_I': 'intel',
    'external_type_O': 'other',
    'external_type_U': 'unknown'
    
}, inplace=True)

# Add support specific dummies
old = ['external_type_X', 'external_type_Y', 'external_type_L',
       'external_type_W', 'external_type_M', 'external_type_T',
       'external_type_$', 'external_type_I', 'external_type_O', 
       'external_type_U']
new = [
    'troops', 'joint_ops', 'terr', 'weapons', 'logistics',
    'training', 'eco', 'intel', 'other', 'unknown'
]

us_support_cols = ['US_' + x for x in new]
su_support_cols = ['SU_' + x for x in new]
fr_support_cols = ['FR_' + x for x in new]
uk_support_cols = ['UK_' + x for x in new]
cuba_support_cols = ['CUBA_' + x for x in new]
ch_support_cols = ['CH_' + x for x in new]

for col in us_support_cols:
    exsup[col] = 0
for col in su_support_cols:
    exsup[col] = 0
for col in uk_support_cols:
    exsup[col] = 0
for col in fr_support_cols:
    exsup[col] = 0
for col in cuba_support_cols:
    exsup[col] = 0
for col in ch_support_cols:
    exsup[col] = 0   

   
for index, row in exsup.iterrows():
    if row['Soviet Union'] == 1:
        nation = 'SU_'
        for col in new:
            exsup[index: index + 1][nation + col] = int(row[col])
    if row['United States'] == 1:
        nation = 'US_'
        for col in new:
            exsup[index: index + 1][nation + col] = int(row[col])             
    if row['United Kingdom'] == 1:
        nation = 'UK_'
        for col in new:
            exsup[index: index + 1][nation + col] = int(row[col])
    if row['France'] == 1:
        nation = 'FR_'
        for col in new:
            exsup[index: index + 1][nation + col] = int(row[col])
    if row['Cuba'] == 1:
        nation = 'CUBA_'
        for col in new:
            exsup[index: index + 1][nation + col] = int(row[col])
    if row['China'] == 1:
        nation = 'CH_'
        for col in new:
            exsup[index: index + 1][nation + col] = int(row[col])

# Add non-military support dummies
countries = ['US', 'SU', 'UK', 'FR', 'CH', 'CUBA']
actions = ['joint_ops', 'terr', 'weapons', 'logistics',
    'training', 'eco', 'intel', 'other', 'unknown']
for country in countries:
    exsup[country+ '_none_mil'] = 0
    temp_list = [country + '_' + action for action in actions]
    exsup[country+ '_none_mil'] = (exsup[temp_list].sum(axis=1) > 0) * 1 
            
# Add (empty, since there are no values) dummies for Netherlands, Spain and Portugal
exsup['Netherlands'] = 0
exsup['Spain'] = 0
exsup['Portugal'] = 0

# delete unused columns
dellist = [
    'actorID', 'bc_id', 'bc_name', 'bwd_id', 'conflictID',
    'country1', 'eco', 'external_alleged', 'external_id',
    'external_nameid', 'intel', 'joint_ops', 'locationid2', 
    'logistics', 'other', 'terr', 'training', 'troops',
    'unknown', 'weapons', 'ywp_id', 'ywp_name'
]
exsup.drop(dellist, axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

In [3]:
# Add 'Previous intervention' and 'Previous non-mil interventions' dummies, per country
countries = ['US', 'SU', 'UK', 'FR', 'CUBA', 'CH']

for country in countries:
    exsup[country + '_previous' + '_none_mil'] = exsup[country + '_none_mil'].shift()
    exsup[country + '_previous' + '_troops'] = exsup[country + '_troops'].shift()
    exsup[country + '_previous' + '_none_mil'].fillna(0, inplace=True)
    exsup[country + '_previous' + '_troops'].fillna(0, inplace=True) 

In [4]:
# Create non-military support columns
non_mil = exsup.groupby('locationid1')[
            [country + '_none_mil' for country in countries]].max()
non_mil.columns = [str(col) + '_ever' for col in non_mil.columns]

# Create military support columns
mil = exsup.groupby('locationid1')[
            [country + '_troops' for country in countries]].max()
mil.columns = [str(col) + '_ever' for col in mil.columns]

# Concat them and reset index, to prepare for merge
support = pd.concat([non_mil, mil], axis=1)
support.reset_index(inplace=True)
exsup = exsup.merge(
    support, left_on='locationid1', right_on='locationid1', how='left'
)

# Merge dataframe per year-country combination
exsup = exsup.groupby(['locationid1', 'ywp_year']).max()
exsup.to_csv('./output/EXSUP_PREPARED.csv')

In [5]:
# for country in countries:
#     exsup[country + '_none_mil_ever'] = exsup.groupby('locationid1')[
#         country + '_none_mil'].max()
    
# for country in countries:
#     exsup[country + '_troops_ever'] = exsup.groupby('locationid1')[
#         country + '_troops'].max()


In [6]:
exsup.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 496 entries, (41, 1989) to (910, 1990)
Columns: 101 entries, Belgium to external_exists
dtypes: float64(12), int64(89)
memory usage: 393.2 KB
