In [1]:
import pandas as pd
import datetime

## Load and prepare the data

In [2]:
# Load the raw intervention records
df = pd.read_csv('./sources/IMI-PearsonBaumann.csv')
# Keep only the rows flagged as domestic disputes (domestic_dispute == 1).
# .copy() gives an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning.
mask = df['domestic_dispute'] == 1
df = df[mask].copy()

# Reduce start/end to their first four characters and parse them as integer
# years (presumably the raw values begin with the year -- verify against the
# codebook).
df['start'] = df['start'].astype(str).str[:4].astype(int)
df['end'] = df['end'].astype(str).str[:4].astype(int)

# Number of calendar years touched by the intervention, both ends inclusive
df['duration'] = df['end'] - df['start'] + 1
# Remove rows whose end date is an error code: those produce an implausibly
# large duration (5000 years or more).
mask = df['duration'] < 5000
df = df[mask].copy()

# Convert the integer years to actual datetime objects
df['start'] = pd.to_datetime(df['start'], format='%Y')
df['end'] = pd.to_datetime(df['end'], format='%Y')

In [3]:
# Columns that play no role in the rest of the notebook
droplist = [
    'Data',
    'amount',
    'air',
    'naval',
    'size_naval',
    'firing_outside',
    'intervener_casualties',
    'target casualties',
]
# Remove them column-wise; a missing label raises KeyError
df = df.drop(labels=droplist, axis='columns')

In [4]:
def set_col_dummy(x):
    """Return the 'colonial ties' dummy for a single record.

    ``x`` is anything indexable by the key ``'colonial'`` (e.g. a DataFrame
    row). The result is 1 when ``x['colonial']`` equals 1, 2 or 3 and 0 for
    every other value.
    """
    has_colonial_tie = x['colonial'] in (1, 2, 3)
    return 1 if has_colonial_tie else 0

# Add 'colonial_ties' dummy: 1 when the 'colonial' code is 1, 2 or 3, else 0.
# Vectorised form of the row-wise rule, so no placeholder column or
# apply(axis=1) pass is needed.
df['colonial_dummy'] = df['colonial'].isin([1, 2, 3]).astype('int64')

# NOTE: pd.get_dummies returns boolean columns on pandas >= 2.0; every set of
# dummies below is cast to int64 so the indicators stay 0/1 on any version.

# Add 'previous intervention' dummy. Only the indicator for code 1 is kept,
# under its generated name 'prevint_1' (later cells refer to that name).
# 'pevious_int' is spelled as the column appears in the loaded data.
dummies = pd.get_dummies(df['pevious_int'], prefix='prevint').astype('int64')
df = pd.concat([df, dummies['prevint_1']], axis=1)

# Add 'alignment' dummies.
# The labels are assigned positionally to get_dummies' sorted output, so they
# assume exactly six alignment codes occur, in this order -- TODO confirm
# against the codebook. A different number of codes raises ValueError.
dummies = pd.get_dummies(df['alignment'], prefix='align').astype('int64')
dummies.columns = [
    'non-aligned leaning West', 'non-aligned leaning East',
    'non-aligned', 'West bloc', 'East bloc',
    'Alignment not ascertained'
]
df = pd.concat([df, dummies], axis=1)

# Add 'goal' dummies.
# Same positional labelling as above: assumes exactly seven direction codes
# occur, in this order -- TODO confirm against the codebook.
dummies = pd.get_dummies(df['direction'], prefix='dir').astype('int64')
dummies.columns = [
    'Non-supportive or neutral intervention',
    'Support government',
    'Oppose rebels or opposition groups',
    'Oppose government',
    'Support rebel or opposition groups',
    'Support or oppose 3rd party government',
    'Support or oppose rebel groups in sanctuary',
]
df = pd.concat([df, dummies], axis=1)


## Convert the DataFrame to country-year format

In [5]:
# Per-intervention values that are repeated on every yearly row
columns = [
    'intervener', 'target', 'colonial_dummy', 'prevint_1', 'non-aligned leaning West',
    'non-aligned leaning East', 'non-aligned', 'West bloc', 'East bloc',
    'Alignment not ascertained', 'Non-supportive or neutral intervention',
    'Support government', 'Oppose rebels or opposition groups',
    'Oppose government', 'Support rebel or opposition groups',
    'Support or oppose 3rd party government',
    'Support or oppose rebel groups in sanctuary'
]


def _expand_to_country_years(interventions, value_columns):
    """Return one row per intervention per calendar year it spans.

    Parameters
    ----------
    interventions : DataFrame
        One row per intervention, with datetime-valued 'start' and 'end'
        columns plus every column named in ``value_columns``.
    value_columns : list of str
        Columns whose value is copied unchanged onto each yearly row.

    Returns
    -------
    DataFrame
        Columns ``['date'] + value_columns``, where 'date' is the integer
        year, running from the start year to the end year inclusive. The
        index is a fresh 0..n-1 range.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``interventions`` has no rows.
    """
    yearly_frames = []
    for _, row in interventions.iterrows():
        record = {name: row[name] for name in value_columns}
        # A plain year range replaces the date_range(..., freq='12m') trick,
        # whose offset alias is deprecated; the resulting years are the same.
        record['date'] = list(range(row['start'].year, row['end'].year + 1))
        yearly_frames.append(
            pd.DataFrame(record, columns=['date'] + list(value_columns))
        )
    # ignore_index avoids the repeated 0..k labels each yearly frame carries
    return pd.concat(yearly_frames, ignore_index=True)


# 'date' already holds integer years, so no datetime -> year conversion is needed
df = _expand_to_country_years(df, columns)

# Remove all interventions we don't care about: keep only these intervener codes
nations = [2, 200, 210, 211, 220, 230, 235, 365]
mask = df['intervener'].isin(nations)
df = df[mask]
# One 0/1 column per intervener code that is present, named by the code as a string
dummies = df['intervener'].astype('str').str.get_dummies()
df = pd.concat([df, dummies], axis=1)

## Group and export to CSV

In [6]:
# Group dataframe so that exactly one row exists per target-year pair; every
# other column takes its maximum within the group, so a dummy is 1 if it was 1
# for any intervention in that target-year.
df = df.groupby(['target', 'date']).max().reset_index()

# Rename the intervener dummies (columns named by intervener code) explicitly
# instead of overwriting all column names by position, which silently
# mislabels columns when a different set of codes is present.
# NOTE(review): the code -> label pairs assume COW country numbering
# (2 = US, 200 = UK, 211 = Belgium, 220 = France, 365 = USSR) -- confirm.
intervener_labels = {
    '2': 'US_imi',
    '200': 'UK_imi',
    '211': 'BE_imi',
    '220': 'FR_imi',
    '365': 'SU_imi',
}
code_columns = [c for c in df.columns if str(c).isdigit()]
unlabelled = [c for c in code_columns if c not in intervener_labels]
missing = [c for c in intervener_labels if c not in code_columns]
if unlabelled or missing:
    raise ValueError(
        'Unexpected intervener dummy columns: unlabelled=%r, missing=%r'
        % (unlabelled, missing)
    )
df = df.rename(columns=intervener_labels)

# COL_IMI is 1 when at least one of the UK, BE or FR dummies is set
col_list = ['UK_imi', 'BE_imi', 'FR_imi']
df['COL_IMI'] = (df[col_list].sum(axis=1) > 0) * 1
# After the group-wise max the intervener code is no longer meaningful
df = df.drop('intervener', axis=1)

# Save to CSV
df.to_csv('./output/IMI_prepared.csv')

In [7]:
# Display the first rows of df as a quick visual check
df.head()

Unnamed: 0,target,date,colonial_dummy,prevint_1,non-aligned leaning West,non-aligned leaning East,non-aligned,West bloc,East bloc,Alignment not ascertained,...,Oppose government,Support rebel or opposition groups,Support or oppose 3rd party government,Support or oppose rebel groups in sanctuary,US_imi,UK_imi,BE_imi,FR_imi,SU_imi,COL_IMI
0,42,1961,1,0,0,0,0,1,0,0,...,1,0,0,0,1,0,0,0,0,0
1,42,1965,1,1,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
2,42,1966,1,1,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
3,55,1983,0,0,0,0,1,0,0,0,...,1,0,0,0,1,0,0,0,0,0
4,90,1987,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0


In [8]:
# Print a summary of df: index range, column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172 entries, 0 to 171
Data columns (total 23 columns):
target                                         172 non-null int64
date                                           172 non-null int64
colonial_dummy                                 172 non-null int64
prevint_1                                      172 non-null int64
non-aligned leaning West                       172 non-null int64
non-aligned leaning East                       172 non-null int64
non-aligned                                    172 non-null int64
West bloc                                      172 non-null int64
East bloc                                      172 non-null int64
Alignment not ascertained                      172 non-null int64
Non-supportive or neutral intervention         172 non-null int64
Support government                             172 non-null int64
Oppose rebels or opposition groups             172 non-null int64
Oppose government                    