In [1]:
# This script takes the PRIO/COW data and UCDP data, and identifies how many deaths each country suffered in each year
# It also patches in extra data for the Iraq and Afghanistan wars (from iCasualties.org) and for Syria

In [2]:
import pandas as pd
import numpy as np
import datetime
import dateutil.parser
import re
from tqdm import tqdm_notebook
pd.options.mode.chained_assignment = None

## Step 1: import datasets, prepare columns and functions ##

In [3]:
# Dataframe containing years and death tolls for each conflict.
# Later cells read its COW_id, UCDP_id, year, best_deaths, conflict_name and state_conflict_type columns.
conflict_years_df = pd.read_csv("output_data/conflict_years_df.csv")

In [4]:
# PRIO/COW dataframes containing the overall death tolls for each country in each conflict, between 1900 and 1997
# One file per conflict type (interstate, intrastate, extrastate).
# Later cells read the statenum, pbdeadbest, yrbeg1, yrend1, yrend2 and warno columns (plus warname for interstate wars).
COW_interstate_country_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_2.0_interstate_country_data.csv")
COW_intrastate_country_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_2.0_intrastate_country_data.csv")
COW_extrastate_country_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_2.0_extrastate_country_data.csv")

In [5]:
# UCDP dataframe containing the death tolls for each country in each conflict incident, between 1989 and 2017
# The file is read with latin1 encoding.
# NOTE(review): a later cell states that some gwn_ids load as strings, so presumably pandas infers mixed
# dtypes for some columns here - confirm, and consider passing explicit dtypes once the schema is verified.
UCDP_all_df = pd.read_csv("source_data/UCDP_conflict_data/UCDP_18.1_incident_data.csv",encoding="latin1")

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
# Lookup table of every country that has both Maddison economic data and Polity regime data.
# It is indexed by gwn_id so that later cells can look up a country's name, region and Maddison code by state number.
country_converter_df = (
    pd.read_csv("source_data/maddison_economic_data/country_converter.csv")
    .set_index("gwn_id", drop=True)
)
country_converter_df

Unnamed: 0_level_0,participant_country,participant_region,participant_maddison_code
gwn_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
700.0,Afghanistan,Middle East,AFG
339.0,Albania,Europe,ALB
615.0,Algeria,Africa,DZA
540.0,Angola,Africa,AGO
160.0,Argentina,Americas,ARG
371.0,Armenia,Europe,ARM
900.0,Australia,Asia,AUS
305.0,Austria,Europe,AUT
300.0,Austria (with Hungary),Europe,AUT
373.0,Azerbaijan,Europe,AZE


In [7]:
# Column layout of the initial (one row per country per conflict) dataframes.
# The order matters: later cells address some of these fields by position.
country_conflicts_columns = [
    "participant_country",
    "participant_country_id",
    "participant_maddison_code",
    "participant_region",
    "participant_deaths",
    "conflict_name",
    "conflict_start_year",
    "conflict_end_year",
    "COW_id",
    "UCDP_id",
    "state_conflict_type",
]

In [8]:
# Column layout of the final (one row per country per conflict per year) dataframe:
# the initial columns followed by the year, the share of the country's deaths in that year, and the deaths in that year
country_conflict_year_columns = list(country_conflicts_columns)
country_conflict_year_columns.extend(["year", "year_deaths_share", "year_deaths"])

## Step 2: clean Correlates of War data ##

In [9]:
# Function that gets the last year of a COW conflict ("yrend1" sometimes indicates a break in the conflict, rather than the end)
def get_COW_end_year(COW_df):
    """Add a "yrend_last" column holding the final year of each row's conflict.

    For every row, "yrend2" is used when it is greater than zero; otherwise
    (zero, negative or missing "yrend2") the row falls back to "yrend1".

    Parameters
    ----------
    COW_df : pandas.DataFrame
        Frame with "yrend1" and "yrend2" columns. It is modified in place.

    Returns
    -------
    None
        The result is written to COW_df["yrend_last"].
    """
    # A comparison with NaN is False, so rows with a missing yrend2 take yrend1
    has_second_end_year = COW_df["yrend2"] > 0
    # Vectorised selection instead of a Python loop over iterrows()
    COW_df["yrend_last"] = np.where(has_second_end_year, COW_df["yrend2"], COW_df["yrend1"])

In [10]:
# Add the "yrend_last" column to each of the three COW dataframes (each frame is modified in place)
for COW_df_to_update in (COW_interstate_country_df, COW_intrastate_country_df, COW_extrastate_country_df):
    get_COW_end_year(COW_df_to_update)

In [11]:
# Reshape the interstate COW data into the shared column layout:
# keep the needed source columns, rename them, then join country name, region and Maddison code from country_converter_df
COW_interstate_country_df = (
    COW_interstate_country_df[["statenum","pbdeadbest","warname","yrbeg1","yrend_last","warno"]]
    .rename(columns={
        "statenum": "participant_country_id",
        "pbdeadbest": "participant_deaths",
        "warname": "conflict_name",
        "yrbeg1": "conflict_start_year",
        "yrend_last": "conflict_end_year",
        "warno": "COW_id",
    })
    .join(country_converter_df, on="participant_country_id")
    .assign(UCDP_id=None, state_conflict_type="Interstate") # COW rows have no UCDP conflict id
)
COW_interstate_country_df = COW_interstate_country_df[country_conflicts_columns] # Keep only relevant columns
COW_interstate_country_df

Unnamed: 0,participant_country,participant_country_id,participant_maddison_code,participant_region,participant_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,state_conflict_type
0,China,710,CHN,Asia,2000,Boxer Rebellion,1900,1900,82,,Interstate
1,USSR,364,SUN,Europe,302,Boxer Rebellion,1900,1900,82,,Interstate
2,Japan,740,JPN,Asia,622,Boxer Rebellion,1900,1900,82,,Interstate
3,United Kingdom,200,GBR,Europe,34,Boxer Rebellion,1900,1900,82,,Interstate
4,United States of America,2,USA,Americas,21,Boxer Rebellion,1900,1900,82,,Interstate
5,France,220,FRA,Europe,24,Boxer Rebellion,1900,1900,82,,Interstate
6,USSR,364,SUN,Europe,242,Sino-Russian,1900,1900,83,,Interstate
7,China,710,CHN,Asia,3758,Sino-Russian,1900,1900,83,,Interstate
8,USSR,364,SUN,Europe,52623,Russo-Japanese,1904,1905,85,,Interstate
9,Japan,740,JPN,Asia,58576,Russo-Japanese,1904,1905,85,,Interstate


In [12]:
# Reshape the intrastate COW data into the shared column layout:
# keep the needed source columns, rename them, then join country name, region and Maddison code from country_converter_df
COW_intrastate_country_df = (
    COW_intrastate_country_df[["statenum","pbdeadbest","yrbeg1","yrend_last","warno"]]
    .rename(columns={
        "statenum": "participant_country_id",
        "pbdeadbest": "participant_deaths",
        "yrbeg1": "conflict_start_year",
        "yrend_last": "conflict_end_year",
        "warno": "COW_id",
    })
    .join(country_converter_df, on="participant_country_id")
    # No war name is taken from this source and COW rows have no UCDP conflict id
    .assign(conflict_name=None, UCDP_id=None, state_conflict_type="Internal")
)
COW_intrastate_country_df = COW_intrastate_country_df[country_conflicts_columns] # Keep only relevant columns
COW_intrastate_country_df

Unnamed: 0,participant_country,participant_country_id,participant_maddison_code,participant_region,participant_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,state_conflict_type
0,Colombia,100,COL,Americas,50000.0,,1899,1903,582,,Internal
1,Venezuela,101,VEN,Americas,2000.0,,1899,1899,583,,Internal
2,Venezuela,101,VEN,Americas,1050.0,,1901,1903,584,,Internal
3,Turkey,640,TUR,Europe,6322.0,,1903,1903,585,,Internal
4,Uruguay,165,URY,Americas,880.0,,1904,1904,587,,Internal
5,USSR,364,SUN,Europe,16500.0,,1905,1906,588,,Internal
6,Romania,360,ROU,Europe,6500.0,,1907,1907,589,,Internal
7,France,220,FRA,Europe,200.0,,1907,1908,590,,Internal
8,Morocco,600,MAR,Africa,1000.0,,1907,1908,590,,Internal
9,Iran,630,IRN,Middle East,1000.0,,1908,1909,591,,Internal


In [13]:
# Reshape the extrastate COW data into the shared column layout:
# keep the needed source columns, rename them, then join country name, region and Maddison code from country_converter_df
COW_extrastate_country_df = (
    COW_extrastate_country_df[["statenum","pbdeadbest","yrbeg1","yrend_last","warno"]]
    .rename(columns={
        "statenum": "participant_country_id",
        "pbdeadbest": "participant_deaths",
        "yrbeg1": "conflict_start_year",
        "yrend_last": "conflict_end_year",
        "warno": "COW_id",
    })
    .join(country_converter_df, on="participant_country_id")
    # No war name is taken from this source and COW rows have no UCDP conflict id
    .assign(conflict_name=None, UCDP_id=None, state_conflict_type="Extrasystemic")
)
COW_extrastate_country_df = COW_extrastate_country_df[country_conflicts_columns] # Keep only relevant columns
COW_extrastate_country_df

Unnamed: 0,participant_country,participant_country_id,participant_maddison_code,participant_region,participant_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,state_conflict_type
0,United States of America,2,USA,Americas,4234,,1899,1902,392,,Extrasystemic
1,Philippines,840,PHL,Asia,16000,,1899,1902,392,,Extrasystemic
2,United Kingdom,200,GBR,Europe,200,,1899,1905,393,,Extrasystemic
3,Somalia,520,SOM,Africa,4000,,1899,1905,393,,Extrasystemic
4,United Kingdom,200,GBR,Europe,5774,,1899,1902,395,,Extrasystemic
5,South Africa,560,ZAF,Africa,6650,,1899,1902,395,,Extrasystemic
6,Nigeria,475,NGA,Africa,365,,1903,1903,396,,Extrasystemic
7,Germany,255,DEU,Europe,887,,1904,1905,397,,Extrasystemic
8,Namibia,565,NAM,Africa,11000,,1904,1905,397,,Extrasystemic
9,Germany,255,DEU,Europe,130,,1905,1906,399,,Extrasystemic


In [14]:
# Concatenate COW dataframes together (interstate, then intrastate, then extrastate)
# ignore_index is not passed, so each frame keeps its own row labels; the later loop only reads row values
COW_country_conflicts_df = pd.concat([COW_interstate_country_df,COW_intrastate_country_df,COW_extrastate_country_df])

In [15]:
# Create dictionary that uses conflict_years_df to work out the share of a conflict's deaths that occurred in each year
# Resulting shape: {COW_id: {"conflict_name": ..., "state_conflict_type": ..., "years": {year: {"deaths": ..., "deaths_share": ...}}}}
COW_conflict_dict = {}
for i in list(conflict_years_df["COW_id"].unique()):
    if i >= 0: # Skips missing (NaN) and negative ids, for which the comparison is False
        conflict_year_rows = conflict_years_df[conflict_years_df["COW_id"]==i] # All rows in conflict_years_df for this conflict

        # The conflict's total is the same for every year, so compute it once instead of once per year
        conflict_total_deaths = np.sum(conflict_year_rows["best_deaths"])

        # Create a separate entry in each conflict's dictionary for each year in that conflict
        # NOTE(review): a conflict whose total is 0 gets NaN shares here - confirm whether that can occur in the data
        conflict_years = {}
        for y in list(conflict_year_rows["year"].unique()):
            year_deaths = np.sum(conflict_year_rows[conflict_year_rows["year"]==y]["best_deaths"])
            conflict_years[y] = {"deaths":year_deaths,"deaths_share":year_deaths / conflict_total_deaths}

        # Name and type are taken from the conflict's first row
        COW_conflict_dict[i] = {
            "conflict_name": conflict_year_rows.iloc[0]["conflict_name"],
            "state_conflict_type": conflict_year_rows.iloc[0]["state_conflict_type"],
            "years": conflict_years,
        }

In [16]:
# Create a list of lists, which for each conflict adds a row for each country in each year, with the relevant death toll
COW_country_conflict_year_lists = []

# Position of "conflict_name" within each row (rows follow the order of country_conflicts_columns)
conflict_name_position = country_conflicts_columns.index("conflict_name")

# Loop through rows of each country for each conflict
for i,r in COW_country_conflicts_df.iterrows():
    if r["COW_id"] not in COW_conflict_dict:
        continue # Conflicts with no year-level entry in COW_conflict_dict produce no rows

    row_list = list(r) # Each of these rows is a country for each conflict
    conflict_entry = COW_conflict_dict[r["COW_id"]]
    country_conflict_start_year = r["conflict_start_year"]
    country_conflict_end_year = r["conflict_end_year"]

    # Work out what share of the conflict's deaths occurred while the country was not fighting
    # (before it entered the war or after it exited), so that those deaths can be discounted
    deaths_share_without_country_fighting = 0
    for k,v in conflict_entry["years"].items():
        if k < country_conflict_start_year or k > country_conflict_end_year:
            deaths_share_without_country_fighting += v["deaths_share"]
    deaths_share_with_country_fighting = 1 - deaths_share_without_country_fighting

    # Allocate the country's deaths across the years in which it was fighting
    for k,v in conflict_entry["years"].items():
        if k < country_conflict_start_year or k > country_conflict_end_year:
            continue

        # Share of the country's deaths that happened in this year, rescaled to the years it took part in.
        # The operands are numpy scalars, so dividing by zero gives inf/NaN with a warning rather than
        # raising; an undefined share is therefore detected explicitly and recorded as 0.
        if deaths_share_with_country_fighting > 0 and np.isfinite(v["deaths_share"]):
            year_deaths_share = v["deaths_share"]/deaths_share_with_country_fighting
            country_year_deaths = year_deaths_share*r["participant_deaths"]
        else:
            year_deaths_share = 0
            country_year_deaths = 0

        # Append the conflict name, the year, the country's annual share of deaths and its annual death toll
        year_row_list = list(row_list)
        year_row_list[conflict_name_position] = conflict_entry["conflict_name"]
        year_row_list.append(k) # The relevant year
        year_row_list.append(year_deaths_share)
        year_row_list.append(country_year_deaths)
        COW_country_conflict_year_lists.append(year_row_list)

In [17]:
# Turn the list of lists into a dataframe, where each row is a country's annual death toll in a conflict
# Each inner list holds the country_conflicts_columns values followed by year, year_deaths_share and year_deaths
COW_country_conflict_years_df = pd.DataFrame(COW_country_conflict_year_lists,columns=country_conflict_year_columns)

In [18]:
# Display the COW country-conflict-year dataframe in the notebook output (no state is changed)
COW_country_conflict_years_df

Unnamed: 0,participant_country,participant_country_id,participant_maddison_code,participant_region,participant_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,state_conflict_type,year,year_deaths_share,year_deaths
0,China,710,CHN,Asia,2000.0,Boxer Rebellion,1900,1900,82,,Interstate,1900,1.000000,2000.000000
1,USSR,364,SUN,Europe,302.0,Boxer Rebellion,1900,1900,82,,Interstate,1900,1.000000,302.000000
2,Japan,740,JPN,Asia,622.0,Boxer Rebellion,1900,1900,82,,Interstate,1900,1.000000,622.000000
3,United Kingdom,200,GBR,Europe,34.0,Boxer Rebellion,1900,1900,82,,Interstate,1900,1.000000,34.000000
4,United States of America,2,USA,Americas,21.0,Boxer Rebellion,1900,1900,82,,Interstate,1900,1.000000,21.000000
5,France,220,FRA,Europe,24.0,Boxer Rebellion,1900,1900,82,,Interstate,1900,1.000000,24.000000
6,USSR,364,SUN,Europe,242.0,Sino-Russian,1900,1900,83,,Interstate,1900,1.000000,242.000000
7,China,710,CHN,Asia,3758.0,Sino-Russian,1900,1900,83,,Interstate,1900,1.000000,3758.000000
8,USSR,364,SUN,Europe,52623.0,Russo-Japanese,1904,1905,85,,Interstate,1904,0.249993,13155.395075
9,USSR,364,SUN,Europe,52623.0,Russo-Japanese,1904,1905,85,,Interstate,1905,0.750007,39467.604925


## Step 3: clean UCDP data ##

In [19]:
# Keep only the incidents of state-based conflict (type_of_violence equal to 1)
is_state_based_incident = UCDP_all_df["type_of_violence"] == 1
UCDP_state_conflict_df = UCDP_all_df[is_state_based_incident]

# Collect every distinct dyad pairing that appears in those incidents
UCDP_dyad_ids = UCDP_state_conflict_df["dyad_new_id"].unique()
UCDP_dyad_list = list(UCDP_dyad_ids)

In [20]:
# Display the filtered state-based incidents in the notebook output (no state is changed)
UCDP_state_conflict_df

Unnamed: 0,id,year,active_year,type_of_violence,conflict_new_id,conflict_name,dyad_new_id,dyad_name,side_a_new_id,gwnoa,...,date_prec,date_start,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,low,best,high
0,7578,1989,1,1,309,Sudan:Government,663,Government of Sudan - SPLM/A,112,625,...,5,01/01/1989,03/10/1989,2700,0,0,0,2700,2700,2700
3,13930,1989,1,1,332,Mozambique:Government,722,Government of Mozambique - Renamo,100,541,...,4,01/01/1989,31/01/1989,0,60,0,0,60,60,60
19,62100,1989,0,1,369,Papua New Guinea:Bougainville,797,Government of Papua New Guinea - BRA,157,910,...,5,01/01/1989,29/11/1989,6,0,0,0,6,6,6
20,71542,1989,1,1,413,Ethiopia:Oromiya,869,Government of Ethiopia - OLF,97,530,...,5,01/01/1989,31/12/1989,0,0,0,25,25,25,0
21,71629,1989,1,1,218,Government of India-Government of Pakistan,422,Government of India - Government of Pakistan,141,750,...,5,01/01/1989,31/12/1989,0,0,0,25,25,25,25
22,71686,1989,1,1,221,Myanmar (Burma):Karen,428,Government of Myanmar (Burma) - KNU,144,775,...,5,01/01/1989,31/12/1989,0,0,0,3,0,3,0
28,90908,1989,0,1,364,India:Kashmir,792,Government of India - Kashmir insurgents,141,750,...,5,01/01/1989,31/12/1989,8,0,0,0,0,8,8
29,94814,1989,1,1,351,India:Punjab/Khalistan,775,Government of India - Sikh insurgents,141,750,...,5,01/01/1989,31/12/1989,172,633,0,0,805,805,805
30,94817,1989,1,1,351,India:Punjab/Khalistan,775,Government of India - Sikh insurgents,141,750,...,1,01/01/1989,01/01/1989,2,3,0,0,5,5,5
31,110978,1989,1,1,300,Cambodia (Kampuchea):Government,647,Government of Cambodia (Kampuchea) - FUNCINPEC,148,811,...,5,01/01/1989,31/05/1989,0,54,0,0,54,54,54


In [21]:
# Create a dataframe of each country's deaths in each dyad pairing in each year
# Create a list of lists, where each list is a row
# For every dyad, two rows are appended per year with incidents: one for side A and one for side B
dyad_years_list = []

for d in tqdm_notebook(UCDP_dyad_list):
    dyad_incidents_df = UCDP_state_conflict_df[UCDP_state_conflict_df["dyad_new_id"]==d] # All the incidents in each dyad
    # Names and gwn_ids of both sides are taken from the dyad's first incident row
    side_a_name = dyad_incidents_df.iloc[0]["side_a"]
    side_b_name = dyad_incidents_df.iloc[0]["side_b"]
    side_a_gwn_id = dyad_incidents_df.iloc[0]["gwnoa"]
    side_b_gwn_id = dyad_incidents_df.iloc[0]["gwnob"]
    
    # Because some of the gwn_ids are strings, each one needs to be converted to a float
    # If the conversion fails, the value is left unchanged (e.g. it stays a string)
    try:
        side_a_gwn_id = float(side_a_gwn_id)
    except Exception:
        pass

    try:
        side_b_gwn_id = float(side_b_gwn_id)
    except Exception:
        pass
    
    # For each participant, the country Maddison codes and regions need to assigned from country_converter_df
    side_a_participant_country = None
    side_b_participant_country = None
    side_a_participant_country_id = None
    side_b_participant_country_id = None
    side_a_participant_maddison_code = None
    side_b_participant_maddison_code = None
    side_a_participant_region = None
    side_b_participant_region = None
    
    # If gwn_id is a number less than 1000, it refers to a state; participants that are not states will be dealt with later
    # NOTE(review): the except below catches ANY exception raised in this try block, not only the string comparison
    # for dyad 883. A failed country_converter_df lookup for either side would therefore also overwrite side A with
    # the USA values, and an exception raised while handling side A skips the side B lookup entirely - confirm intended.
    try:    
        if side_a_gwn_id < 1000:
            side_a_participant_country_id = side_a_gwn_id
            side_a_participant_country = country_converter_df.loc[side_a_gwn_id]["participant_country"]
            side_a_participant_region = country_converter_df.loc[side_a_gwn_id]["participant_region"]
            side_a_participant_maddison_code = country_converter_df.loc[side_a_gwn_id]["participant_maddison_code"]
        if side_b_gwn_id < 1000:
            side_b_participant_country_id = side_b_gwn_id
            side_b_participant_country = country_converter_df.loc[side_b_gwn_id]["participant_country"]
            side_b_participant_region = country_converter_df.loc[side_b_gwn_id]["participant_region"]
            side_b_participant_maddison_code = country_converter_df.loc[side_b_gwn_id]["participant_maddison_code"]
            
    except Exception: # for dyad 883, which attributes 2003 Iraq invasion deaths to USA, UK and Australia with a string gwn_id
            side_a_participant_country_id = 2
            side_a_participant_country = "United States of America"
            side_a_participant_region = "Americas"
            side_a_participant_maddison_code = "USA"
    
    # To identify the nationalities of non-state armies, calculate where most deaths in a dyad pair occurred
    # This dictionary includes the death toll in all countries in which the dyad pair fought
    dyad_location_deaths = {}
    for i,r in dyad_incidents_df.iterrows():
        if r["country_id"] in dyad_location_deaths:
            dyad_location_deaths[r["country_id"]] += r["best"] # Add the best estimate of deaths to the toll for that country
        else:
            dyad_location_deaths[r["country_id"]] = r["best"]
            
    # For participants that do not have a country_id, use the country in which the death toll is highest
    # Only side B receives this fallback; a side A that was not resolved above keeps its None values
    if side_b_participant_country_id is None:
        max_dyad_location = None
        max_dyad_location_deaths = 0
        
        # Find the country with the highest death toll
        # The comparison is strict, so if every location's toll is 0 max_dyad_location stays None
        for k,v in dyad_location_deaths.items():
            if v > max_dyad_location_deaths:
                max_dyad_location_deaths = v
                max_dyad_location = k
        side_b_participant_country_id = max_dyad_location
        
        # Use country_converter_df to assign the country Maddison codes and regions
        # NOTE(review): these lookups are not guarded; presumably they fail if max_dyad_location is None or is
        # missing from country_converter_df - confirm against the data
        side_b_participant_country = country_converter_df.loc[max_dyad_location]["participant_country"]
        side_b_participant_region = country_converter_df.loc[max_dyad_location]["participant_region"]
        side_b_participant_maddison_code = country_converter_df.loc[max_dyad_location]["participant_maddison_code"]
        
    # Assign a conflict name, start and end years, and a UCDP conflict_id
    conflict_name = side_a_name + " vs. " + side_b_name
    conflict_start_year = np.min(dyad_incidents_df["year"])
    conflict_end_year = np.max(dyad_incidents_df["year"])
    COW_id = None
    UCDP_id = dyad_incidents_df.iloc[0]["conflict_new_id"]
    state_conflict_type = None # This will be assigned later, using UCDP_conflict_dict; this local is not read (the rows below store a literal None)
    
    # Calculate the annual total of deaths for each country in a dyad pair
    for year in range(1989,2018): # Covers 1989 to 2017 inclusive
        
        year_dyad_incidents_df = dyad_incidents_df[dyad_incidents_df["year"]==year]
        
        if year_dyad_incidents_df.shape[0] > 0: # Filter out years in which no dyadic incident occurred
    
            year_side_a_deaths = 0
            year_side_b_deaths = 0

            for i,r in year_dyad_incidents_df.iterrows():
                year_side_a_deaths = year_side_a_deaths + int(r["deaths_a"])
                year_side_b_deaths = year_side_b_deaths + int(r["deaths_b"])
                
                # Assign deaths of civilians and unknown people to the country in which the incident occurred
                # If the incident's country is not side A's country, these deaths go to side B,
                # even when the incident happened in a country belonging to neither side
                if int(r["country_id"]) == side_a_participant_country_id:
                    year_side_a_deaths = year_side_a_deaths + int(r["deaths_civilians"]) + int(r["deaths_unknown"])
                else:
                    year_side_b_deaths = year_side_b_deaths + int(r["deaths_civilians"]) + int(r["deaths_unknown"])


            # Row layout follows country_conflicts_columns plus "year"; the None is the state_conflict_type placeholder
            year_side_a_list = [side_a_participant_country,side_a_participant_country_id,side_a_participant_maddison_code,side_a_participant_region,
                          year_side_a_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,None,year]

            year_side_b_list = [side_b_participant_country,side_b_participant_country_id,side_b_participant_maddison_code,side_b_participant_region,
                          year_side_b_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,None,year]

            # Append both dyadic lists to the list of lists
            dyad_years_list.append(year_side_a_list)
            dyad_years_list.append(year_side_b_list)
    
# The slice drops the last two final columns (year_deaths_share and year_deaths), which are computed in a later cell
UCDP_dyad_years_df = pd.DataFrame(dyad_years_list,columns=country_conflict_year_columns[0:-2])




In [22]:
# Create a dataframe of each country's deaths in each conflict in each year, using the dyad pairs
UCDP_country_conflict_year_lists = []

# Position of "participant_deaths" within each row (rows follow the order of country_conflict_year_columns)
participant_deaths_position = country_conflict_year_columns.index("participant_deaths")

# Loop through all UCDP conflict_ids
for i in list(UCDP_dyad_years_df["UCDP_id"].unique()):
    UCDP_conflict_rows = UCDP_dyad_years_df[UCDP_dyad_years_df["UCDP_id"]==i] # Select all dyadic rows with this conflict_id
    UCDP_conflict_countries = list(UCDP_conflict_rows["participant_maddison_code"].unique()) # Select all countries in conflict

    # Loop through all countries in the conflict
    # NOTE(review): a participant whose Maddison code is missing presumably matches no rows in the equality
    # filter below, so its total is 0 and it is skipped - confirm that dropping such participants is intended
    for c in UCDP_conflict_countries:
        UCDP_country_conflict_rows = UCDP_conflict_rows[UCDP_conflict_rows["participant_maddison_code"]==c] # Select country's rows
        UCDP_country_conflict_total_deaths = np.sum(UCDP_country_conflict_rows["participant_deaths"]) # Calculate country's total deaths in conflict
        if not UCDP_country_conflict_total_deaths > 0:
            continue # Countries with no recorded deaths in the conflict produce no rows

        # For each year between 1989 and 2017, append relevant years for each country in each conflict to the list of lists
        for year in range(1989,2018):
            UCDP_country_conflict_year_rows = UCDP_country_conflict_rows[UCDP_country_conflict_rows["year"]==year]

            # Years without any row for this country are skipped explicitly, instead of relying on a
            # swallowed exception from .iloc[0]; any other error now surfaces rather than being hidden
            if UCDP_country_conflict_year_rows.shape[0] == 0:
                continue

            # Calculate the share of a country's total deaths that happened in this year
            UCDP_country_conflict_year_deaths = np.sum(UCDP_country_conflict_year_rows["participant_deaths"])
            UCDP_country_conflict_year_deaths_share = UCDP_country_conflict_year_deaths / UCDP_country_conflict_total_deaths

            # The descriptive fields are taken from the first matching row for the year
            UCDP_country_conflict_year_row_list = list(UCDP_country_conflict_year_rows.iloc[0])
            UCDP_country_conflict_year_row_list[participant_deaths_position] = UCDP_country_conflict_total_deaths # Fill the "participant deaths" field
            UCDP_country_conflict_year_row_list.append(UCDP_country_conflict_year_deaths_share)
            UCDP_country_conflict_year_row_list.append(UCDP_country_conflict_year_deaths)
            UCDP_country_conflict_year_lists.append(UCDP_country_conflict_year_row_list)
UCDP_country_conflict_years_df = pd.DataFrame(UCDP_country_conflict_year_lists,columns=country_conflict_year_columns)

In [23]:
# Build a lookup from every UCDP conflict_id in conflict_years_df to that conflict's name and type
UCDP_conflict_dict = {}
for ucdp_conflict_id in list(conflict_years_df["UCDP_id"].unique()):
    if not ucdp_conflict_id >= 0:
        continue # Skips ids for which the comparison is False (missing or negative)

    # Name and type are read from the first conflict_years_df row of this conflict
    first_conflict_row = conflict_years_df[conflict_years_df["UCDP_id"]==ucdp_conflict_id].iloc[0]
    UCDP_conflict_dict[ucdp_conflict_id] = {
        "conflict_name": first_conflict_row["conflict_name"],
        "state_conflict_type": first_conflict_row["state_conflict_type"],
    }

In [24]:
# Overwrite conflict_name and state_conflict_type in UCDP_country_conflict_years_df
# with the values that UCDP_conflict_dict holds for each row's UCDP_id
UCDP_country_conflict_years_conflict_name_list = [
    UCDP_conflict_dict[row_UCDP_id]["conflict_name"]
    for row_UCDP_id in UCDP_country_conflict_years_df["UCDP_id"]
]
UCDP_country_conflict_years_state_conflict_type_list = [
    UCDP_conflict_dict[row_UCDP_id]["state_conflict_type"]
    for row_UCDP_id in UCDP_country_conflict_years_df["UCDP_id"]
]
UCDP_country_conflict_years_df["conflict_name"] = UCDP_country_conflict_years_conflict_name_list
UCDP_country_conflict_years_df["state_conflict_type"] = UCDP_country_conflict_years_state_conflict_type_list

In [25]:
# Display the UCDP country-conflict-year dataframe in the notebook output (no state is changed)
UCDP_country_conflict_years_df

Unnamed: 0,participant_country,participant_country_id,participant_maddison_code,participant_region,participant_deaths,conflict_name,conflict_start_year,conflict_end_year,COW_id,UCDP_id,state_conflict_type,year,year_deaths_share,year_deaths
0,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1989,0.086672,4253
1,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1990,0.088343,4335
2,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1991,0.026839,1317
3,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1992,0.022071,1083
4,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1993,0.019197,942
5,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1994,0.006868,337
6,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1995,0.020379,1000
7,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1996,0.046648,2289
8,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1997,0.075586,3709
9,Sudan,625.0,SDN,Africa,49070,Government of Sudan vs. SPLM/A,1989,2004,,309,Internal,1998,0.099674,4891


## Step 4: combine both dataframes, and patch in Iraq, Afghanistan and Syria ##

In [26]:
# Combine both sources: COW rows up to and including 1988, followed by all UCDP rows
COW_rows_up_to_1988 = COW_country_conflict_years_df[COW_country_conflict_years_df["year"]<=1988]
country_conflict_years_df = pd.concat([COW_rows_up_to_1988,UCDP_country_conflict_years_df])

In [27]:
# Patch in iraq_and_afghanistan_df, which contains data for those wars from iCasualties.org
iraq_and_afghanistan_df = pd.read_csv("source_data/icasualties_conflict_data/icasualties_conflict_deaths_in_iraq_and_afghanistan.csv")

# Drop every row coded as USA whose year is after 2000 (i.e. 2001 onwards); these rows are actually for the coalition
# The filter applies to all conflicts, not only to Iraq and Afghanistan
is_usa_row = country_conflict_years_df["participant_maddison_code"]=="USA"
is_after_2000 = country_conflict_years_df["year"]>2000
country_conflict_years_df = country_conflict_years_df[~(is_usa_row & is_after_2000)]

# Append the iCasualties rows to the remaining rows
country_conflict_years_df = pd.concat([country_conflict_years_df,iraq_and_afghanistan_df])

In [28]:
# Patch in syria_df, which includes deaths that are not contained in the UCDP incident data
# NOTE(review): the rows are appended as-is; presumably the CSV already uses the same columns as
# country_conflict_years_df - confirm
syria_df = pd.read_csv("source_data/UCDP_conflict_data/UCDP_18.1_conflict_deaths_in_syria.csv")
country_conflict_years_df = pd.concat([country_conflict_years_df,syria_df])

In [29]:
# Order the combined rows by year and conflict name, renumber the index from zero, and write the result to disk
country_conflict_years_df = (
    country_conflict_years_df
    .sort_values(["year","conflict_name"])
    .reset_index(drop=True)
)
country_conflict_years_df.to_csv("output_data/country_conflict_years_df.csv")