In [1]:
# This script takes data from PRIO (some of which is COW compatible) and UCDP, and creates a dataframe of all conflicts
# It identifies the years in which the conflicts were fought, how many people died, and assigns a name to each conflict

In [2]:
import pandas as pd
import numpy as np
import datetime
import dateutil.parser
import re
from tqdm import tqdm_notebook
pd.options.mode.chained_assignment = None

## Step 1: import datasets, prepare columns and functions ##

In [3]:
# PRIO/COW dataframes that contain information about conflicts between 1900 and 1997
COW_interstate_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_2.0_interstate_data.csv")
COW_intrastate_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_2.0_intrastate_data.csv")
COW_extrastate_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_2.0_extrastate_data.csv")

In [4]:
# PRIO dataframe that contains information about conflicts between 1946 and 2008
PRIO_df = pd.read_csv("source_data/PRIO_conflict_data/PRIO_3.1_battle_data.csv",encoding="latin1")

In [5]:
# UCP dataframes that contain information about conflict and every conflict incident between 1989 and 2017
UCDP_all_df = pd.read_csv("source_data/UCDP_conflict_data/UCDP_18.1_incident_data.csv",encoding="latin1")
UCDP_state_conflict_df = pd.read_csv("source_data/UCDP_conflict_data/UCDP_18.1_state_conflict_data.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
# Columns used for final dataframe
conflict_years_columns = ["year","start_year","COW_id","UCDP_id","conflict_type","state_conflict_type","conflict_name",
                         "side_a","side_b","best_deaths","low_deaths","high_deaths","country","region"]

In [7]:
# Function that replaces -999 codes in COW and PRIO dataframes
def replace_unknowns(df):
    
    best_deaths = []
    
    for i,r in df.iterrows():
        if r["best_deaths"] == -999:
            if r["low_deaths"] >= 0:
                best_deaths.append(np.mean([r["low_deaths"],r["high_deaths"]]))
            else:
                best_deaths.append(0)
        else:
            best_deaths.append(r["best_deaths"])
            
    return best_deaths

## Step 2: clean Correlates of War data ##

In [8]:
COW_interstate_df = COW_interstate_df[["Year","YrBeg1","WarNo","WarName","bdeadbest","bdeadlow","bdeadhigh","WestHem","Europe","Africa","MidEast","Asia","Oceania"]]
COW_interstate_df.columns = ["year","start_year","COW_id","conflict_name","best_deaths","low_deaths","high_deaths","WestHem","Europe","Africa","MidEast","Asia","Oceania"]

# Assign standard regions to COW region codes
COW_interstate_region = []
for i,r in COW_interstate_df.iterrows():
    if r["WestHem"] == 1:
        COW_interstate_region.append("Americas")
    elif r["Europe"] == 1:
        COW_interstate_region.append("Europe")
    elif r["Africa"] == 1:
        COW_interstate_region.append("Africa")
    elif r["MidEast"] == 1:
        COW_interstate_region.append("Middle East")
    elif r["Asia"] == 1:
        COW_interstate_region.append("Asia")
    elif r["Oceania"] == 1:
        COW_interstate_region.append("Asia")
    else:
        COW_interstate_region.append(np.nan)
COW_interstate_df["region"] = COW_interstate_region

COW_interstate_df.drop(["WestHem","Europe","Africa","MidEast","Asia","Oceania"],axis=1,inplace=True)

# Assign values for various dataframe columns
COW_interstate_df["UCDP_id"] = None # Not contained in COW dataframe
COW_interstate_df["conflict_type"] = "State-based conflict"
COW_interstate_df["state_conflict_type"] = "Interstate" 
COW_interstate_df["side_a"] = None # Not contained in COW dataframe
COW_interstate_df["side_b"] = None # Not contained in COW dataframe
COW_interstate_df["country"] = None # Not contained in COW dataframe
COW_interstate_df = COW_interstate_df[conflict_years_columns]

COW_interstate_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
0,1900,1900,82,,State-based conflict,Interstate,Boxer Rebellion,,,3003,2503,3003,,Asia
1,1900,1900,83,,State-based conflict,Interstate,Sino-Russian,,,4000,4000,4000,,Asia
2,1904,1904,85,,State-based conflict,Interstate,Russo-Japanese,,,27799,27799,27799,,Asia
3,1905,1904,85,,State-based conflict,Interstate,Russo-Japanese,,,83400,83400,83400,,Asia
4,1906,1906,88,,State-based conflict,Interstate,Third Central American,,,1500,1000,2000,,Americas
5,1907,1907,91,,State-based conflict,Interstate,Fourth Central American,,,1000,1000,1000,,Americas
6,1909,1909,94,,State-based conflict,Interstate,Spanish-Moroccan,,,2500,2500,2500,,Middle East
7,1910,1909,94,,State-based conflict,Interstate,Spanish-Moroccan,,,2500,2500,2500,,Middle East
8,1911,1911,97,,State-based conflict,Interstate,Italo-Turkish,,,4750,3000,6500,,Middle East
9,1912,1911,97,,State-based conflict,Interstate,Italo-Turkish,,,4750,3000,6500,,Middle East


In [9]:
COW_intrastate_df = COW_intrastate_df[["Year","YrBeg1","WarNo","WarName","bdeadbest","bdeadlow","bdeadhigh","WestHem","Europe","Africa","MidEast","Asia","Oceania"]]
COW_intrastate_df.columns = ["year","start_year","COW_id","conflict_name","best_deaths","low_deaths","high_deaths","WestHem","Europe","Africa","MidEast","Asia","Oceania"]

# Assign standard regions to COW region codes
COW_intrastate_region = []
for i,r in COW_intrastate_df.iterrows():
    if r["WestHem"] == 1:
        COW_intrastate_region.append("Americas")
    elif r["Europe"] == 1:
        COW_intrastate_region.append("Europe")
    elif r["Africa"] == 1:
        COW_intrastate_region.append("Africa")
    elif r["MidEast"] == 1:
        COW_intrastate_region.append("Middle East")
    elif r["Asia"] == 1:
        COW_intrastate_region.append("Asia")
    elif r["Oceania"] == 1:
        COW_intrastate_region.append("Asia")
    else:
        COW_intrastate_region.append(np.nan)
COW_intrastate_df["region"] = COW_intrastate_region

COW_intrastate_df.drop(["WestHem","Europe","Africa","MidEast","Asia","Oceania"],axis=1,inplace=True)

# Assign values for various dataframe columns
COW_intrastate_df["UCDP_id"] = None # Not contained in COW dataframe
COW_intrastate_df["conflict_type"] = "State-based conflict"
COW_intrastate_df["state_conflict_type"] = "Internal"
COW_intrastate_df["side_a"] = None # Not contained in COW dataframe
COW_intrastate_df["side_b"] = None # Not contained in COW dataframe
COW_intrastate_df["country"] = None # Not contained in COW dataframe
COW_intrastate_df = COW_intrastate_df[conflict_years_columns]

COW_intrastate_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
0,1899,1899,582,,State-based conflict,Internal,Colombia vs. Liberals of 1899,,,13492.0,13492,13492,,Americas
1,1900,1899,582,,State-based conflict,Internal,Colombia vs. Liberals of 1899,,,9127.0,9127,9127,,Americas
2,1901,1899,582,,State-based conflict,Internal,Colombia vs. Liberals of 1899,,,9127.0,9127,9127,,Americas
3,1902,1899,582,,State-based conflict,Internal,Colombia vs. Liberals of 1899,,,9127.0,9127,9127,,Americas
4,1903,1899,582,,State-based conflict,Internal,Colombia vs. Liberals of 1899,,,9127.0,9127,9127,,Americas
5,1899,1899,583,,State-based conflict,Internal,Venezuela vs. Castro Led Rebels,,,2000.0,2000,2000,,Americas
6,1901,1901,584,,State-based conflict,Internal,Venezuela vs. Matos Led Rebels,,,350.0,350,350,,Americas
7,1902,1901,584,,State-based conflict,Internal,Venezuela vs. Matos Led Rebels,,,350.0,350,350,,Americas
8,1903,1901,584,,State-based conflict,Internal,Venezuela vs. Matos Led Rebels,,,350.0,350,350,,Americas
9,1903,1903,585,,State-based conflict,Internal,Ottoman Empire vs. VMRO Rebels,,,6322.0,6322,11016,,Europe


In [10]:
COW_extrastate_df = COW_extrastate_df[["Year","YrBeg1","WarNo","WarName","bdeadbest","bdeadlow","bdeadhigh","WestHem","Europe","Africa","MidEast","Asia","Oceania"]]
COW_extrastate_df.columns = ["year","start_year","COW_id","conflict_name","best_deaths","low_deaths","high_deaths","WestHem","Europe","Africa","MidEast","Asia","Oceania"]

# Assign standard regions to COW region codes
COW_extrastate_region = []
for i,r in COW_extrastate_df.iterrows():
    if r["WestHem"] == 1:
        COW_extrastate_region.append("Americas")
    elif r["Europe"] == 1:
        COW_extrastate_region.append("Europe")
    elif r["Africa"] == 1:
        COW_extrastate_region.append("Africa")
    elif r["MidEast"] == 1:
        COW_extrastate_region.append("Middle East")
    elif r["Asia"] == 1:
        COW_extrastate_region.append("Asia")
    elif r["Oceania"] == 1:
        COW_extrastate_region.append("Asia")
    else:
        COW_extrastate_region.append(np.nan)
COW_extrastate_df["region"] = COW_extrastate_region

COW_extrastate_df.drop(["WestHem","Europe","Africa","MidEast","Asia","Oceania"],axis=1,inplace=True)

# Assign values for various dataframe columns
COW_extrastate_df["UCDP_id"] = None # Not contained in COW dataframe
COW_extrastate_df["conflict_type"] = "State-based conflict"
COW_extrastate_df["state_conflict_type"] = "Extrasystemic"
COW_extrastate_df["side_a"] = None # Not contained in COW dataframe
COW_extrastate_df["side_b"] = None # Not contained in COW dataframe
COW_extrastate_df["country"] = None # Not contained in COW dataframe
COW_extrastate_df = COW_extrastate_df[conflict_years_columns]

COW_extrastate_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
0,1899,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,4245,4245,4245,,Asia
1,1900,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5329,4276,6662,,Asia
2,1901,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5330,4276,6663,,Asia
3,1902,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5330,4276,6664,,Asia
4,1899,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,400,400,400,,Africa
5,1900,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,400,400,400,,Africa
6,1901,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,550,550,550,,Africa
7,1902,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,600,600,600,,Africa
8,1903,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,1100,1100,1100,,Africa
9,1904,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,1050,1050,1050,,Africa


In [11]:
# Concatenate the COW interstate, intrastate and extrastate dataframes together
COW_df = pd.concat([COW_interstate_df,COW_intrastate_df,COW_extrastate_df])
COW_df["best_deaths"] = replace_unknowns(COW_df) # Replace -999 death toll codes
COW_df.sort_values(["start_year","COW_id","year"],inplace=True)
COW_df.reset_index(drop=True,inplace=True)
COW_df["COW_id"] = COW_df["COW_id"].replace({105:106,107:106,108:106,140:139,141:139}) # Elide all WW1 and WW2 fronts together
COW_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
0,1899,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,4245.0,4245,4245,,Asia
1,1900,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5329.0,4276,6662,,Asia
2,1901,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5330.0,4276,6663,,Asia
3,1902,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5330.0,4276,6664,,Asia
4,1899,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,400.0,400,400,,Africa
5,1900,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,400.0,400,400,,Africa
6,1901,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,550.0,550,550,,Africa
7,1902,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,600.0,600,600,,Africa
8,1903,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,1100.0,1100,1100,,Africa
9,1904,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,1050.0,1050,1050,,Africa


## Step 3: clean PRIO data ##

In [12]:
PRIO_df = PRIO_df[["year","startdate","id","type","sidea","sideb","bdeadbes","bdeadlow","bdeadhig","location","region"]]
PRIO_df.columns = ["year","startdate","UCDP_id","state_conflict_type","side_a","side_b","best_deaths","low_deaths","high_deaths","country","region"]

# Map the new UCDP war ids onto the old PRIO war ids
PRIO_UCDP_converter = pd.read_csv("source_data/PRIO_conflict_data/PRIO_3.1_to_UCDP_18.1_converter.csv")

PRIO_UCDP_converter_dict = {}
for i,r in PRIO_UCDP_converter.iterrows():
    try:
        PRIO_UCDP_converter_dict[int(r["old_id"])] = r["new_id"]
    except Exception:
        PRIO_UCDP_converter_dict[r["old_id"]] = r["new_id"]
        
PRIO_df["UCDP_id"] = PRIO_df["UCDP_id"].map(PRIO_UCDP_converter_dict)

PRIO_df["start_year"] = PRIO_df["startdate"].apply(lambda x: dateutil.parser.parse(x).year)
PRIO_df["COW_id"] = None # Not contained in PRIO dataframe
PRIO_df["conflict_type"] = "State-based conflict"

# Map conflict types onto the PRIO data
PRIO_state_conflict_type_dict = {1:"Extrasystemic",2:"Interstate",3:"Internal",4:"Internationalised internal"}
PRIO_df["state_conflict_type"] = PRIO_df["state_conflict_type"].map(PRIO_state_conflict_type_dict)

# Generate conflict name from combatants
PRIO_df["conflict_name"] = PRIO_df["side_a"] + " vs. " + PRIO_df["side_b"]

# Assign standard regions to PRIO region codes
PRIO_region_dict = {1:"Europe",2:"Middle East",3:"Asia",4:"Africa",5:"Americas"}
PRIO_df["region"] = PRIO_df["region"].map(PRIO_region_dict)

PRIO_df = PRIO_df[conflict_years_columns]
PRIO_df["best_deaths"] = replace_unknowns(PRIO_df) # Replace -999 death toll codes
PRIO_df.sort_values(["start_year","UCDP_id","year"],inplace=True)
PRIO_df.reset_index(drop=True,inplace=True)
PRIO_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
0,1946,1939,,207.0,State-based conflict,Extrasystemic,United Kingdom vs. IZL [Etzel],United Kingdom,IZL [Etzel],92.0,92,373,Israel,Middle East
1,1946,1941,,213.0,State-based conflict,Internal,Russia (Soviet Union) vs. UPA,Russia (Soviet Union),UPA,7942.0,1000,7942,Russia (Soviet Union),Europe
2,1947,1941,,213.0,State-based conflict,Internal,Russia (Soviet Union) vs. UPA,Russia (Soviet Union),UPA,6364.0,1000,6364,Russia (Soviet Union),Europe
3,1948,1941,,213.0,State-based conflict,Internal,Russia (Soviet Union) vs. UPA,Russia (Soviet Union),UPA,3263.0,1000,3263,Russia (Soviet Union),Europe
4,1949,1941,,213.0,State-based conflict,Internal,Russia (Soviet Union) vs. UPA,Russia (Soviet Union),UPA,512.0,25,999,Russia (Soviet Union),Europe
5,1950,1941,,213.0,State-based conflict,Internal,Russia (Soviet Union) vs. UPA,Russia (Soviet Union),UPA,512.0,25,999,Russia (Soviet Union),Europe
6,1946,1944,,210.0,State-based conflict,Internal,Russia (Soviet Union) vs. Forest Brothers,Russia (Soviet Union),Forest Brothers,410.0,25,410,Russia (Soviet Union),Europe
7,1947,1944,,210.0,State-based conflict,Internal,Russia (Soviet Union) vs. Forest Brothers,Russia (Soviet Union),Forest Brothers,71.0,25,71,Russia (Soviet Union),Europe
8,1948,1944,,210.0,State-based conflict,Internal,Russia (Soviet Union) vs. Forest Brothers,Russia (Soviet Union),Forest Brothers,186.0,25,186,Russia (Soviet Union),Europe
9,1946,1945,,204.0,State-based conflict,Extrasystemic,Netherlands vs. Indonesian Peoples Army,Netherlands,Indonesian Peoples Army,2827.5,655,5000,Indonesia,Asia


## Step 4: clean UCDP data ##

In [13]:
UCDP_state_conflict_df = UCDP_state_conflict_df[["year","conflict_id","type_of_conflict","side_a","side_b","bdbest","bdlow","bdhigh","location_inc","region"]]
UCDP_state_conflict_df.columns = ["year","UCDP_id","state_conflict_type","side_a","side_b","best_deaths","low_deaths","high_deaths","country","region"]

UCDP_state_conflict_df["start_year"] = None # Not contained in UCDP dataframe
UCDP_state_conflict_df["COW_id"] = None # Not contained in UCDP dataframe
UCDP_state_conflict_df["conflict_type"] = "State-based conflict"

# Map conflict types onto the UCDP data
UCDP_state_conflict_type_dict = {1:"Extrasystemic",2:"Interstate",3:"Internal",4:"Internationalised internal"}
UCDP_state_conflict_df["state_conflict_type"] = UCDP_state_conflict_df["state_conflict_type"].map(UCDP_state_conflict_type_dict)

# Generate conflict name from combatants
UCDP_state_conflict_df["conflict_name"] = UCDP_state_conflict_df["side_a"] + " vs. " + UCDP_state_conflict_df["side_b"]

# Assign standard regions to PRIO region codes
UCDP_region_dict = {1:"Europe",2:"Middle East",3:"Asia",4:"Africa",5:"Americas"}
UCDP_state_conflict_df["region"] = UCDP_state_conflict_df["region"].map(UCDP_region_dict)

UCDP_state_conflict_df = UCDP_state_conflict_df[conflict_years_columns]
UCDP_state_conflict_df["best_deaths"] = replace_unknowns(UCDP_state_conflict_df) # Replace -999 death toll codes
UCDP_state_conflict_df.sort_values(["UCDP_id","year"],inplace=True)
UCDP_state_conflict_df.reset_index(drop=True,inplace=True)

In [14]:
UCDP_state_conflict_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
0,1990,,,205,State-based conflict,Internal,Government of Iran vs. KDPI,Government of Iran,KDPI,31,31,335,Iran,Middle East
1,1993,,,205,State-based conflict,Internal,Government of Iran vs. KDPI,Government of Iran,KDPI,110,110,110,Iran,Middle East
2,1996,,,205,State-based conflict,Internal,Government of Iran vs. KDPI,Government of Iran,KDPI,27,27,29,Iran,Middle East
3,2016,,,205,State-based conflict,Internal,Government of Iran vs. KDPI,Government of Iran,KDPI,30,30,137,Iran,Middle East
4,1989,,,209,State-based conflict,Internal,"Government of Philippines vs. CPP, Military fa...",Government of Philippines,"CPP, Military faction (forces of Honasan, Aben...",822,801,885,Philippines,Asia
5,1990,,,209,State-based conflict,Internal,"Government of Philippines vs. CPP, Military fa...",Government of Philippines,"CPP, Military faction (forces of Honasan, Aben...",1165,1165,1174,Philippines,Asia
6,1991,,,209,State-based conflict,Internal,Government of Philippines vs. CPP,Government of Philippines,CPP,1514,1514,1933,Philippines,Asia
7,1992,,,209,State-based conflict,Internal,Government of Philippines vs. CPP,Government of Philippines,CPP,430,430,454,Philippines,Asia
8,1993,,,209,State-based conflict,Internal,Government of Philippines vs. CPP,Government of Philippines,CPP,99,99,114,Philippines,Asia
9,1994,,,209,State-based conflict,Internal,Government of Philippines vs. CPP,Government of Philippines,CPP,69,69,73,Philippines,Asia


## Step 5: patch datasets together ##

In [15]:
conflict_years_list = []

# Use only COW data between 1900 and 1988
conflict_years_list.append(COW_df[(COW_df["year"]>=1900) & (COW_df["year"]<=1988)])

# Use only UCDP data between 1989 and 2017
conflict_years_list.append(UCDP_state_conflict_df[(UCDP_state_conflict_df["year"]>=1989) & (UCDP_state_conflict_df["year"]<=2017)])
conflict_years_df = pd.concat(conflict_years_list)

In [16]:
conflict_years_df

Unnamed: 0,year,start_year,COW_id,UCDP_id,conflict_type,state_conflict_type,conflict_name,side_a,side_b,best_deaths,low_deaths,high_deaths,country,region
1,1900,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5329.0,4276,6662,,Asia
2,1901,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5330.0,4276,6663,,Asia
3,1902,1899,392,,State-based conflict,Extrasystemic,American-Philippino,,,5330.0,4276,6664,,Asia
5,1900,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,400.0,400,400,,Africa
6,1901,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,550.0,550,550,,Africa
7,1902,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,600.0,600,600,,Africa
8,1903,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,1100.0,1100,1100,,Africa
9,1904,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,1050.0,1050,1050,,Africa
10,1905,1899,393,,State-based conflict,Extrasystemic,Somali Rebellion,,,100.0,100,100,,Africa
12,1900,1899,395,,State-based conflict,Extrasystemic,Boer War of 1899,,,3106.0,3106,3106,,Africa


In [17]:
# Create dictionary of earliest year in each UCDP conflict
start_year_dict = {}
for i in list(conflict_years_df["UCDP_id"].unique()):
    conflict_years = conflict_years_df[conflict_years_df["UCDP_id"]==i]
    conflict_start_year = np.min(conflict_years["year"])
    start_year_dict[i] = conflict_start_year
start_year_dict

{None: nan,
 205: 1990,
 209: 1989,
 218: 1989,
 220: 1989,
 221: 1989,
 222: 1990,
 223: 1991,
 224: 1990,
 227: 1991,
 230: 2009,
 231: 1989,
 233: 1989,
 234: 1989,
 251: 1992,
 253: 1992,
 259: 1991,
 260: 1989,
 262: 1989,
 264: 1993,
 265: 2013,
 267: 1989,
 269: 1996,
 271: 1989,
 275: 1989,
 277: 1992,
 283: 1996,
 287: 1991,
 288: 1989,
 289: 1989,
 292: 1989,
 294: 2011,
 297: 2009,
 299: 2011,
 300: 1989,
 307: 2000,
 308: 1989,
 309: 1989,
 313: 1989,
 314: 1989,
 315: 1989,
 316: 1989,
 322: 1989,
 325: 2004,
 326: 1997,
 327: 1989,
 329: 1993,
 330: 1992,
 331: 1989,
 332: 1989,
 333: 1989,
 335: 1992,
 336: 1989,
 337: 1989,
 338: 1991,
 341: 1989,
 342: 1991,
 347: 1993,
 351: 1989,
 352: 1989,
 353: 2015,
 354: 1989,
 362: 1989,
 363: 1996,
 364: 1990,
 365: 1990,
 366: 1990,
 367: 1989,
 368: 1989,
 369: 1990,
 370: 1989,
 371: 1990,
 372: 1990,
 373: 1994,
 374: 1990,
 375: 1990,
 376: 1990,
 377: 1990,
 378: 1990,
 379: 1991,
 380: 1991,
 381: 1989,
 382: 1991,
 383

In [18]:
# Assign earliest year for conflicts with UCDP ids
conflict_years_start_year = []
for i,r in conflict_years_df.iterrows():
    if r["start_year"]:
        conflict_years_start_year.append(r["start_year"])
    else:
        conflict_years_start_year.append(start_year_dict[r["UCDP_id"]])

In [19]:
conflict_years_df["start_year"] = conflict_years_start_year

In [20]:
conflict_years_df.to_csv("output_data/conflict_years_df.csv")