In [2]:
import calendar
import datetime
import os
import pickle

import numpy as np
import pandas as pd
from dynetworkx.classes.impulsedigraph import ImpulseDiGraph

In [3]:
# Load the incident-level table (MIDI) and the participant-level table (MIDIP).
midi_csv_path = 'Incident_Level_5_01/MIDI_5.01.csv'
midip_csv_path = 'Incident_Level_5_01/MIDIP_5.01.csv'

MIDI = pd.read_csv(midi_csv_path)
MIDIP = pd.read_csv(midip_csv_path)

In this notebook, I build the conflict network from the MIDIP dataset, with some information from the MIDI dataset. The network built is an Impulse DiGraph, which uses only the start date of each incident. Edges are ordered by the incident start date. Edges occurring at the same time are ordered using ord_incidnum. Gaussian noise with mean=0, std=1 (in seconds, the unit of the timestamps) is also added to each edge's timestamp. Therefore, the edges of a joint attack, which share the same start date and ord_incidnum, are still ordered randomly. The result of this notebook is a list of 10 different networks, each with different Gaussian noise.

# Cleaning data

In [4]:
# Sentinel that marks a missing day or month in the date columns.
_MISSING = -9


def _resolve_date(year, month, day, incid_year, incid_month, incid_day, end_of_period):
    """Return a complete ``datetime`` for a participant date, filling gaps.

    A missing month or day (``-9``) is taken from the incident's own date when
    that value is known and the already-resolved larger units (year, and month
    for the day) agree with the incident. Otherwise it falls back to the first
    (``end_of_period=False``) or last (``end_of_period=True``) month of the
    year / day of the month.
    """
    if month == _MISSING:
        if incid_month != _MISSING and year == incid_year:
            month = incid_month
        else:
            month = 12 if end_of_period else 1

    if day == _MISSING:
        if incid_day != _MISSING and month == incid_month and year == incid_year:
            day = incid_day
        elif end_of_period:
            # monthrange applies the full Gregorian leap-year rule, so century
            # years such as 1900 correctly give a 28-day February.
            day = calendar.monthrange(year, month)[1]
        else:
            day = 1

    return datetime.datetime(year, month, day)


def convert_to_timestamp(midip=None, midi=None):
    """Add numeric ``sttimestamp`` / ``endtimestamp`` columns to the participant table.

    Participant start and end dates are converted to POSIX timestamps. Missing
    days or months (coded ``-9``) are replaced either by:

    * the start/end date of the incident the participant belongs to (several
      participants can join one incident at different times), or
    * the first day/month (start dates) or last day/month (end dates) of the
      month/year when the incident's date is missing too or does not match.

    Parameters
    ----------
    midip : pandas.DataFrame, optional
        Participant-level table; modified in place. Defaults to the notebook
        global ``MIDIP``.
    midi : pandas.DataFrame, optional
        Incident-level table with one row per ``ord_incidnum``. Defaults to the
        notebook global ``MIDI``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``midi`` has more than one row for an ``ord_incidnum``.
    KeyError
        If a participant refers to an ``ord_incidnum`` that is absent from ``midi``.

    Notes
    -----
    The dates are naive, so ``datetime.timestamp()`` interprets them in the
    machine's local time zone; absolute values therefore depend on where the
    notebook is run.
    """
    if midip is None:
        midip = MIDIP
    if midi is None:
        midi = MIDI

    date_cols = ['styear', 'stmon', 'stday', 'endyear', 'endmon', 'endday']

    if not midi['ord_incidnum'].is_unique:
        raise ValueError('MIDI must contain exactly one row per ord_incidnum')
    # One dictionary lookup per participant instead of six scans over MIDI.
    incident_dates = midi.set_index('ord_incidnum')[date_cols].to_dict('index')

    sttimestamp = []
    endtimestamp = []
    participant_rows = midip[['ord_incidnum'] + date_cols].itertuples(index=False, name=None)
    for incidnum, styear, stmon, stday, endyear, endmon, endday in participant_rows:
        incid = incident_dates[incidnum]

        start = _resolve_date(
            int(styear), int(stmon), int(stday),
            int(incid['styear']), int(incid['stmon']), int(incid['stday']),
            end_of_period=False,
        )
        end = _resolve_date(
            int(endyear), int(endmon), int(endday),
            int(incid['endyear']), int(incid['endmon']), int(incid['endday']),
            end_of_period=True,
        )

        sttimestamp.append(start.timestamp())
        endtimestamp.append(end.timestamp())

    midip['sttimestamp'] = sttimestamp
    midip['endtimestamp'] = endtimestamp

# Populate MIDIP['sttimestamp'] and MIDIP['endtimestamp'] (MIDIP is updated in place).
convert_to_timestamp()

In [5]:
# Participants in incidnum 4182466, 4581015 and 4483034 share the same start date and
# incidnum but disagree on the end date. The gap is only 2 days and there are just 3
# such cases, so the odd one out is overwritten to match the others.
end_date_corrections = {
    3361: {'endday': 29, 'endmon': 6},
    7555: {'endday': 18},
    6147: {'endday': 26},
}
for row_label, corrected_fields in end_date_corrections.items():
    for column_name, corrected_value in corrected_fields.items():
        MIDIP.loc[row_label, column_name] = corrected_value

# Recompute the timestamp columns so they reflect the corrected dates.
convert_to_timestamp()

In [6]:
# Two rows have an end date earlier than their start date; they are simply discarded.
ends_on_or_after_start = MIDIP['endtimestamp'] >= MIDIP['sttimestamp']
MIDIP = MIDIP.loc[ends_on_or_after_start]

# Build Graph

In [7]:
# We build N_GRAPHS networks that differ only in the Gaussian noise added to the edge
# timestamps.
#
# Only the start timestamp of each incident is used, so the result is an Impulse
# Directed Graph.
#
# Edges are ordered by sttimestamp, then by ord_incidnum (the ordered id of incidents
# happening on the same date).

N_GRAPHS = 10      # number of noisy copies of the network
NOISE_STD = 1      # standard deviation of the timestamp noise, in seconds
NOISE_SEED = 0     # fixed seed so that Restart & Run All reproduces the same graphs
one_day = 86400    # seconds in 1 day

# Rows whose end precedes their start are excluded; the same filtered frame is used for
# both attackers and attacked states.
valid_participants = MIDIP[MIDIP['sttimestamp'] <= MIDIP['endtimestamp']]

incidents = list(
    valid_participants
    .sort_values(['sttimestamp', 'ord_incidnum'])
    .groupby(['sttimestamp', 'ord_incidnum'])
)

# Number of (sttimestamp, ord_incidnum) groups sharing each start timestamp.
groups_per_start = {}
for (start_time, _), _ in incidents:
    groups_per_start[start_time] = groups_per_start.get(start_time, 0) + 1

# States being attacked (insidea == 0) for each incident, as (stabb, sttimestamp) pairs.
targets_by_incident = {
    incidnum: list(zip(rows['stabb'], rows['sttimestamp']))
    for incidnum, rows in valid_participants[valid_participants['insidea'] == 0].groupby('ord_incidnum')
}

# The noise-free edge list does not depend on the graph being generated, so it is
# computed once and reused for every graph.
conflict_edges = []
slot = 0
previous_start = None
for (start_time, incidnum), members in incidents:
    # Position of this group among the groups that share its start timestamp.
    slot = slot + 1 if start_time == previous_start else 1
    previous_start = start_time

    # Spread incidents of one date uniformly through that date: with 1 incident it is
    # placed at 12:00; with 2, one at 08:00 and one at 16:00.
    offset = slot * one_day / (groups_per_start[start_time] + 1)

    # Attacking states (insidea == 1) in this group; all of them start at start_time.
    attackers = members.loc[members['insidea'] == 1, 'stabb']
    for attacker in attackers:
        for target, target_start in targets_by_incident.get(incidnum, []):
            # The edge takes the later of the two start times. Example: A may start
            # on day 1 because someone else attacked it then, but if B only joins on
            # day 4 the edge B -> A gets timestamp 4.
            conflict_edges.append((attacker, target, max(start_time, target_start) + offset))

rng = np.random.default_rng(NOISE_SEED)

graphs = list()
for _ in range(N_GRAPHS):
    G = ImpulseDiGraph()
    for attacker, target, base_time in conflict_edges:
        # Independent noise per edge, so edges of a joint attack (same base time) get
        # a random relative order.
        G.add_edge(attacker, target, base_time + rng.normal(loc=0, scale=NOISE_STD))
    graphs.append(G)

In [8]:
# Persist the generated graphs. The output directory is created first so the cell also
# works on a fresh checkout where temp/ does not exist yet.
graphs_path = 'temp/graphs.pickle'
os.makedirs(os.path.dirname(graphs_path), exist_ok=True)
with open(graphs_path, 'wb') as f:
    pickle.dump(graphs, f)

In [9]:
# Display the list of generated graphs as a quick sanity check (one ImpulseDiGraph per noisy copy).
graphs

[<dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9ee0>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff37815cdf0>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9be0>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9760>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9c70>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9bb0>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9af0>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9340>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e9610>,
 <dynetworkx.classes.impulsedigraph.ImpulseDiGraph at 0x7ff34e2e90d0>]