## Imports

In [None]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch plotly into notebook mode before any figure is shown.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook - confirm against the plotly docs.
init_notebook_mode(connected=True)

In [None]:
import re
import string

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from pymongo import MongoClient

In [None]:
from sshtunnel import SSHTunnelForwarder
from getpass import getpass

## Connection

In [None]:
# Host the SSH tunnel connects to; MongoDB is reached on that host's loopback
# interface through the tunnel.
MONGO_HOST = "flask01.network.ncf.edu"
# Name of the database selected from the Mongo client.
MONGO_DB = "hfmil"
# Despite the MONGO_ prefix, these two values are handed to the SSH tunnel as
# its login credentials; the Mongo client itself is created without any.
MONGO_USER = input("Enter your username: ")
# getpass prompts without echoing what is typed.
MONGO_PASS = getpass("Enter your password: ")

In [None]:
# Describe the SSH tunnel (it is not opened until server.start() is called):
# log in to MONGO_HOST over SSH and forward a local port to port 27017 on the
# remote host's loopback interface.
server = SSHTunnelForwarder(
    MONGO_HOST,
    ssh_username=MONGO_USER,
    ssh_password=MONGO_PASS,
    remote_bind_address=('127.0.0.1', 27017)
)

In [None]:
# Open the tunnel; the local port it listens on is read from
# server.local_bind_port when the Mongo client is created.
server.start()

In [None]:
# Connect to MongoDB through the local end of the SSH tunnel rather than to the
# remote host directly.
client = MongoClient('127.0.0.1', server.local_bind_port)

In [None]:
# Handle on the project database; every query in this notebook goes through it.
db = client[MONGO_DB]

In [None]:
db.list_collection_names()

## Conflict Data

In [None]:
# Query every conflict document (empty filter), projecting only the listed
# fields and leaving out Mongo's _id.
cur = db.conflicts.find({},{'_id':0,'location':1,'side_a':1,'side_b':1,'year':1, 'start_date':1, 'ep_end_date':1, 'region':1})

In [None]:
# Materialise the whole cursor in one step, one row per document.
# The previous row-by-row build relied on Cursor.count() and
# DataFrame.append(), both of which have been removed from current
# PyMongo / pandas, and re-copied the frame on every iteration.
# An exhausted or empty cursor simply yields an empty frame.
df = pd.DataFrame(list(cur))

In [None]:
df

In [None]:
def cleanSide(side):
    """Strip punctuation and filler words from a side/location string.

    Every character found in ``string.punctuation`` is removed, the result is
    split on whitespace, and the words "government", "of" and "the" (compared
    case-insensitively) are discarded.

    Returns the remaining words as a list, e.g. "Government of India"
    becomes ["India"].
    """
    filler = ['government', 'of', 'the']
    stripped = ''.join(ch for ch in side if ch not in string.punctuation)
    kept = []
    for token in stripped.split():
        if token.lower() in filler:
            continue
        kept.append(token)
    return kept

In [None]:
# Replace each side_a string with the list of words cleanSide keeps.
# The column is overwritten in place, so re-running this cell would pass those
# lists (not the original strings) back into cleanSide.
df['side_a'] = df['side_a'].apply(cleanSide)

In [None]:
# Replace each side_b string with the list of words cleanSide keeps
# (overwrites the column, so the cell is not safe to re-run).
df['side_b'] = df['side_b'].apply(cleanSide)

In [None]:
# Replace each location string with the list of words cleanSide keeps
# (overwrites the column, so the cell is not safe to re-run).
df['location'] = df['location'].apply(cleanSide)

In [None]:
df.head()

## Connecting Conflicts and HFI

In [None]:
# Query every hfi19 document (empty filter), projecting only the country code,
# country name, hf_score and year, and leaving out Mongo's _id.
cur = db.hfi19.find({},{'_id':0,'ISO_code':1,'countries':1,'hf_score':1, 'year':1})

In [None]:
# Materialise the whole cursor in one step, one row per document.
# The previous row-by-row build relied on Cursor.count() and
# DataFrame.append(), both of which have been removed from current
# PyMongo / pandas, and re-copied the frame on every iteration.
# An exhausted or empty cursor simply yields an empty frame.
hfDf = pd.DataFrame(list(cur))

In [None]:
# Use the singular column name 'country' from here on.
hfDf = hfDf.rename(columns={'countries': 'country'})

In [None]:
hfDf.head()

## Query conflicts side A

In [None]:
def getConflicts(country):
    """Return the distinct conflict ids in which `country` appears on side A.

    The name is matched case-insensitively as a literal substring of the
    ``side_a`` field of ``db.conflicts``.

    Parameters:
        country: country name to look for. Non-string values (e.g. NaN for a
            missing name) yield an empty list.

    Returns:
        list of unique ``conflict_id`` values, in no particular order.
    """
    if not isinstance(country, str):
        return []
    # Escape the name so characters such as "." or "(" are matched literally
    # instead of being interpreted as regex syntax.
    # NOTE(review): this is still a substring match, so e.g. "Niger" also
    # matches "Nigeria" - confirm whether whole-name matching is wanted.
    query = {'side_a': {'$regex': re.escape(country), '$options': 'i'}}
    cur = db.conflicts.find(query, {'_id': 0, 'conflict_id': 1})
    # Iterating the cursor ends cleanly when it is exhausted and lets genuine
    # failures (network, bad query) surface instead of being swallowed.
    return list({doc['conflict_id'] for doc in cur if 'conflict_id' in doc})

In [None]:
def getNumConflicts(country):
    """Return how many distinct conflicts list `country` on side A.

    The name is matched case-insensitively as a literal substring of the
    ``side_a`` field of ``db.conflicts``; documents sharing a ``conflict_id``
    are counted once.

    Parameters:
        country: country name to look for. Non-string values (e.g. NaN for a
            missing name) count as 0.

    Returns:
        int, the number of unique ``conflict_id`` values found.
    """
    if not isinstance(country, str):
        return 0
    # Escape the name so regex metacharacters in it are matched literally.
    # NOTE(review): this is still a substring match, so e.g. "Niger" also
    # matches "Nigeria" - confirm whether whole-name matching is wanted.
    query = {'side_a': {'$regex': re.escape(country), '$options': 'i'}}
    cur = db.conflicts.find(query, {'_id': 0, 'conflict_id': 1})
    # Iterating the cursor ends cleanly when it is exhausted and lets genuine
    # failures (network, bad query) surface instead of being swallowed.
    return len({doc['conflict_id'] for doc in cur if 'conflict_id' in doc})

In [None]:
def getNumOp(country):
    """Return how many conflict documents list `country` on side A.

    Unlike getNumConflicts, every matching document is counted, so a conflict
    that is stored as several documents contributes once per document.

    Parameters:
        country: country name to look for (case-insensitive literal substring
            of ``side_a``). Non-string values (e.g. NaN) count as 0.

    Returns:
        int, the number of matching documents that carry a ``conflict_id``.
    """
    if not isinstance(country, str):
        return 0
    # Escape the name so regex metacharacters in it are matched literally.
    # NOTE(review): this is still a substring match, so e.g. "Niger" also
    # matches "Nigeria" - confirm whether whole-name matching is wanted.
    query = {'side_a': {'$regex': re.escape(country), '$options': 'i'}}
    cur = db.conflicts.find(query, {'_id': 0, 'conflict_id': 1})
    # Iterating the cursor ends cleanly when it is exhausted and lets genuine
    # failures (network, bad query) surface instead of being swallowed.
    return sum(1 for doc in cur if 'conflict_id' in doc)

In [None]:
# Count distinct side-A conflicts for every row's country.
# getNumConflicts runs one Mongo query per call, i.e. one per row of hfDf.
hfDf['num_conflicts_side_A'] = hfDf['country'].apply(getNumConflicts)

In [None]:
# Store the list of distinct side-A conflict ids for every row's country
# (one Mongo query per row of hfDf).
hfDf['conflicts_side_A'] = hfDf['country'].apply(getConflicts)

In [None]:
# Count every side-A conflict document (not just distinct ids) for each row's
# country (one Mongo query per row of hfDf).
hfDf['num_ops_side_A'] = hfDf['country'].apply(getNumOp)

In [None]:
hfDf.sort_values(by = "num_conflicts_side_A", ascending = False)

## Conflicts per country side A
(for countries with at least one conflict)

In [None]:
# Bar chart of distinct side-A conflicts per country code, highest first;
# rows with a count of zero are left out. The chart is also written to disk.
plt.figure(figsize=(15, 6))
sns.barplot(
    x='ISO_code',
    y='num_conflicts_side_A',
    color='#3eb077',
    data=hfDf.loc[hfDf['num_conflicts_side_A'].gt(0)]
             .sort_values(by='num_conflicts_side_A', ascending=False),
)
plt.xticks(rotation=90)
plt.tick_params(axis='x', which='major', labelsize=10)
plt.tight_layout(h_pad=2)
plt.savefig('num_conflicts_side_A.png')

## Operations per country side A
(most conflicts are composed of more than one operation)

In [None]:
# Bar chart of side-A conflict documents ("operations") per country code,
# highest first; rows with a count of zero are left out. Also saved to disk.
plt.figure(figsize=(15, 6))
sns.barplot(
    x='ISO_code',
    y='num_ops_side_A',
    color='#3eb077',
    data=hfDf.loc[hfDf['num_ops_side_A'].gt(0)]
             .sort_values(by='num_ops_side_A', ascending=False),
)
plt.xticks(rotation=90)
plt.tick_params(axis='x', which='major', labelsize=10)
plt.tight_layout(h_pad=2)
plt.savefig('num_ops_side_A.png')

## HF score vs. number of conflicts side A

In [None]:
# Scatter of HF score against distinct side-A conflicts for the 2017 rows only,
# with marker size following the side-A operation count.
d17 = hfDf.loc[hfDf['year'] == 2017]
plt.figure(figsize=(10, 5))
sns.scatterplot(
    x='num_conflicts_side_A',
    y='hf_score',
    size='num_ops_side_A',
    data=d17,
)

## Operations per country choropleth
(where country is side A)

In [None]:
# World map coloured by the side-A operation count, keyed on the ISO code.
p = px.choropleth(
    hfDf,
    color='num_ops_side_A',
    locations='ISO_code',
)
p.show()

## Query conflicts side B

In [None]:
def getConflicts(country):
    """Return the distinct conflict ids in which `country` appears on side B.

    The name is matched case-insensitively as a literal substring of the
    ``side_b`` field of ``db.conflicts``. This definition replaces the side-A
    function of the same name once the cell has run.

    Parameters:
        country: country name to look for. Non-string values (e.g. NaN for a
            missing name) yield an empty list.

    Returns:
        list of unique ``conflict_id`` values, in no particular order.
    """
    if not isinstance(country, str):
        return []
    # Escape the name so characters such as "." or "(" are matched literally
    # instead of being interpreted as regex syntax.
    # NOTE(review): this is still a substring match, so e.g. "Niger" also
    # matches "Nigeria" - confirm whether whole-name matching is wanted.
    query = {'side_b': {'$regex': re.escape(country), '$options': 'i'}}
    cur = db.conflicts.find(query, {'_id': 0, 'conflict_id': 1})
    # Iterating the cursor ends cleanly when it is exhausted and lets genuine
    # failures (network, bad query) surface instead of being swallowed.
    return list({doc['conflict_id'] for doc in cur if 'conflict_id' in doc})

In [None]:
def getNumConflicts(country):
    """Return how many distinct conflicts list `country` on side B.

    The name is matched case-insensitively as a literal substring of the
    ``side_b`` field of ``db.conflicts``; documents sharing a ``conflict_id``
    are counted once. This definition replaces the side-A function of the
    same name once the cell has run.

    Parameters:
        country: country name to look for. Non-string values (e.g. NaN for a
            missing name) count as 0.

    Returns:
        int, the number of unique ``conflict_id`` values found.
    """
    if not isinstance(country, str):
        return 0
    # Escape the name so regex metacharacters in it are matched literally.
    # NOTE(review): this is still a substring match, so e.g. "Niger" also
    # matches "Nigeria" - confirm whether whole-name matching is wanted.
    query = {'side_b': {'$regex': re.escape(country), '$options': 'i'}}
    cur = db.conflicts.find(query, {'_id': 0, 'conflict_id': 1})
    # Iterating the cursor ends cleanly when it is exhausted and lets genuine
    # failures (network, bad query) surface instead of being swallowed.
    return len({doc['conflict_id'] for doc in cur if 'conflict_id' in doc})

In [None]:
def getNumOp(country):
    """Return how many conflict documents list `country` on side B.

    Unlike getNumConflicts, every matching document is counted, so a conflict
    that is stored as several documents contributes once per document. This
    definition replaces the side-A function of the same name once the cell
    has run.

    Parameters:
        country: country name to look for (case-insensitive literal substring
            of ``side_b``). Non-string values (e.g. NaN) count as 0.

    Returns:
        int, the number of matching documents that carry a ``conflict_id``.
    """
    if not isinstance(country, str):
        return 0
    # Escape the name so regex metacharacters in it are matched literally.
    # NOTE(review): this is still a substring match, so e.g. "Niger" also
    # matches "Nigeria" - confirm whether whole-name matching is wanted.
    query = {'side_b': {'$regex': re.escape(country), '$options': 'i'}}
    cur = db.conflicts.find(query, {'_id': 0, 'conflict_id': 1})
    # Iterating the cursor ends cleanly when it is exhausted and lets genuine
    # failures (network, bad query) surface instead of being swallowed.
    return sum(1 for doc in cur if 'conflict_id' in doc)

In [None]:
# Count distinct side-B conflicts for every row's country. This relies on the
# side-B getNumConflicts having been defined after the side-A one.
# One Mongo query is issued per row of hfDf.
hfDf['num_conflicts_side_B'] = hfDf['country'].apply(getNumConflicts)

In [None]:
# Store the list of distinct side-B conflict ids for every row's country
# (uses the side-B getConflicts; one Mongo query per row of hfDf).
hfDf['conflicts_side_B'] = hfDf['country'].apply(getConflicts)

In [None]:
# Count every side-B conflict document (not just distinct ids) for each row's
# country (uses the side-B getNumOp; one Mongo query per row of hfDf).
hfDf['num_ops_side_B'] = hfDf['country'].apply(getNumOp)

In [None]:
hfDf.sort_values(by = "num_conflicts_side_B", ascending = False)

## Conflicts per country side B
(for countries with at least one conflict)

In [None]:
# Bar chart of distinct side-B conflicts per country code, highest first;
# rows with a count of zero are left out. The chart is also written to disk.
plt.figure(figsize=(15, 6))
sns.barplot(
    x='ISO_code',
    y='num_conflicts_side_B',
    color='#3eb077',
    data=hfDf.loc[hfDf['num_conflicts_side_B'].gt(0)]
             .sort_values(by='num_conflicts_side_B', ascending=False),
)
plt.xticks(rotation=90)
plt.tick_params(axis='x', which='major', labelsize=10)
plt.tight_layout(h_pad=2)
plt.savefig('num_conflicts_side_B.png')

## Operations per country side B
(most conflicts are composed of more than one operation)

In [None]:
# Bar chart of side-B conflict documents ("operations") per country code,
# highest first; rows with a count of zero are left out. Also saved to disk.
plt.figure(figsize=(15, 6))
sns.barplot(
    x='ISO_code',
    y='num_ops_side_B',
    color='#3eb077',
    data=hfDf.loc[hfDf['num_ops_side_B'].gt(0)]
             .sort_values(by='num_ops_side_B', ascending=False),
)
plt.xticks(rotation=90)
plt.tick_params(axis='x', which='major', labelsize=10)
plt.tight_layout(h_pad=2)
plt.savefig('num_ops_side_B.png')

## Adding military expenditure proportion

In [None]:
# Distinct years present in hfDf, in order of first appearance; used to drive
# the per-year expenditure lookup.
years = list(hfDf.year.unique())
years

In [None]:
# Look up the military-expenditure share for every (country code, year) pair
# in hfDf. Records are collected in a plain list and turned into a frame once
# at the end: DataFrame.append() no longer exists in current pandas and
# re-copied the frame on every iteration.
records = []
for year in years:
    codes = hfDf.loc[hfDf['year'] == year, 'ISO_code']
    for code in codes:
        # The projection asks exp_prop for the single field named after the year.
        year_key = str(int(year))
        doc = db.exp_prop.find_one({'Code': code}, {'_id': 0, year_key: 1})
        # find_one returns None for an unknown code, and the field is absent
        # when that country has no entry for the year; both become None
        # without hiding unrelated errors behind a bare except.
        prop = doc.get(year_key) if doc else None
        records.append({'ISO_code': code, 'year': year, 'prop_gdp': prop})

# Explicit columns keep the merge keys present even when nothing was collected.
tempdf = pd.DataFrame(records, columns=['ISO_code', 'year', 'prop_gdp'])

In [None]:
tempdf

In [None]:
# Attach prop_gdp to hfDf by matching on country code and year, using merge's
# default join type. hfDf is overwritten, so re-running this cell merges again.
hfDf = hfDf.merge(tempdf, on = ['ISO_code', 'year'])

In [None]:
hfDf

In [None]:
hfDf.describe()

## HF score per year

In [None]:
# World map of hf_score with one choropleth trace per year and a slider that
# switches between them.
fig = go.Figure()

years = list(hfDf.year.unique())

# Index of the trace shown when the figure first renders: the last one added.
# It drives both the trace visibility and the slider position, so the two
# cannot disagree or point past the end of the trace list.
initial = len(years) - 1

for i, year in enumerate(years):
    rows = hfDf.loc[hfDf['year'] == year]
    fig.add_trace(
        go.Choropleth(
            z=rows['hf_score'],
            locations=rows['ISO_code'],
            # Fixed colour range so colours are comparable across years.
            # NOTE(review): presumably the overall min/max of hf_score - confirm.
            zmax=9.12,
            zmin=3.5,
            # Every trace except the initial one starts hidden; otherwise all
            # years are drawn on top of each other until the slider is used.
            visible=(i == initial)))

# One slider step per year: show only that year's trace and update the title.
steps = []
for i, year in enumerate(years):
    label = str(int(year))
    visibility = [False] * len(fig.data)
    visibility[i] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": "HF scores for the year: " + label}],
        label=label,
    ))

sliders = [dict(
    active=max(initial, 0),
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)]

layout = dict(sliders=sliders)
if years:
    # Start with the same title the slider would set for the initial trace.
    layout["title"] = "HF scores for the year: " + str(int(years[initial]))
fig.update_layout(**layout)

fig.show()

## Military Expenditure Per Year

In [None]:
# World map of prop_gdp (military expenditure as a share of GDP) with one
# choropleth trace per year and a slider that switches between them.
fig = go.Figure()

years = list(hfDf.year.unique())

# Index of the trace shown when the figure first renders: the last one added.
# It drives both the trace visibility and the slider position, so the two
# cannot disagree or point past the end of the trace list.
initial = len(years) - 1

for i, year in enumerate(years):
    rows = hfDf.loc[hfDf['year'] == year]
    fig.add_trace(
        go.Choropleth(
            z=rows['prop_gdp'],
            locations=rows['ISO_code'],
            # Fixed colour range so colours are comparable across years;
            # values above 0.1 are drawn with the top colour.
            zmax=0.1,
            zmin=0,
            # Every trace except the initial one starts hidden; otherwise all
            # years are drawn on top of each other until the slider is used.
            visible=(i == initial)))

# One slider step per year: show only that year's trace and update the title.
steps = []
for i, year in enumerate(years):
    label = str(int(year))
    visibility = [False] * len(fig.data)
    visibility[i] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": "Military expenditure as proportion of gdp for the year: " + label}],
        label=label,
    ))

sliders = [dict(
    active=max(initial, 0),
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)]

layout = dict(sliders=sliders)
if years:
    # Start with the same title the slider would set for the initial trace.
    layout["title"] = ("Military expenditure as proportion of gdp for the year: "
                       + str(int(years[initial])))
fig.update_layout(**layout)

fig.show()

## Conflict info

In [None]:
# One row per country for the 2017 slice of hfDf, keeping only the ISO code and
# the two lists of conflict ids (side A and side B).
ic_df = hfDf.loc[hfDf.year == 2017,['ISO_code', 'conflicts_side_A', 'conflicts_side_B']]

In [None]:
# Build one row per conflict document for every conflict a country took part
# in (on either side), tagged with that country's ISO code.
records = []
for _, row in ic_df.iterrows():
    code = row['ISO_code']
    # Combine both sides into a NEW list. The previous in-place
    # append(side_B_list) nested the whole side-B list as a single element (so
    # side-B conflicts were never looked up) and also modified the list stored
    # in hfDf. dict.fromkeys drops ids present on both sides while keeping
    # their order, so no conflict is fetched twice for one country.
    conflict_ids = list(dict.fromkeys(
        list(row['conflicts_side_A']) + list(row['conflicts_side_B'])))
    for con in conflict_ids:
        cur = db.conflicts.find({'conflict_id': con},
                                {'_id': 0, 'location': 1, 'side_a': 1,
                                 'side_b': 1, 'year': 1, 'intensity_level': 1,
                                 'cumulative_intensity': 1,
                                 'start_date': 1, 'ep_end_date': 1})
        # Iterating the cursor stops cleanly at the end and lets real errors
        # surface, unlike the earlier bare except around cur.next().
        for doc in cur:
            doc['ISO_code'] = code
            doc['conflict_id'] = con
            records.append(doc)

# Build the frame once; DataFrame.append() no longer exists in current pandas.
con_info = pd.DataFrame(records)

In [None]:
con_info.describe()

In [None]:
con_info.head()

In [None]:
con_info.describe()

In [None]:
# Order con_info from the most recent year to the oldest, modifying it in place.
con_info.sort_values('year', ascending=False, inplace=True)

In [None]:
# World map of cumulative_intensity with one choropleth trace per year and a
# slider that switches between them.
con_info.sort_values('year', ascending=False, inplace=True)

fig = go.Figure()

years = list(con_info.year.unique())

# Index of the trace shown when the figure first renders: the last one added.
# It drives both the trace visibility and the slider position, so the two
# cannot disagree or point past the end of the trace list.
initial = len(years) - 1

for i, year in enumerate(years):
    # Take z, locations and hover text from the SAME con_info rows. The
    # locations were previously read from hfDf using a con_info mask, and the
    # text was the whole unfiltered frame, so neither lined up with z.
    rows = con_info.loc[con_info['year'] == year]
    hover = [
        f"conflict {cid}: {loc} ({a} vs. {b})"
        for cid, loc, a, b in zip(rows['conflict_id'], rows['location'],
                                  rows['side_a'], rows['side_b'])
    ]
    # NOTE(review): a country with several conflicts in one year appears
    # several times in `locations`; confirm whether those rows should be
    # aggregated before plotting.
    fig.add_trace(
        go.Choropleth(
            z=rows['cumulative_intensity'],
            locations=rows['ISO_code'],
            zmax=1,
            zmin=0,
            text=hover,
            # Every trace except the initial one starts hidden; otherwise all
            # years are drawn on top of each other until the slider is used.
            visible=(i == initial)))

# One slider step per year: show only that year's trace and update the title.
steps = []
for i, year in enumerate(years):
    label = str(int(year))
    visibility = [False] * len(fig.data)
    visibility[i] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": "Cumulative conflict intensity level per year: " + label}],
        label=label,
    ))

sliders = [dict(
    active=max(initial, 0),
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)]

layout = dict(sliders=sliders)
if years:
    # Start with the same title the slider would set for the initial trace.
    layout["title"] = ("Cumulative conflict intensity level per year: "
                       + str(int(years[initial])))
fig.update_layout(**layout)

fig.show()

In [None]:
# World map of intensity_level with one choropleth trace per year and a slider
# that switches between them.
con_info.sort_values('year', ascending=False, inplace=True)

fig = go.Figure()

years = list(con_info.year.unique())

# Index of the trace shown when the figure first renders: the last one added.
# It drives both the trace visibility and the slider position, so the two
# cannot disagree or point past the end of the trace list.
initial = len(years) - 1

for i, year in enumerate(years):
    # Take z, locations and hover text from the same rows. The text was
    # previously the whole unfiltered frame, so it did not line up with z.
    rows = con_info.loc[con_info['year'] == year]
    hover = [
        f"conflict {cid}: {loc} ({a} vs. {b})"
        for cid, loc, a, b in zip(rows['conflict_id'], rows['location'],
                                  rows['side_a'], rows['side_b'])
    ]
    # NOTE(review): a country with several conflicts in one year appears
    # several times in `locations`; confirm whether those rows should be
    # aggregated before plotting.
    fig.add_trace(
        go.Choropleth(
            z=rows['intensity_level'],
            locations=rows['ISO_code'],
            zmax=2,
            zmin=1,
            text=hover,
            # Every trace except the initial one starts hidden; otherwise all
            # years are drawn on top of each other until the slider is used.
            visible=(i == initial)))

# One slider step per year: show only that year's trace and update the title.
steps = []
for i, year in enumerate(years):
    label = str(int(year))
    visibility = [False] * len(fig.data)
    visibility[i] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": "Conflict intensity level per year: " + label}],
        label=label,
    ))

sliders = [dict(
    active=max(initial, 0),
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)]

layout = dict(sliders=sliders)
if years:
    # Start with the same title the slider would set for the initial trace.
    layout["title"] = ("Conflict intensity level per year: "
                       + str(int(years[initial])))
fig.update_layout(**layout)

fig.show()

In [None]:
# Close the Mongo client first; its connection runs through the SSH tunnel.
client.close()

In [None]:
# Shut the SSH tunnel down once the Mongo client no longer needs it.
server.close()

In [None]:
server.is_active