In [27]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
import seaborn as sns

In [28]:
# Load the conflict events and map the raw column names to display-friendly ones.
# With pandas' default `errors="ignore"`, `rename` skips keys absent from the CSV.
df = pd.read_csv('data/conflict_data.csv').rename(columns={
    'id': 'ID',
    'year': 'Year',
    'month': 'Month',
    'type_of_violence': 'Type',
    'side_a': 'Side A',
    'side_b': 'Side B',
    'side_a_new_id': 'Side A ID',
    'side_b_new_id': 'Side B ID',
    'dyad_new_id': 'CID',
    'dyad_name': 'Actors',
    'where_coordinates': 'Location',
    'where_prec': 'Where',
    'latitude': 'Lat',
    'longitude': 'Long',
    'best': 'Casualties',
})

# Keep only rows with 'Where' < 5. 'Where' is the renamed `where_prec` column;
# presumably a location-precision code -- TODO confirm against the data dictionary.
# `.copy()` makes `df` an independent frame, so adding columns to it later
# (e.g. cluster labels) does not trigger SettingWithCopyWarning.
df = df[df['Where'] < 5].copy()
df.head()

Unnamed: 0,ID,Year,Month,Type,Side A ID,Side A,Side B ID,Side B,Where,Location,Lat,Long,Casualties
0,123403,1989,1,1,5,Government of Philippines,6,CPP,1,Manila City,14.6042,120.9822,42.0
1,124029,1989,1,1,5,Government of Philippines,6,CPP,1,Makilala town,6.9514,125.0992,8.0
2,125199,1989,1,1,5,Government of Philippines,6,CPP,1,Sultan Kudarat town,7.27,124.3106,4.0
3,122856,1989,2,1,5,Government of Philippines,6,CPP,4,Aurora province,16.0,121.75,4.0
4,123430,1989,2,1,5,Government of Philippines,6,CPP,2,Manila City,14.6042,120.9822,4.0


In [29]:
# Overview map: one marker per event at its latitude/longitude.
# The plotly update_* methods return the figure, so the setup is chained.
fig2 = (
    px.scatter_geo(df, lat="Lat", lon="Long")
    .update_geos(
        center={"lat": 14.5995, "lon": 120.9842},
        fitbounds="locations",
    )
    .update_layout(
        title="Conflict Data",
        width=800,
        height=500,
    )
)
fig2.show()

In [30]:
# Build the clustering feature frame: drop the identifier, date, place-name and
# actor-name columns and keep the remaining columns as features.
df2 = df.drop(columns=['ID', 'Where', 'Month', 'Location', 'Year', 'Side A', 'Side B'])

# A later cell writes the fitted labels back into `df` as 'Cluster'. If this cell
# is re-run after that one, the label must not leak into the features, so drop it
# when present (errors='ignore' makes this a no-op on a fresh run).
df2 = df2.drop(columns=['Cluster'], errors='ignore')

# Rescale every feature with MinMaxScaler (default range [0, 1]) so that columns
# with large magnitudes do not dominate the k-means distances.
scaler = MinMaxScaler()
X = scaler.fit_transform(df2)
df2.head()

Unnamed: 0,Type,Side A ID,Side B ID,Lat,Long,Casualties
0,1,5,6,14.6042,120.9822,42.0
1,1,5,6,6.9514,125.0992,8.0
2,1,5,6,7.27,124.3106,4.0
3,1,5,6,16.0,121.75,4.0
4,1,5,6,14.6042,120.9822,4.0


In [31]:
# Fit k-means for k = 1..99 and record each model's inertia for the elbow plot.
# After the loop `kmeans` is left bound to the last fitted model.
sweep_params = dict(n_init=10, tol=1e-4, random_state=42)
inertia = []
for n_clusters in range(1, 100):
    kmeans = KMeans(n_clusters=n_clusters, **sweep_params)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)

In [32]:
# Plot inertia against the number of clusters and mark the chosen elbow.
# The x values and axis range are derived from `inertia` so the plot stays
# consistent if the sweep range changes. inertia[j] corresponds to k = j + 1.
elbow_k = 4  # cluster count judged to be the elbow of the curve
cluster_counts = np.arange(1, len(inertia) + 1)

fig = go.Figure(data=go.Scatter(x=cluster_counts, y=inertia))
fig.update_layout(
    title="Inertia vs Cluster Number",
    xaxis=dict(range=[0, len(inertia) + 1], title="Cluster Number"),
    yaxis={'title': 'Inertia'},
    annotations=[
        dict(
            x=elbow_k,
            y=inertia[elbow_k - 1],
            xref="x",
            yref="y",
            text="Elbow!",
            showarrow=True,
            arrowhead=7,
            ax=20,
            ay=-40,
        )
    ],
)

In [33]:
# Final model: k-means with 4 clusters and a fixed seed.
final_params = {
    "n_clusters": 4,
    "init": "k-means++",
    "n_init": 10,
    "tol": 1e-4,
    "random_state": 42,
}
kmeans = KMeans(**final_params)
# Left as the cell's last expression so the fitted estimator is displayed.
kmeans.fit(X)

In [34]:
# Scaled features as a frame (same column names as df2) plus each row's cluster label.
clusters = pd.DataFrame(X, columns=df2.columns).assign(label=kmeans.labels_)

In [38]:
# Mean of every scaled feature per cluster, reshaped to long form
# (label, variable, value) so each cluster becomes one closed polar line.
polar = (
    clusters
    .groupby("label")
    .mean()
    .reset_index()
    .melt(id_vars=["label"])
)
fig4 = px.line_polar(
    polar,
    r="value",
    theta="variable",
    color="label",
    line_close=True,
    title="Cluster Parameters",
    height=500,
    width=500,
)
fig4.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [40]:
# Number of rows per cluster, as a two-column frame: 'label' and 'value'.
pie = clusters.groupby('label').size().reset_index(name='value')
px.pie(
    pie,
    names='label',
    values='value',
    title="Cluster Distribution",
    height=500,
    width=500,
)

In [37]:
# Attach each event's cluster label. `assign` returns a new frame, so this does
# not raise SettingWithCopyWarning even though `df` was produced by filtering.
df = df.assign(Cluster=kmeans.labels_)

# Cluster ids are categories, not magnitudes. Plotly Express colours numeric
# columns on a continuous scale, so plot a copy with the ids as strings to get
# one discrete colour per cluster. `df['Cluster']` itself stays integer.
plot_df = df.assign(Cluster=df["Cluster"].astype(str))
cluster_order = sorted(plot_df["Cluster"].unique(), key=int)  # legend in numeric order

fig2 = px.scatter_geo(
    plot_df,
    size="Casualties",
    lat="Lat",
    lon="Long",
    color="Cluster",
    category_orders={"Cluster": cluster_order},
    hover_name="ID",
    hover_data=["Year", "Type", "Casualties", "Side A", "Side B"],
)
fig2.update_geos(
    center=dict(lat=14.5995, lon=120.9842),
    fitbounds="locations",
)
fig2.update_layout(
    height=500,
    width=800,
    title="Conflict Data",
)
fig2.show()