In [17]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
import seaborn as sns

In [39]:
# Short display names for the columns used in the rest of the notebook.
# `rename` skips keys that are not present in the CSV (pandas default).
short_names = {
    'id': 'ID',
    'year': 'Year',
    'month': 'Month',
    'type_of_violence': 'Type',
    'dyad_new_id': 'CID',
    'where_prec': 'Where',
    'latitude': 'Lat',
    'longitude': 'Long',
    'best': 'Casualties',
}

# Read the events, keep only rows whose `Where` code is below 5, then discard
# that column since it is only needed for the filter.
# NOTE(review): `where_prec` looks like a location-precision code where lower
# means more precise -- confirm against the data dictionary.
df = (
    pd.read_csv('data/conflict_data.csv')
    .rename(columns=short_names)
    .loc[lambda events: events['Where'] < 5]
    .drop(columns=['Where'])
)
df.head()

Unnamed: 0,Year,Month,Type,CID,Lat,Long,Casualties
0,1989,1,1,411,14.6042,120.9822,42
1,1989,1,1,411,6.9514,125.0992,8
2,1989,1,1,411,7.27,124.3106,4
3,1989,2,1,411,16.0,121.75,4
4,1989,2,1,411,14.6042,120.9822,4


In [66]:
# Map of every event location in `df`, before any clustering is applied.
# The view is fitted to the plotted points (fitbounds="locations").
fig2 = (
    px.scatter_geo(df, lat="Lat", lon="Long")
    .update_geos(
        center={"lat": 14.5995, "lon": 120.9842},
        fitbounds="locations",
    )
    .update_layout(title="Conflict Data", width=800, height=500)
)
fig2.show()

In [41]:
# Rescale every column of `df` to MinMaxScaler's default [0, 1] range and keep
# the fitted scaler around. `X` is the scaled array used by the KMeans cells.
# NOTE(review): all columns of `df` are used as features, including `CID` and
# `Type`, which look like identifiers/categories -- confirm this is intended.
scaler = MinMaxScaler()
X = scaler.fit_transform(df)

In [42]:
# Elbow sweep: fit KMeans for k = 1..99 on the scaled data and record the
# inertia of each fit. Settings shared by every fit are kept in one place.
sweep_params = dict(n_init=10, tol=1e-4, random_state=42)

inertia = []
for k in range(1, 100):
    kmeans = KMeans(n_clusters=k, **sweep_params).fit(X)
    inertia.append(kmeans.inertia_)

In [43]:
# Elbow plot: inertia of each fit against the number of clusters tried (1..99).
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(1, 100), y=inertia))
fig.update_layout(
    title="Inertia vs Cluster Number",
    xaxis={"range": [0, 100], "title": "Cluster Number"},
    yaxis={"title": "Inertia"},
)
# Arrow at the k=3 point; `inertia[2]` because the list starts at k=1.
# NOTE(review): the final model elsewhere in this notebook uses n_clusters=7,
# not 3 -- confirm which value the annotation should mark.
fig.add_annotation(
    x=3,
    y=inertia[2],
    xref="x",
    yref="y",
    text="Elbow!",
    showarrow=True,
    arrowhead=7,
    ax=20,
    ay=-40,
)

In [44]:
# Final model: 7 clusters with k-means++ seeding. The fixed random_state keeps
# the resulting labels repeatable between runs.
final_params = {
    "n_clusters": 7,
    "init": "k-means++",
    "n_init": 10,
    "tol": 1e-4,
    "random_state": 42,
}
kmeans = KMeans(**final_params)
kmeans.fit(X)

In [45]:
# Scaled feature matrix as a frame with the original column names, plus a
# `label` column holding the cluster id assigned to each row.
clusters = pd.DataFrame(X, columns=df.columns).assign(label=kmeans.labels_)

In [46]:
# Mean of every scaled feature per cluster, reshaped to long form
# (label, variable, value) so each cluster becomes one closed polar line.
polar = (
    clusters
    .groupby("label")
    .mean()
    .reset_index()
    .melt(id_vars=["label"])
)
fig4 = px.line_polar(
    polar,
    r="value",
    theta="variable",
    color="label",
    line_close=True,
    height=800,
    width=1400,
)
fig4.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [47]:
# Number of rows assigned to each cluster, drawn as shares of a pie chart.
pie = clusters.groupby('label').size().reset_index(name='value')
px.pie(pie, values='value', names='label')

In [65]:
# Map of the events coloured by cluster and sized by casualties.
#
# The cluster ids go onto a *new* frame instead of being written into `df`.
# `df` is the input of the scaling cell and supplies the column names for
# `clusters`; adding a column to it in place would make re-running those cells
# feed the cluster label back in as a feature.
#
# The ids are cast to strings so Plotly treats them as discrete categories
# (one legend entry per cluster) rather than as a continuous colour scale,
# which would suggest an ordering the ids do not have.
df_clustered = df.assign(Cluster=kmeans.labels_.astype(str))

# Fix the legend order to 0..k-1 instead of order of first appearance.
cluster_order = [str(k) for k in range(kmeans.n_clusters)]

fig2 = px.scatter_geo(
    df_clustered,
    lat="Lat",
    lon="Long",
    color="Cluster",
    size="Casualties",
    category_orders={"Cluster": cluster_order},
)
fig2.update_geos(
    center=dict(lat=14.5995, lon=120.9842),
    fitbounds="locations",
)
fig2.update_layout(height=500, width=800, title="Conflict Data")
fig2.show()