# Pakistan Crime Data

    1. How many murders happened in each province?
    2. Which province had the most crimes?
    3. Which province had the fewest crimes?
    4. A more in-depth analysis of crime in Islamabad


In [186]:
import numpy as np 
import pandas as pd 
import os
import plotly.express as px


In [187]:
# Load the raw crime table; the path is relative, so the CSV must sit in the
# notebook's working directory.
df = pd.read_csv('pakistan_crime_data.csv')

In [188]:
# Quick look at the raw data: print (rows, columns), then display the first rows.
print(df.shape)
df.head()

(60, 12)


Unnamed: 0,_id,Year,Offence,Punjab,Sindh,KP,Balochistan,Islamabad,Railways,G.B,AJK,Pakistan
0,1,2012,Murder,6128,3726,2958,711,120,6,102,95,13846
1,2,2012,Attempt to Murder,7641,3732,2892,583,146,9,163,172,15338
2,3,2012,Kidnapping /Abduction,15699,3077,1052,386,70,6,32,288,20610
3,4,2012,Dacoity,2715,1341,60,98,22,1,8,12,4257
4,5,2012,Robbery,12181,4320,134,160,177,5,26,78,17081


In [189]:
# Dropping _id, and any row with "Others", and "Other Theft" as they dont really help
df2=df.drop('_id', axis=1)
#df2=df.drop(df.loc[((df['Offence'] == 'Others') | (df['Offence'] == 'Other Theft'))].index, axis=0)
df2

Unnamed: 0,Year,Offence,Punjab,Sindh,KP,Balochistan,Islamabad,Railways,G.B,AJK,Pakistan
0,2012,Murder,6128,3726,2958,711,120,6,102,95,13846
1,2012,Attempt to Murder,7641,3732,2892,583,146,9,163,172,15338
2,2012,Kidnapping /Abduction,15699,3077,1052,386,70,6,32,288,20610
3,2012,Dacoity,2715,1341,60,98,22,1,8,12,4257
4,2012,Robbery,12181,4320,134,160,177,5,26,78,17081
5,2012,Burglary,14740,1680,500,117,245,0,101,255,17638
6,2012,Cattle Theft,8115,630,118,77,43,0,23,40,9046
7,2012,Other Theft,34719,2976,717,332,585,560,71,142,40102
8,2012,Others,292665,57206,139344,5745,5699,1528,1033,4509,507729
9,2012,TOTAL RECORDED CRIME,394603,78688,147775,8209,7107,2115,1559,5591,645647


## Reshaping the data into long format and creating separate dataframes for easier reuse later

In [190]:
# Reshape wide -> long: each region column becomes a (Province, value) pair per
# Year/Offence row, then the 'Pakistan' column (the country as a whole) is
# filtered out so only the individual regions remain.
df_long = (
    df2
    .melt(id_vars=['Year', 'Offence'], var_name='Province')
    .loc[lambda long_frame: long_frame['Province'] != 'Pakistan']
)

In [191]:
# Keep the 'TOTAL RECORDED CRIME' rows in their own frame.
totals = df_long[df_long['Offence'].eq('TOTAL RECORDED CRIME')]

In [192]:
# Preview the long-format frame (columns: Year, Offence, Province, value).
df_long.head()

Unnamed: 0,Year,Offence,Province,value
0,2012,Murder,Punjab,6128
1,2012,Attempt to Murder,Punjab,7641
2,2012,Kidnapping /Abduction,Punjab,15699
3,2012,Dacoity,Punjab,2715
4,2012,Robbery,Punjab,12181


In [193]:
# Remove the 'TOTAL RECORDED CRIME' rows and the 'Others' rows in a single pass,
# leaving only the named offence categories in df_long.
df_long = df_long[~df_long['Offence'].isin(['TOTAL RECORDED CRIME', 'Others'])]

In [194]:
# Sanity check: the index shown should be empty once the 'Others' rows are gone.
df_long[df_long['Offence'].eq('Others')].index

Int64Index([], dtype='int64')

## Crime Analysis

In [195]:
# Bar heights: sum of `value` over every remaining row (all years and offences) per province.
crime_by_prov = df_long.groupby('Province')['value'].sum()

# Bar colours: the largest single row value per province.
# Deliberately NOT named `max`: assigning to `max` rebinds the Python builtin
# for every cell that runs afterwards in the same kernel.
max_value_by_prov = df_long.groupby('Province')['value'].max()

# NOTE: the "Most Crimes" / "Least Crimes" figures in the title are hard-coded
# text, not computed from crime_by_prov; update them by hand if the data changes.
# NOTE(review): the '_value' label key presumably matches the name plotly gives
# the value axis when the input Series is itself called 'value' - confirm.
px.bar(
    crime_by_prov,
    title='<b>Total Crimes by Province 2012-2017</b><br>Most Crimes:Punjab 2.37Mil<br>Least Crimes:G.B 9,254 ',
    labels={'_value': 'Crime Total'},
    color=max_value_by_prov,
    color_continuous_scale='Thermal',
)

## TODO 
### Subplots of crimes, by year for each province

This would need crimes, by province, by year

In [196]:
# Libraries for Plotly Subplots
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [197]:
# List the raw frame's column names for reference.
df.columns

Index(['_id', 'Year', 'Offence', 'Punjab', 'Sindh', 'KP', 'Balochistan',
       'Islamabad', 'Railways', 'G.B', 'AJK', 'Pakistan'],
      dtype='object')

Setting up one Series of per-offence totals for each province, to use in the plots. Possibly turn this into a function?

In [198]:
def offence_totals_for(province, crime_long=None):
    """Return the summed `value` per offence for one province.

    Parameters
    ----------
    province : str
        Value to match exactly against the 'Province' column (e.g. 'G.B').
    crime_long : pandas.DataFrame, optional
        Long-format frame with 'Province', 'Offence' and 'value' columns.
        Defaults to the notebook's `df_long`.

    Returns
    -------
    pandas.Series
        Indexed by 'Offence', holding the sum of 'value' over all matching rows.
        Empty if no row matches `province`.
    """
    if crime_long is None:
        crime_long = df_long
    in_province = crime_long['Province'] == province
    return crime_long.loc[in_province].groupby('Offence')['value'].sum()


# One series per province, kept under the same names the plotting cells use.
punjab_crime = offence_totals_for('Punjab')
sindh_crime = offence_totals_for('Sindh')
kp_crime = offence_totals_for('KP')
balochistan_crime = offence_totals_for('Balochistan')
islamabad_crime = offence_totals_for('Islamabad')
railways_crime = offence_totals_for('Railways')
gb_crime = offence_totals_for('G.B')
ajk_crime = offence_totals_for('AJK')


In [199]:
# One bar chart of per-offence totals for each province, two panels per row.
# Panels are listed in display order (left to right, top to bottom); each entry
# pairs the panel title with the series that must be drawn in it. Previously
# every panel was given punjab_crime, so all eight charts showed Punjab's data.
province_panels = [
    ('Punjab', punjab_crime),
    ('Sindh', sindh_crime),
    ('KP', kp_crime),
    ('Balochistan', balochistan_crime),
    ('Islamabad', islamabad_crime),
    ('Railways', railways_crime),
    ('GB', gb_crime),
    ('AJK', ajk_crime),
]

# The grid keeps its original shape: 8 rows x 2 columns, with traces placed on
# the odd rows only (1, 3, 5, 7). The even rows get blank titles and no traces.
fig = make_subplots(
    rows=8,
    cols=2,
    subplot_titles=("Punjab", 'Sindh', '', '', 'KP', 'Balochistan', '', '', 'Islamabad', 'Railways', '', '', 'GB', 'AJK'),
)

for position, (panel_title, crime_series) in enumerate(province_panels):
    fig.add_trace(
        go.Bar(x=crime_series.index, y=crime_series.values, name=panel_title),
        row=2 * (position // 2) + 1,  # 0,1 -> row 1; 2,3 -> row 3; 4,5 -> row 5; 6,7 -> row 7
        col=position % 2 + 1,         # alternate left / right column
    )

# update_layout returns the figure, so leaving it as the last expression
# also renders the chart in the notebook.
fig.update_layout(
    title_text='Crimes by Province',
    autosize=True,
    width=1000,
    height=1000,
    paper_bgcolor='#006600',
    font_color='white',
    showlegend=False,
)

