#### _IMPORTING LIBRARIES_

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import cufflinks as cf
import folium
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import datetime
from folium.plugins import MarkerCluster, HeatMap, AntPath

# Put cufflinks into plotly's offline mode and initialise plotly's notebook
# integration so interactive charts can be rendered inside the notebook.
cf.go_offline()
init_notebook_mode()

: 

#### _READING DATA_

In [None]:
# Load the three Excel workbooks stored next to the notebook, in the order
# data / land_code / land_pop.
data, land_code, land_pop = (
    pd.read_excel(workbook)
    for workbook in (r"./mmap.xlsx", r"./londonimddecile.xlsx", r"./boroughpop.xlsx")
)

In [None]:
# Work on copies so the frames loaded from disk stay intact if a later step goes wrong.
df1, df2, df3 = (frame.copy() for frame in (data, land_code, land_pop))

In [None]:
# (rows, columns) of each working frame, in the order df1, df2, df3.
tuple(frame.shape for frame in (df1, df2, df3))

#### _EDA_

In [None]:
# Preview the first rows of the main working frame.
df1.head()

In [None]:
# Column dtypes and non-null counts; the cleaning decisions listed below are based on this.
df1.info()

Based on the above information about the dataset, we are going to:

1. **Drop all features relating to the suspect** since these features have close to 50% of their values missing.

2. **Drop the easting and the northing features** since the longitude and latitude features are what we will be using.

3. **Drop the ID column** so we use the index provided by pandas.

4. **Fill missing values** for the remaining columns if there are any null values.


In [None]:
# Dropping features about the suspect.
# `columns=` already names the axis, so the redundant `axis` argument is gone.
# Reassigning (instead of inplace=True) together with errors='ignore' lets the
# cell be re-run without raising KeyError once the columns have been removed.
df1 = df1.drop(columns=['susno', 'susage', 'sussex', 'susagegp'], errors='ignore')

In [None]:
# Dropping ID, easting and northing features.
# `columns=` already names the axis, so the redundant `axis` argument is gone.
# Reassigning (instead of inplace=True) together with errors='ignore' lets the
# cell be re-run without raising KeyError once the columns have been removed.
df1 = df1.drop(columns=['easting', 'northing', 'ID'], errors='ignore')

In [None]:
# Display the working frame (pandas truncates long frames in the rendered output).
df1

In [None]:
# Number of missing values in each column.
df1.isna().sum()

##### _Exploring the date feature_

In [None]:
# Extract the year from the date and create a new column ('year') which holds it.
# pd.to_datetime parses the whole column in one vectorized call and accepts both
# timestamps and date strings, so no per-row str()/strptime round-trip tied to a
# single text layout is needed.
df1['year'] = pd.to_datetime(df1['date']).dt.year

In [None]:
# Display the working frame again to check the new 'year' column.
df1

In [None]:
# Number of rows recorded for each year.
df1['year'].value_counts()

In [None]:
# Annual distribution of homicides, one bar per year from 2008 to 2018.
sns.set_style('darkgrid')
sns.set(font_scale=1)
fig, ax = plt.subplots(figsize=(20, 12))
years_on_axis = np.arange(2008, 2019)
bar_colour = sns.color_palette()[0]
ax = sns.countplot(data=df1, x="year", color=bar_colour, order=years_on_axis)
ax.set_title('annual distribution of homicides in london'.upper(), fontsize=15)
ax.set_xlabel('Year', fontsize=15)
ax.set_ylabel('Homicide Count', fontsize=12)
# Write each bar's count slightly above it, centred on the bar.
for bar in ax.patches:
    count = bar.get_height()
    centre_x = bar.get_x() + bar.get_width() / 2
    ax.text(centre_x, count + 2, count, ha='center', weight='bold')

##### _Exploring the age and the Age group of victims_

In [None]:
# Victim counts per age group over 2008 - 2018.
df1['vicagegp'].value_counts()

In [None]:
# Homicide counts per victim age group; rows are sorted by age group first so
# the bars appear in sorted order.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(20, 12))
victims_by_age_group = df1.sort_values(by=['vicagegp'])
bar_colour = sns.color_palette()[4]
ax = sns.countplot(data=victims_by_age_group, x="vicagegp", color=bar_colour)
ax.set_title('distribution of homicides based on victim age groups'.upper(), fontsize=15)
ax.set_xlabel('Victim Age Group', fontsize=15)
ax.set_ylabel('Homicide Count', fontsize=12)
# Write each bar's count slightly above it, centred on the bar.
for bar in ax.patches:
    count = bar.get_height()
    centre_x = bar.get_x() + bar.get_width() / 2
    ax.text(centre_x, count + 2, count, ha='center', weight='bold')

Most homicide victims are in the 25-34 age group, closely followed by the 20-24 age group and then the 35-44 age group.

We can say adults within the age of 20-44 are more likely to be killed.

Children (0-12), by contrast, are the least likely to be killed.

In [None]:
# One chart per year (2008-2018): homicide counts per victim age group.
for year in np.arange(2008, 2019):
    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(20, 12))
    # Only the rows of the current year, sorted so the bars appear in sorted order.
    victims_in_year = df1[df1['year'] == year].sort_values(by=['vicagegp'])
    ax = sns.countplot(data=victims_in_year, x="vicagegp", color=sns.color_palette()[9])
    chart_title = 'distribution of homicides based on victim age groups in {}'.format(year).upper()
    ax.set_title(chart_title, fontsize=25)
    ax.set_xlabel('Victim Age Group', fontsize=15)
    ax.set_ylabel('Homicide Count', fontsize=12)
    # Write each bar's count at its top edge, centred on the bar.
    for bar in ax.patches:
        count = bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.text(centre_x, count, count, ha='center', weight='bold')

In [None]:
# Re-check column dtypes and non-null counts at this point in the analysis.
df1.info()

##### _Exploring Victim Sex_

In [None]:
# Homicide counts split by the victim's sex.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(20, 10))
ax = sns.countplot(data=df1, x="vicsex")
ax.set_title('distribution of homicides based on Victim sex'.upper(), fontsize=15)
ax.set_xlabel('Victim Sex', fontsize=15)
ax.set_ylabel('Homicide Count', fontsize=12)
# Write each bar's (rounded) count slightly above it, centred on the bar.
for bar in ax.patches:
    count = bar.get_height()
    centre_x = bar.get_x() + bar.get_width() / 2
    ax.text(centre_x, count + 2, round(count, 2), ha='center', weight='bold')


In [None]:
# Share of homicide victims by sex.
sex_counts = df1['vicsex'].value_counts()
# Derive the wedge labels from the counted categories so a label can never be
# attached to the wrong wedge. Single-letter codes are spelled out; any other
# value is shown as stored.
# NOTE(review): the 'M'/'F' mapping assumes the column may use those codes - confirm.
readable_names = {'M': 'Male', 'F': 'Female'}
wedge_labels = [readable_names.get(str(value), str(value)) for value in sex_counts.index]
# Pull out only the first (largest) wedge; sized to however many categories exist.
wedge_offsets = [0.1 if position == 0 else 0 for position in range(len(sex_counts))]
fig, ax = plt.subplots(figsize=(15, 5))
# Draw on the axes just created and keep `ax` bound to it (not to pie's return tuple).
pie_parts = ax.pie(sex_counts, labels=wedge_labels, explode=wedge_offsets, shadow=True, radius=1.5, autopct='%1.2f%%')

From 2008 to 2018, more males than females were murdered.

During the said period, the number of males murdered was 3 times the number of murdered females. 

In [None]:
# Homicide counts per year, with one bar per victim sex within each year.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(20, 12))
ax = sns.countplot(data=df1, x="year", hue="vicsex")
ax.set_title('distribution of homicides based on victim sex'.upper(), fontsize=15)
ax.set_xlabel('Year', fontsize=15)
ax.set_ylabel('Homicide Count', fontsize=12)
# Write each bar's count just above it, centred on the bar.
for bar in ax.patches:
    count = bar.get_height()
    centre_x = bar.get_x() + bar.get_width() / 2
    ax.text(centre_x, count + 1, count, ha='center', weight='bold')

##### _Exploring Ethnicity Of Victim_

In [None]:
# df1.vicethnic.value_counts() 
# df1.vicethnic.isnull().sum()

In [None]:
# Replacing missing values with the mode of the column.
# The mode is computed from the data rather than hard-coded, and the filled
# Series is assigned back to the frame: an inplace fillna on an attribute-accessed
# column acts on a temporary under pandas Copy-on-Write and leaves df1 unchanged.
ethnicity_mode = df1['vicethnic'].mode().iat[0]
df1['vicethnic'] = df1['vicethnic'].fillna(ethnicity_mode)

In [None]:
# Homicide counts per victim ethnicity.
fig, ax = plt.subplots(figsize=(20, 12))
bar_colour = sns.color_palette()[9]
ax = sns.countplot(data=df1, x="vicethnic", color=bar_colour)
ax.set_title('distribution of homicides based on victim ethnicity'.upper(), fontsize=25)
ax.set_xlabel('Victim Ethnicity', fontsize=15)
ax.set_ylabel('Homicide Count', fontsize=12)
# Write each bar's count at its top edge, centred on the bar.
for bar in ax.patches:
    count = bar.get_height()
    centre_x = bar.get_x() + bar.get_width() / 2
    ax.text(centre_x, count, count, ha='center', weight='bold')

In [None]:
# A distribution of homicides as per the ethnicity of the victim, one chart per year.
for year in np.arange(2008, 2019):
    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(20,12))
    # Restrict the data to the current year so each chart matches its title
    # (plotting the whole frame would repeat the same chart on every pass).
    # Sorting keeps the ethnicity bars in the same order from year to year.
    victims_in_year = df1[df1.year == year].sort_values(by=['vicethnic'])
    # A single colour is enough: the bars are split by ethnicity on the x axis,
    # so no `hue` is combined with `color`.
    ax = sns.countplot(data=victims_in_year, x="vicethnic", color=sns.color_palette()[9])
    ax.set_title('distribution of homicides based on victim ethnicity in {}'.format(year).upper(), fontsize=25)
    ax.set_xlabel('Victim Ethnicity', fontsize=15)
    ax.set_ylabel('Homicide Count', fontsize=12)
    # Write each bar's count at its top edge, centred on the bar.
    for i in ax.patches:
        ax.text(i.get_x() + i.get_width()/2, i.get_height(), i.get_height(), horizontalalignment='center', fontweight='bold')

In [None]:
# Display the working frame (pandas truncates long frames in the rendered output).
df1

##### _Exploring The Weapons Used_

In [None]:
# Number of homicides recorded for each weapon category.
df1['weapon'].value_counts()

In [None]:
# Horizontal bars: homicide counts per weapon.
fig, ax = plt.subplots(figsize=(20, 12))
bar_colour = sns.color_palette()[9]
ax = sns.countplot(data=df1, y="weapon", color=bar_colour)
ax.set_title('distribution of weapons used for homicides'.upper(), fontsize=25)
ax.set_ylabel('Weapon', fontsize=15)
ax.set_xlabel('Homicide Count', fontsize=12)
# Write each bar's count just past its right end, vertically centred on the bar.
for bar in ax.patches:
    count = bar.get_width()
    centre_y = bar.get_y() + bar.get_height() / 2
    ax.text(count + 5, centre_y, count, ha='center', weight='bold')

From the above plot, we can see that most homicides were committed using knives.

In [None]:
# df1.ladnm.value_counts()

In [None]:
# The number of homicides recorded in the various towns.
# The style is set before the figure is created: seaborn styles only apply to
# axes created after the call, so setting it afterwards would not affect this plot.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(15,25))
ax = sns.countplot(data=df1, y='ladnm', color=sns.color_palette()[9])
ax.set_title("A PLOT OF NUMBER OF CRIMES IN TOWNS")
# Write each bar's count just past its right end, vertically centred on the bar.
for i in ax.patches:
    ax.text(i.get_width()+1, i.get_y() + i.get_height()/2, i.get_width(), horizontalalignment='center', fontweight='bold')

In [None]:
# The distribution of weapons used for homicides in the various towns.
# The style is set before the figure is created: seaborn styles only apply to
# axes created after the call.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(100,25))
ax = sns.countplot(data=df1, x='ladnm', hue='weapon')
# Title describes this chart (weapons per town) rather than repeating the
# title of the plain per-town count plot.
ax.set_title("A PLOT OF WEAPONS USED FOR HOMICIDES IN TOWNS")
# Trailing semicolon keeps the tick objects' repr out of the cell output.
plt.xticks(rotation=45);

# Tabular equivalent, if needed: df1.groupby(['ladnm']).weapon.value_counts()

In [None]:
# The distribution of victim age groups for homicides in the various towns.
# The style is set before the figure is created: seaborn styles only apply to
# axes created after the call.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(100,25))
ax = sns.countplot(data=df1, x='ladnm', hue='vicagegp')
# Title describes this chart (victim age groups per town) rather than repeating
# the title of the plain per-town count plot.
ax.set_title("A PLOT OF VICTIM AGE GROUPS IN TOWNS")
# Trailing semicolon keeps the tick objects' repr out of the cell output.
plt.xticks(rotation=45);

# Tabular equivalent, if needed: df1.groupby(['ladnm']).vicagegp.value_counts()

In [None]:
# Making a map of London
m = folium.Map(location=[51.507351, -0.127758], tiles="cartodbpositron")

# Making a marker cluster to contain all of the points (latitudes and longitudes) in the dataset.
points = MarkerCluster().add_to(m)

# folium rejects NaN coordinates, so rows without a position are skipped.
# Iterating over the two coordinate columns directly avoids building a full
# row Series per record, which is what iterrows does.
located = df1.dropna(subset=['latitude', 'longitude'])
for lat, lon in zip(located['latitude'], located['longitude']):
    folium.Marker([lat, lon]).add_to(points)

m

In [None]:
n = folium.Map(location=[51.507351, -0.127758], tiles="cartodbdark_matter")

# Making a heatmap to show all homicide hotspots.
# folium's HeatMap refuses data containing NaNs, so rows without a position are
# dropped, and the coordinates are passed as a plain list of [lat, lon] pairs.
hotspot_coords = df1[['latitude', 'longitude']].dropna().values.tolist()
heat = HeatMap(hotspot_coords, min_opacity=0.3).add_to(n)

n