## Imports

In [1]:
import pandas as pd
import numpy as np
import os
from dotenv import find_dotenv, load_dotenv
from src.data.make_dataset import main
from src.utils import get_root_dir

%load_ext autoreload
%autoreload 2

In [2]:
# Load environment variables
load_dotenv(find_dotenv())
url_population = os.environ.get("URL_POP_BOROUGH")
url_population

'https://data.london.gov.uk/download/land-area-and-population-density-ward-and-borough/77e9257d-ad9d-47aa-aeed-59a00741f301/housing-density-borough.csv'

In [3]:
df_pop = pd.read_csv(url_population)
df_pop.head()

Unnamed: 0,Code,Name,Year,Source,Population,Inland_Area _Hectares,Total_Area_Hectares,Population_per_hectare,Square_Kilometres,Population_per_square_kilometre
0,E09000001,City of London,1999,ONS MYE,6581,290.4,314.9,22.7,2.9,2266.2
1,E09000001,City of London,2000,ONS MYE,7014,290.4,314.9,24.2,2.9,2415.3
2,E09000001,City of London,2001,ONS MYE,7359,290.4,314.9,25.3,2.9,2534.1
3,E09000001,City of London,2002,ONS MYE,7280,290.4,314.9,25.1,2.9,2506.9
4,E09000001,City of London,2003,ONS MYE,7115,290.4,314.9,24.5,2.9,2450.1


In [4]:
df_crime = main()
df_crime.head()

/home/jamie/code/JamieW365/londoncrime/data/raw /home/jamie/code/JamieW365/londoncrime/data/processed


Unnamed: 0,Major,Minor,Borough,Date,Count
0,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-04-01,6.0
1,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-05-01,5.0
2,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-06-01,11.0
3,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-07-01,10.0
4,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-08-01,6.0


We dont have population data for London Heathrow and London City Airports
We will do an inner merge on both data sets, retaining only boroughs in which we have both crime and population statistics

In [5]:
(set(df_pop['Name']) ^ set(df_crime['Borough'])) - set(df_pop['Name'])

{'London Heathrow and London City Airports'}

In [6]:
df_merged = df_crime.merge(df_pop[['Name', 'Population']], how='inner', left_on='Borough', right_on='Name').drop('Name', axis=1)
df_merged.head()

Unnamed: 0,Major,Minor,Borough,Date,Count,Population
0,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-04-01,6.0,162444
1,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-04-01,6.0,163893
2,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-04-01,6.0,165654
3,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-04-01,6.0,166357
4,Arson and Criminal Damage,Arson,Barking and Dagenham,2010-04-01,6.0,166210


In [7]:
df_merged.sample(20)

Unnamed: 0,Major,Minor,Borough,Date,Count,Population
3071879,Violence Against the Person,Violence with Injury,Croydon,2013-02-01,213.0,441922
675497,Miscellaneous Crimes Against Society,Wildlife Crime,Barnet,2013-02-01,0.0,386198
4461257,Burglary,Domestic Burglary,Hackney,2012-01-01,126.0,289488
1427201,Miscellaneous Crimes Against Society,Exploitation of Prostitution,Brent,2015-02-01,0.0,290901
10761039,Miscellaneous Crimes Against Society,Fraud or Forgery Associated with Driver Records,Redbridge,2021-11-01,0.0,244273
2940561,Public Order Offences,Public Fear Alarm or Distress,Croydon,2021-05-01,176.0,368886
7060519,Vehicle Offences,Theft from a Motor Vehicle,Hillingdon,2022-11-01,225.0,269465
166533,Miscellaneous Crimes Against Society,Other Notifiable Offences,Barking and Dagenham,2017-06-01,1.0,248726
13552408,Possession of Weapons,Possession of Article with Blade or Point,Wandsworth,2012-07-01,8.0,308300
12305861,Sexual Offences,Rape,Sutton,2023-11-01,15.0,185860


In [8]:
df_merged['Count_Per_1000'] = df_merged['Count'] / (df_merged['Population'] / 1000)
df_merged.sample(20)

Unnamed: 0,Major,Minor,Borough,Date,Count,Population,Count_Per_1000
5537338,Miscellaneous Crimes Against Society,Soliciting for Prostitution,Haringey,2014-07-01,0.0,264284,0.0
925499,Burglary,Domestic Burglary,Bexley,2017-06-01,60.0,219123,0.273819
10518816,Robbery,Robbery of Business Property,Newham,2016-04-01,5.0,477162,0.010479
12771840,Theft,Other Theft,Tower Hamlets,2018-12-01,353.0,295909,1.192934
1138007,Possession of Weapons,Possession of Firearm with Intent,Bexley,2016-04-01,0.0,289399,0.0
1595678,Public Order Offences,"Other Offences Against the State, or Public Order",Brent,2011-10-01,8.0,270939,0.029527
2765735,Miscellaneous Crimes Against Society,Going Equipped for Stealing,Croydon,2014-07-01,1.0,357951,0.002794
12561515,Miscellaneous Crimes Against Society,Obscene Publications,Tower Hamlets,2023-07-01,12.0,380598,0.031529
12678949,Possession of Weapons,Possession of Firearms Offences,Tower Hamlets,2020-05-01,7.0,415811,0.016835
10960168,Robbery,Robbery of Personal Property,Redbridge,2012-12-01,93.0,323302,0.287657


In [10]:
df_merged.isna().sum()

Major             0
Minor             0
Borough           0
Date              0
Count             0
Population        0
Count_Per_1000    0
dtype: int64