# Predictive-Policing-in-the-London-Metropolitan-Police-District

## 1. Environment Setup

### 1. 1. Update Linux Packages

In [None]:
# Refresh the apt package index before installing anything (needs sudo on a Debian/Ubuntu-style system).
!sudo apt update

### 1. 2. Install Python and Packages (pip & venv)

In [None]:
# Install Python 3 together with pip and the venv module; -y answers the confirmation prompt automatically.
!sudo apt install -y python3 python3-pip python3-venv

### 1. 3. Create a Virtual Environment

In [None]:
# Create a virtual environment in a .venv folder inside the current working directory.
!python3 -m venv .venv

### 1. 4. Activate the Virtual Environment

In [None]:
# NOTE(review): a `!` command runs in its own short-lived shell, so this activation
# most likely does not carry over to later cells or to the running kernel.
# Confirm that the notebook kernel itself is started from .venv (for example by
# selecting .venv as the kernel/interpreter) rather than relying on this cell.
!source .venv/bin/activate

### 1. 5. Install the Required Python Packages

In [None]:
# Install the packages listed in requirements.txt.
# %pip (rather than !pip) installs into the environment the kernel is running in.
%pip install -r requirements.txt

### 1. 6. Create Folders to Store Datasets

In [None]:
import os

# Folder layout: Datasets/<stage>/<source>, with the same source folders under every stage.
dataset_root = "Datasets"

# Pipeline stages, in order: raw downloads, data staged for processing, processed output.
stage_folders = ["Raw-Data", "Data-For-Processing", "Processed-Data"]

# One folder per data source, repeated under each stage.
source_folders = [
    "Police-Data",
    "Map-Data",
    "Income-Data",
    "Population-Data",
    "Unemployment-Data",
    "Weather-Data",
]

# exist_ok=True makes the cell safe to re-run when the folders already exist.
os.makedirs(dataset_root, exist_ok=True)
for stage in stage_folders:
    stage_path = f"{dataset_root}/{stage}"
    os.makedirs(stage_path, exist_ok=True)
    for source in source_folders:
        os.makedirs(f"{stage_path}/{source}", exist_ok=True)

## 2. Data Ingestion

### 2. 2. Police Data Ingestion

In [None]:
# Download Police Data
# Two archive snapshots from data.police.uk are saved into Datasets/Raw-Data/Police-Data/.
# NOTE(review): a failing `!wget` does not stop the cell, so each "Downloaded ..." message
# is printed even when the download did not succeed - check the wget output above it.
!wget -O Datasets/Raw-Data/Police-Data/police_data_2020_2022.zip https://data.police.uk/data/archive/2022-12.zip
print("Downloaded police data from 2020 to 2022.")

!wget -O Datasets/Raw-Data/Police-Data/police_data_2022_2024.zip https://data.police.uk/data/archive/2024-12.zip
print("Downloaded police data from 2022 to 2024.")

In [None]:
# Extract Police Data
# Both archives are unpacked into the same folder. -o overwrites existing files without
# prompting (so the cell can be re-run), and -d sets the extraction directory.
!unzip -o Datasets/Raw-Data/Police-Data/police_data_2020_2022.zip -d Datasets/Raw-Data/Police-Data/
!unzip -o Datasets/Raw-Data/Police-Data/police_data_2022_2024.zip -d Datasets/Raw-Data/Police-Data/
print("Extracted police data zip files.")

### 2. 3. Map Data Ingestion

In [None]:
# Download Map Data

# The LSOA and MSOA map data have no direct download links, so please download them
# manually from the pages below and place the files in Datasets/Raw-Data/Map-Data/.
# LSOA Map Data - https://geoportal.statistics.gov.uk/datasets/6beafcfd9b9c4c9993a06b6b199d7e6d_0/explore?location=43.468898%2C-2.489483%2C3.79 (Both csv and geojson formats available)
# MSOA Map Data - https://geoportal.statistics.gov.uk/datasets/12baf1e6a44441208ffe5ba5ed063a68_0/explore?location=52.284503%2C-1.473701%2C11.49 (Both csv and geojson formats available)

# LSOA to MSOA Lookup Data (this one can be fetched directly)
!wget -O Datasets/Raw-Data/Map-Data/LSOA_to_MSOA_Lookup_2021.csv https://www.arcgis.com/sharing/rest/content/items/c4f84c38814d4b82aa4760ade686c3cc/data
print("Downloaded LSOA to MSOA Lookup data.")

### 2. 4. Income Data Ingestion

In [None]:
# Download Income Data

# calculate other years - https://chatgpt.com/share/695b5cf5-8750-8007-bcbd-d531f4e8bdd9

# 2023 income data
!wget -O Datasets/Raw-Data/Income-Data/Income_Data_MSOA_2023.xlsx https://www.ons.gov.uk/file?uri=/employmentandlabourmarket/peopleinwork/earningsandworkinghours/datasets/smallareaincomeestimatesformiddlelayersuperoutputareasenglandandwales/financialyearending2023/datasetfinal.xlsx
print("Downloaded 2023 income data.")

#2020 income data
!wget -O Datasets/Raw-Data/Income-Data/Income_Data_MSOA_2020.xlsx https://www.ons.gov.uk/file?uri=/employmentandlabourmarket/peopleinwork/earningsandworkinghours/datasets/smallareaincomeestimatesformiddlelayersuperoutputareasenglandandwales/financialyearending2020/saiefy1920finalqaddownload280923.xlsx    
print("Downloaded 2020 income data.")

### 2. 5. Population Data Ingestion

In [None]:
# Download Population Data

# https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/lowersuperoutputareamidyearpopulationestimates

# Mid 2020 - 2021
!wget -O Datasets/Raw-Data/Population-Data/LSOA_Midyear_Population_Estimates_2020.xlsx https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/lowersuperoutputareamidyearpopulationestimates/mid2020sape23dt2/sape23dt2mid2020lsoasyoaestimatesunformatted.xlsx

# Mid 2021 - 2022
!wget -O Datasets/Raw-Data/Population-Data/LSOA_Midyear_Population_Estimates_2021_2022.xlsx https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/lowersuperoutputareamidyearpopulationestimates/mid2021andmid2022/sapelsoasyoatablefinal.xlsx

# Mid 2022 - 2024
!wget -O Datasets/Raw-Data/Population-Data/LSOA_Midyear_Population_Estimates_2022_2024.xlsx https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/lowersuperoutputareamidyearpopulationestimates/mid2022revisednov2025tomid2024/sapelsoasyoa20222024.xlsx

### 2. 6. Unemployment Data Ingestion

In [None]:
# Download Unemployment Data into Datasets/Raw-Data/Unemployment-Data/.
# Data for 2021 - https://www.nomisweb.co.uk/output/census/2021/census2021-ts065.zip
# Use this to calculate all the other years generally - https://www.ons.gov.uk/generator?format=csv&uri=/employmentandlabourmarket/peoplenotinwork/unemployment/timeseries/mgsx/lms
# NOTE(review): the second wget below requests the `/previous/v118` version of that
# series, not the unversioned URL listed above - confirm which one is intended.

!wget -O Datasets/Raw-Data/Unemployment-Data/Unemployment_Data_2021.zip https://www.nomisweb.co.uk/output/census/2021/census2021-ts065.zip
print("Downloaded unemployment data for 2021.")

# The URL is quoted because it contains `?` and `&`; -q silences wget's progress output.
!wget -q -O Datasets/Raw-Data/Unemployment-Data/Unemployment_Data_General_All_Years.csv "https://www.ons.gov.uk/generator?format=csv&uri=/employmentandlabourmarket/peoplenotinwork/unemployment/timeseries/mgsx/lms/previous/v118"
print("Downloaded general unemployment data for all years.")

In [None]:
# Unpack the 2021 unemployment archive next to the zip file.
# -o overwrites existing files without prompting, so the cell can be re-run.
!unzip -o Datasets/Raw-Data/Unemployment-Data/Unemployment_Data_2021.zip -d Datasets/Raw-Data/Unemployment-Data/
print("Extracted unemployment data zip file.")

### 2. 7. Weather Data Ingestion

In [None]:
# Download Weather Data
# Monthly data only - https://www.metoffice.gov.uk/pub/data/weather/uk/climate/stationdata/heathrowdata.txt
# The Heathrow station file is saved as plain text into Datasets/Raw-Data/Weather-Data/.

!wget -O Datasets/Raw-Data/Weather-Data/Heathrow_Monthly_Weather_Data.txt https://www.metoffice.gov.uk/pub/data/weather/uk/climate/stationdata/heathrowdata.txt
print("Downloaded Heathrow monthly weather data.")

### 2. 8. Turning XLSX Files to CSV Files

In [None]:
import pandas as pd
from pathlib import Path

# Excel workbooks fetched in the ingestion steps above; every sheet of each
# workbook is exported to its own CSV file.
excel_files = [
    "Datasets/Raw-Data/Population-Data/LSOA_Midyear_Population_Estimates_2020.xlsx",
    "Datasets/Raw-Data/Population-Data/LSOA_Midyear_Population_Estimates_2021_2022.xlsx",
    "Datasets/Raw-Data/Population-Data/LSOA_Midyear_Population_Estimates_2022_2024.xlsx",
    "Datasets/Raw-Data/Income-Data/Income_Data_MSOA_2020.xlsx",
    "Datasets/Raw-Data/Income-Data/Income_Data_MSOA_2023.xlsx"
]

for file in excel_files:
    file_path = Path(file)

    # CSV copies go into a "csv" sub-folder next to the source workbook
    output_dir = file_path.parent / "csv"
    output_dir.mkdir(exist_ok=True)

    # Open the workbook once and reuse it for every sheet. The context manager
    # closes the underlying file handle when all sheets are written or a sheet
    # fails, instead of leaving one open handle per workbook.
    with pd.ExcelFile(file_path) as xls:
        for sheet in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet)

            # Clean sheet name for filename (spaces and slashes become underscores)
            safe_sheet_name = sheet.replace(" ", "_").replace("/", "_")

            # Output name pattern: <workbook stem>_<sheet name>.csv
            output_file = output_dir / f"{file_path.stem}_{safe_sheet_name}.csv"
            df.to_csv(output_file, index=False)

            print(f"Saved: {output_file}")


## 3. Data Cleaning

## 4. Data Transformation