### Importing necessary libraries

In [None]:
import pandas as pd
import geopandas
import json
import requests
from shapely.geometry import Polygon
from tqdm.notebook import tqdm
from datetime import datetime, timedelta
import plotly.express as px
import pickle

### Reading the GeoJSON file to get the boundaries of the boroughs and their neighbourhoods

In [None]:
# Restore the object that was pickled to 'path_variable.pkl'.
with open('path_variable.pkl', 'rb') as pickle_file:
    path_variable = pickle.load(pickle_file)

# If loading fails, assign PATH by hand instead, using the same value you gave
# in the retrieving_data.ipynb notebook, i.e. the folder where the geojson
# file is stored, for example:
# PATH = '/Users/ansat.omurzakov/Desktop/TUe/Data Challenge 2/data/'
# NOTE(review): path_variable is presumably a pathlib path object (it must
# provide .as_posix()) - confirm against the notebook that writes the pickle.
PATH = path_variable.as_posix()

In [None]:
with open(PATH + '/neighborhood_boundaries.json', 'r') as boundaries_file:
    json_data = json.load(boundaries_file)

# Flatten the nested structure (borough -> list of {neighbourhood: polygon}
# mappings) into one record per neighbourhood.
boroughs_data = []
for borough, area_codes in json_data.items():
    for entry in area_codes:
        boroughs_data.extend(
            {'borough': borough, 'neighbourhood': code, 'polygon': outline}
            for code, outline in entry.items()
        )

# One row per neighbourhood; the raw coordinate data becomes a shapely Polygon.
boroughs_neighbours = pd.DataFrame(boroughs_data)
boroughs_neighbours['polygon'] = boroughs_neighbours['polygon'].apply(Polygon)
boroughs_neighbours

In [None]:
# Number of neighbourhood entries recorded for each borough.
boroughs_neighbours['neighbourhood'].groupby(boroughs_neighbours['borough']).count()

# What do I want to know?
- Does the MPS respond to the issues raised by citizens in each borough?
- How quickly does it respond?

# What can I do with this data?
- Identify boroughs where the police do a poor job (either they do nothing, or they take too long to address an issue)

### Going through all of the boroughs and their neighbourhoods to retrieve the number of unresolved cases and their percentage of the total cases

In [None]:
# Summarise the data.police.uk "priorities" feed for every borough.
# dic maps borough -> (total cases, unresolved cases, average resolution time).
# A case counts as unresolved when its 'action' or 'action-date' is missing.
dic = {}
date_format = "%Y-%m-%dT%H:%M:%S"
for count, borough in enumerate(boroughs_neighbours['borough'].unique(), start=1):

    print(f'{count}. Processing {borough} borough')
    total_borough_cases = 0
    unresolved_borough_cases = 0
    time_borough = timedelta()
    in_borough = boroughs_neighbours['borough'] == borough
    for neighbourhood in boroughs_neighbours.loc[in_borough, 'neighbourhood'].unique():
        # The timeout keeps one stalled connection from hanging the whole run;
        # raise_for_status() reports HTTP errors instead of parsing an error body.
        reply = requests.get(
            f'https://data.police.uk/api/metropolitan/{neighbourhood}/priorities',
            timeout=30,
        )
        reply.raise_for_status()
        response = reply.json()
        total_borough_cases += len(response)
        for action in response:
            if action['action'] is None or action['action-date'] is None:
                unresolved_borough_cases += 1
            else:
                # Resolution time = action date minus issue date.
                time_borough += (
                    datetime.strptime(action['action-date'], date_format)
                    - datetime.strptime(action['issue-date'], date_format)
                )
    # Only resolved cases contribute to time_borough, so the average is taken
    # over resolved cases; dividing by the total would bias it downwards.
    # A borough without any resolved case gets NaT instead of raising
    # ZeroDivisionError.
    resolved_borough_cases = total_borough_cases - unresolved_borough_cases
    if resolved_borough_cases:
        average_time = time_borough / resolved_borough_cases
    else:
        average_time = pd.NaT
    dic[borough] = (total_borough_cases, unresolved_borough_cases, average_time)

### Building the dataset with the average resolution time of issues and the percentage of unresolved cases

In [None]:
# Work on a shallow copy of the per-borough results.
borough_data = dict(dic)

# One row per borough; the dictionary keys become the index.
borough_df = pd.DataFrame.from_dict(
    borough_data,
    orient='index',
    columns=['Total Cases', 'Unresolved Cases', 'Average Resolution Time'],
)

# Percentage of unresolved cases, rounded to two decimals.
unresolved_share = borough_df['Unresolved Cases'].div(borough_df['Total Cases'])
borough_df['uc_proportion'] = unresolved_share.mul(100).round(2)

# Highest percentage first; reset_index() moves the keys into an 'index' column.
borough_df = borough_df.sort_values(by='uc_proportion', ascending=False).reset_index()
borough_df

In [None]:
# Bar chart of the unresolved-case percentage per borough, highest first.
fig = px.bar(
    borough_df.sort_values(by='uc_proportion', ascending=False).reset_index(),
    x='index',
    y='uc_proportion',
    title='Percentage of unresolved cases per borough',
)

# Dashed black horizontal reference line at 40 percent.
fig.add_hline(y=40, line_width=2, line_dash="dash", line_color="black")
fig.update_layout(xaxis_title="Borough name", yaxis_title="Percentage ")
fig.show()

### Retrieving number of unresolved cases from each neighbourhood

In [None]:
# Count total and unresolved priorities for every neighbourhood.
# dic2 maps neighbourhood -> (borough, total cases, unresolved cases).
# A case counts as unresolved when its 'action' or 'action-date' is missing.
# NOTE(review): dic2 is keyed by neighbourhood only, so a neighbourhood code
# that appears under two boroughs would keep just the last entry - confirm
# the codes are unique.
cop = boroughs_neighbours  # alias of the same DataFrame, not a copy
dic2 = {}
for borough in cop['borough'].unique():
    print(f'Doing {borough} borough')
    for neighbourhood in cop.loc[cop['borough'] == borough, 'neighbourhood'].unique():
        # The timeout keeps one stalled connection from hanging the whole run;
        # raise_for_status() reports HTTP errors instead of parsing an error body.
        reply = requests.get(
            f'https://data.police.uk/api/metropolitan/{neighbourhood}/priorities',
            timeout=30,
        )
        reply.raise_for_status()
        response = reply.json()
        total_cases_neib = len(response)
        unres_cases_neib = sum(
            1
            for action in response
            if action['action'] is None or action['action-date'] is None
        )
        dic2[neighbourhood] = (borough, total_cases_neib, unres_cases_neib)

### Retrieving information on the number of neighbourhoods and the percentage of unresolved cases in the top 5 best-performing and worst-performing boroughs
- The lower the uc_proportion, the better

In [None]:
# One row per neighbourhood; the dic2 keys become the index.
neib_df = pd.DataFrame.from_dict(
    dic2,
    orient='index',
    columns=['borough', 'total cases', 'Unresolved Cases'],
)

# Percentage of unresolved cases, rounded to two decimals.
neib_df['uc_proportion'] = (
    neib_df['Unresolved Cases'].div(neib_df['total cases']).mul(100).round(2)
)

In [None]:
# Turn the current index into a regular column and renumber the rows from 0.
neib_df = neib_df.reset_index(drop=False)

In [None]:
# Hard-coded borough names for the two groups compared below.
# NOTE: the following cells rebind `good` and `bad` to summary DataFrames, so
# re-run this cell before re-running them.
good = [
    'Merton',
    'Kingston upon Thames',
    'Hackney',
    'Wandsworth',
    'Richmond upon Thames',
]

bad = [
    'Kensington and Chelsea',
    'Hammersmith and Fulham',
    'Sutton',
    'Hounslow',
    'Camden',
]

In [None]:
# Rows of neib_df whose borough is in the `good` list.
good_df = neib_df[neib_df['borough'].isin(good)]

# Per borough: summed case counts joined with the mean of 'uc_proportion'.
smth1 = (
    good_df.groupby('borough')[['total cases', 'Unresolved Cases']].sum()
    .merge(good_df.groupby('borough')[['uc_proportion']].mean(), on='borough')
)

# Add the number of rows per borough (non-null count of the 'index' column).
# This rebinds `good` from the list of names to the summary DataFrame.
good = smth1.merge(good_df.groupby('borough')[['index']].count(), on='borough')
good

In [None]:
# Rows of neib_df whose borough is in the `bad` list.
bad_df = neib_df[neib_df['borough'].isin(bad)]

# Per borough: summed case counts joined with the mean of 'uc_proportion'.
smth2 = (
    bad_df.groupby('borough')[['total cases', 'Unresolved Cases']].sum()
    .merge(bad_df.groupby('borough')[['uc_proportion']].mean(), on='borough')
)

# Add the number of rows per borough (non-null count of the 'index' column).
# This rebinds `bad` from the list of names to the summary DataFrame.
bad = smth2.merge(bad_df.groupby('borough')[['index']].count(), on='borough')

# Mean percentage rounded to two decimals, kept in a separate column.
bad['uc_proportion2'] = bad['uc_proportion'].apply(lambda value: round(value, 2))
bad