### Importing necessary libraries

In [1]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import re
import requests
import json
import pickle
from shapely.geometry import Point, Polygon
from tqdm import tqdm
tqdm.pandas()

ModuleNotFoundError: No module named 'plotly'

In [None]:
# Load the variable from the file
# Restore the data-folder path object stored in 'path_variable.pkl'
# (presumably written by retrieving_data.ipynb — verify).
# NOTE(review): pickle.load can execute arbitrary code; only load a file you created yourself.
with open('path_variable.pkl', 'rb') as pickle_file:
    path_variable = pickle.load(pickle_file)

# Later cells build file names by string concatenation, so keep PATH as a plain string.
PATH = path_variable.as_posix()

In [2]:
# Fallback: if loading the pickled path does not work, set PATH manually to the same
# value that was used in the retrieving_data.ipynb notebook.
# Change PATH to the folder where the geojson file is stored.
# Note: later cells build file names as PATH + '/<file>', so the trailing slash here
# results in a doubled separator (e.g. 'data//metropolitan-stop-and-search.csv').
PATH = 'data/'

### Importing the data

In [3]:
# Raw Metropolitan Police stop-and-search records, read from the data folder.
sas = pd.read_csv(f'{PATH}/metropolitan-stop-and-search.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


### Some data cleaning before using it

In [None]:
# Keep only rows that have both coordinates and are not vehicle searches.
has_coordinates = sas['Longitude'].notna() & sas['Latitude'].notna()
is_not_vehicle_search = sas['Type'] != 'Vehicle search'
sas = sas[has_coordinates & is_not_vehicle_search]

###  Assign boroughs for each row

In [None]:
with open(PATH + '/neighborhood_boundaries.json', 'r') as boundaries_file:
    json_data = json.load(boundaries_file)

# Flatten the nested JSON (borough -> list of {neighbourhood: polygon} dicts)
# into one record per neighbourhood.
boroughs_data = [
    {'borough': borough, 'neighbourhood': neighbourhood, 'polygon': polygon}
    for borough, area_codes in json_data.items()
    for neighbourhood_map in area_codes
    for neighbourhood, polygon in neighbourhood_map.items()
]

# Turn each raw coordinate list into a shapely Polygon for point-in-polygon tests.
boroughs_neighbours = pd.DataFrame(boroughs_data)
boroughs_neighbours['polygon'] = boroughs_neighbours['polygon'].apply(lambda coords: Polygon(coords))
boroughs_neighbours

In [None]:
def get_borough_name(data):
    """Return the borough whose neighbourhood polygon contains the given location.

    Parameters
    ----------
    data : pair of numbers
        Unpacked as ``(lat, long)``; the notebook calls this row-wise on the
        ``['Latitude', 'Longitude']`` columns.

    Returns
    -------
    The ``borough`` value of the first matching row of ``boroughs_neighbours``,
    or the string ``'Unknown'`` when no polygon contains the point.
    """
    lat, long = data
    # NOTE(review): the point is built as (lat, long), while shapely treats the first
    # argument as x. This is only correct if the polygons use the same ordering — confirm.
    point = Point(lat, long)
    is_within = boroughs_neighbours['polygon'].apply(lambda x: point.within(x))
    matching_boroughs = boroughs_neighbours[is_within]['borough'].values
    if len(matching_boroughs) == 0:
        return 'Unknown'
    return matching_boroughs[0]

In [None]:
# Resolve the borough once per distinct coordinate pair instead of once per record.
unique_coords = sas[['Latitude', 'Longitude']].drop_duplicates()
locs = unique_coords.assign(borough=unique_coords.progress_apply(get_borough_name, axis=1))

In [None]:
# Display the distinct coordinate pairs together with their assigned borough.
locs

In [None]:
# Attach the borough found for each coordinate pair back onto every record.
sas = pd.merge(sas, locs, how='right', on=['Latitude', 'Longitude'])

### Removing irrelevant columns from the stop and search dataset

In [None]:
# Columns that are not used anywhere in the rest of the analysis.
irrelevant_columns = [
    'Part of a policing operation',
    'Policing operation',
    'Outcome linked to object of search',
    'Removal of more than just outer clothing',
]
sas_cleaned = sas.drop(columns=irrelevant_columns)

### Creating a column with generalized ethnicities, so that we have a single ethnicity column with as much information as possible

In [None]:
# Fill in the ethnicity from the self-defined value when the officer-defined value is
# 'Other' or missing, so that more rows carry usable information.
def group_ethnicity(data):
    """Return one generalized ethnicity label for a stop-and-search record.

    Parameters
    ----------
    data : mapping (e.g. a DataFrame row)
        Must provide the keys 'Officer-defined ethnicity' and
        'Self-defined ethnicity'.

    Returns
    -------
    * The officer-defined value, unchanged, when it is an informative string
      (anything other than 'Other') or when no self-defined string is available.
    * Otherwise 'White', 'Black', 'Asian', 'Mixed' or 'Other', chosen from the
      prefix of the self-defined value.
    * The officer-defined value again when the self-defined string matches none
      of the known prefixes.

    Notes
    -----
    The previous pattern for the mixed group was ``^(Mixed)|``. Its empty
    alternative matches every string, so every value that was not
    White/Black/Asian was labelled 'Mixed' and the 'Other' branch could never run.
    """
    officer_defined = data['Officer-defined ethnicity']
    self_defined = data['Self-defined ethnicity']

    officer_is_uninformative = officer_defined == 'Other' or not isinstance(officer_defined, str)
    if not (officer_is_uninformative and isinstance(self_defined, str)):
        return officer_defined

    # re.match anchors at the start of the string, so these are prefix tests.
    if re.match(r"White", self_defined):
        return 'White'
    if re.match(r"Black", self_defined):
        return 'Black'
    if re.match(r"Asian|Chinese", self_defined):
        return 'Asian'
    if re.match(r"Mixed", self_defined):
        return 'Mixed'
    if re.match(r"Other|Not", self_defined):  # other / unknown / not stated
        return 'Other'

    # Unrecognised self-defined category: keep whatever the officer recorded.
    return officer_defined
        

In [None]:
# Compute the generalized ethnicity row by row and store it as a new column.
generalized_ethnicity = sas_cleaned.apply(group_ethnicity, axis=1)
sas_cleaned['generalized_ethnicity'] = generalized_ethnicity

In [None]:
# Drop records without a usable ethnicity or without a resolved borough.
has_ethnicity = sas_cleaned['generalized_ethnicity'].notna()
has_known_borough = sas_cleaned['borough'] != 'Unknown'
sas_cleaned = sas_cleaned[has_ethnicity & has_known_borough]

In [None]:
# Display the cleaned stop-and-search records.
sas_cleaned

### A new dataframe, without irrelevant columns, which will be merged with the other dataframes

In [None]:
# These columns are not carried into the merged dataset.
columns_not_needed_for_merge = [
    'Type',
    'Latitude',
    'Longitude',
    'Self-defined ethnicity',
    'Officer-defined ethnicity',
]
sas_for_merge = sas_cleaned.drop(columns=columns_not_needed_for_merge)

### adding new columns for month and year

In [None]:
# Parse the date column once and derive both calendar parts from it.
search_dates = pd.to_datetime(sas_for_merge['Date'])
sas_for_merge['year'] = search_dates.dt.year
sas_for_merge['month'] = search_dates.dt.month

In [None]:
# Restrict to the study period 2016-2023 (both bounds inclusive).
in_study_period = sas_for_merge['year'].between(2016, 2023)
sas_for_merge = sas_for_merge[in_study_period]

In [None]:
# The raw date is no longer needed once year and month have been extracted.
sas_for_merge = sas_for_merge.drop(columns='Date')

In [None]:
def give_quarter(month):
    """Map a calendar month number (1-12) to its quarter (1-4).

    Returns None for any value that is not one of the twelve month numbers.
    """
    months_by_quarter = {
        1: (1, 2, 3),
        2: (4, 5, 6),
        3: (7, 8, 9),
        4: (10, 11, 12),
    }
    for quarter, months in months_by_quarter.items():
        if month in months:
            return quarter
    return None

In [None]:
# Derive the quarter (1-4) from the month number.
search_quarters = sas_for_merge['month'].apply(give_quarter)
sas_for_merge['quarter'] = search_quarters

In [None]:
# Categorical columns that get one indicator column per category, so they can be summed later.
columns_to_count = [
    'Gender',
    'Age range',
    'generalized_ethnicity',
    'Legislation',
    'Object of search',
    'Outcome',
]
dummies = pd.get_dummies(sas_for_merge[columns_to_count])

# Place the indicator columns next to the original columns.
sas_cleaned_with_dummies = pd.concat([sas_for_merge, dummies], axis='columns')

In [None]:
# Remove the original categorical columns (now represented by indicator columns) and the month.
already_encoded_columns = [
    'Gender',
    'Age range',
    'generalized_ethnicity',
    'Legislation',
    'Object of search',
    'Outcome',
    'month',
]
wrap = sas_cleaned_with_dummies.drop(columns=already_encoded_columns)

In [None]:
# Sum the remaining columns per borough, year and quarter.
totals_per_borough_quarter = wrap.groupby(['borough', 'year', 'quarter']).sum()
wrap = totals_per_borough_quarter.reset_index()
wrap

### Importing a new dataframe with PAS data from 2016-2023 (not questions)

In [None]:
# PAS borough-level data; note that the file name has no extension.
pas = pd.read_csv(f'{PATH}/PAS_borough')

In [None]:
# Create a pivot table
pivot_table = pas.pivot_table(index=['Date', 'Borough'], columns='Measure', values='Proportion', fill_value=None)

# Reset index to make it columns again
pivot_table.reset_index(inplace=True)

In [None]:
# Parse the date column once and split it into its calendar parts.
pas_dates = pd.to_datetime(pivot_table['Date'])
pivot_table['month'] = pas_dates.dt.month
pivot_table['day'] = pas_dates.dt.day
pivot_table['year'] = pas_dates.dt.year

In [None]:
# Keep 2016-2024 (inclusive), renumber the rows and drop the raw date column.
in_pas_period = pivot_table['year'].between(2016, 2024)
pivot_table = pivot_table[in_pas_period].reset_index(drop=True)
pivot_table = pivot_table.drop(columns='Date')

In [None]:
# Rename 'City of Westminster' to 'Westminster'; the renamed column is the merge key later on.
pivot_table['Borough'] = pivot_table['Borough'].replace({'City of Westminster': 'Westminster'})

In [None]:
# Derive the quarter (1-4) from the month number.
pas_quarters = pivot_table['month'].apply(give_quarter)
pivot_table['quarter'] = pas_quarters

In [None]:
# PAS measures to keep, together with the merge keys.
pas_columns_for_merge = [
    'Borough',
    '"Good Job" local',
    'Trust MPS',
    'Contact ward officer',
    'Informed local',
    'Listen to concerns',
    'Relied on to be there',
    'Treat everyone fairly',
    'Understand issues',
    'quarter',
    'year',
]
# Lower-case the borough column name so it matches the stop-and-search table.
pivot_for_merge = pivot_table[pas_columns_for_merge].rename(columns={'Borough': 'borough'})

### Merging datasets of PAS and Stop and Search

In [None]:
# Keep only borough/quarter combinations present in both PAS and stop-and-search data.
all_data = pd.merge(pivot_for_merge, wrap, how='inner', on=['borough', 'year', 'quarter'])

In [None]:
# Display the merged PAS / stop-and-search table.
all_data

In [None]:
# Persist the merged table next to the input data (the row index is written as well).
all_data.to_csv(f'{PATH}/PAS_with_SAS.csv')

### Visualization of the change in Trust and Confidence scores in MOPAC throughout the 2016 Q2 - 2023 Q4 period

In [None]:
# Build a text label of the form '<year>-Q<quarter>' used as the x-axis in the charts.
year_text = all_data['year'].astype(str)
quarter_text = all_data['quarter'].astype(str)
all_data['year-quarter'] = year_text + '-Q' + quarter_text

In [None]:

# Small multiples: one 'Trust MPS' line chart per borough on a fixed 4 x 8 grid.
fig, axes = plt.subplots(nrows=4, ncols=8, figsize=(25, 15))
fig.subplots_adjust(hspace=0.5)

# Common axis limits so that every panel is drawn on the same scale.
x_min, x_max = all_data['year-quarter'].min(), all_data['year-quarter'].max()
y_min, y_max = all_data['Trust MPS'].min(), all_data['Trust MPS'].max()

# Show only roughly every tenth-of-the-range label to keep the x-axis readable.
all_xticks = sorted(all_data['year-quarter'].unique())
step = max(1, len(all_xticks) // 10)
xticks = all_xticks[::step]

boroughs = all_data['borough'].unique()
for i, borough in enumerate(boroughs):
    # Fill the grid row by row, eight panels per row.
    row, col = divmod(i, 8)
    ax = axes[row, col]

    # Rows belonging to the current borough only.
    data = all_data[all_data['borough'] == borough]

    ax.plot(data['year-quarter'], data['Trust MPS'], marker='o', linestyle='-')
    ax.set_title(borough)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xlabel('Year-Quarter')
    ax.set_ylabel('Trust')
    ax.tick_params(axis='x', rotation=45)

    # Reduced set of tick positions and matching rotated labels.
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks, rotation=45, ha='right')

# Figure-level title and final layout.
fig.suptitle('Trust in all boroughs observed by MOPAC', fontsize = 30, fontweight = 'bold')
plt.tight_layout()
plt.show()

In [None]:

# Small multiples: one '"Good Job" local' (confidence) line chart per borough on a fixed 4 x 8 grid.
fig, axes = plt.subplots(nrows=4, ncols=8, figsize=(25, 15))
fig.subplots_adjust(hspace=0.5)

# Common axis limits so that every panel is drawn on the same scale.
x_min, x_max = all_data['year-quarter'].min(), all_data['year-quarter'].max()
y_min, y_max = all_data['"Good Job" local'].min(), all_data['"Good Job" local'].max()

# Show only roughly every tenth-of-the-range label to keep the x-axis readable.
all_xticks = sorted(all_data['year-quarter'].unique())
step = max(1, len(all_xticks) // 10)
xticks = all_xticks[::step]

boroughs = all_data['borough'].unique()
for i, borough in enumerate(boroughs):
    # Fill the grid row by row, eight panels per row.
    row, col = divmod(i, 8)
    ax = axes[row, col]

    # Rows belonging to the current borough only.
    data = all_data[all_data['borough'] == borough]

    ax.plot(data['year-quarter'], data['"Good Job" local'], marker='o', linestyle='-')
    ax.set_title(borough)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xlabel('Year-Quarter')
    ax.set_ylabel('Confidence')
    ax.tick_params(axis='x', rotation=45)

    # Reduced set of tick positions and matching rotated labels.
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks, rotation=45, ha='right')

# Figure-level title and final layout.
fig.suptitle('Confidence in all boroughs observed by MOPAC', fontsize = 30, fontweight = 'bold')
plt.tight_layout()
plt.show()