In [9]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Load the cleaned CSV
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Optional: preview the first few rows
df.head()

Unnamed: 0,crime_id,month,longitude,latitude,location,lsoa_code,lsoa_name,crime_type,last_outcome_category
0,7c903b55af24fe6dab7eb96fe696f8e7d57e05817f6730...,2024-06-01,-2.746819,53.389101,On or near Further/Higher Educational Building,E01012393,Halton 001B,Drugs,Local resolution
1,461fe1ff825ef4cc740dafe58e9015449696d80978580a...,2024-06-01,-2.798814,53.354705,On or near Old Higher Road,E01012391,Halton 008B,Criminal damage and arson,Investigation complete; no suspect identified
2,ASB_FILL,2024-06-01,-2.871827,53.489763,On or near Gilescroft Avenue,E01006448,Knowsley 001A,Anti-social behaviour,ASB_FILL
3,ebeec31356de0e9219711d81f3d8006d33c275e04e9e7a...,2024-06-01,-2.871229,53.4893,On or near Watts Close,E01006448,Knowsley 001A,Criminal damage and arson,Investigation complete; no suspect identified
4,b04da555d9c4f211ce48bf6453dc0f6257c63785727ec6...,2024-06-01,-2.871827,53.489763,On or near Gilescroft Avenue,E01006448,Knowsley 001A,Criminal damage and arson,Unable to prosecute suspect


In [10]:
# Convert to GeoDataFrame
geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
gdf = gpd.GeoDataFrame(df, geometry=geometry)

# Get unique crime types and outcomes
crime_types = gdf['crime_type'].dropna().unique()
crime_outcomes = gdf['last_outcome_category'].dropna().unique()

# Print vertical lists
print("Crime Types:")
for crime in sorted(crime_types):
    print(f"- {crime}")

print("\nCrime Outcomes:")
for outcome in sorted(crime_outcomes):
    print(f"- {outcome}")

Crime Types:
- Anti-social behaviour
- Bicycle theft
- Burglary
- Criminal damage and arson
- Drugs
- Other crime
- Other theft
- Possession of weapons
- Public order
- Robbery
- Shoplifting
- Theft from the person
- Vehicle crime
- Violence and sexual offences

Crime Outcomes:
- ASB_FILL
- Action to be taken by another organisation
- Awaiting court outcome
- Court result unavailable
- Formal action is not in the public interest
- Further action is not in the public interest
- Further investigation is not in the public interest
- Investigation complete; no suspect identified
- Local resolution
- Offender given a caution
- Status update unavailable
- Suspect charged as part of another case
- Unable to prosecute suspect
- Under investigation


In [12]:
import pandas as pd

# Load cleaned data
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Filter out Anti-social behaviour
df_filtered = df[df['crime_type'] != 'Anti-social behaviour'].copy()

# Define outcome mappings
positive_outcomes = [
    'Action to be taken by another organisation',
    'Offender given a drugs possession warning',
    'Suspect charged as part of another case',
    'Offender given a caution',
    'Local resolution'
]

neutral_outcomes = [
    'Status update unavailable',
    'Under investigation',
    'Awaiting court outcome',
    'Court result unavailable'
]

negative_outcomes = [
    'Unable to prosecute suspect',
    'Investigation complete; no suspect identified',
    'Further action is not in the public interest',
    'Further investigation is not in the public interest',
    'Formal action is not in the public interest'
]

# Function to map outcomes
def map_outcome(outcome):
    if outcome in positive_outcomes:
        return 'Positive'
    elif outcome in neutral_outcomes:
        return 'Neutral'
    elif outcome in negative_outcomes:
        return 'Negative'
    else:
        return 'Other'

# Apply mapping
df_filtered['outcome_type'] = df_filtered['last_outcome_category'].apply(map_outcome)

# Create summary table: crime_type vs outcome_type counts
summary_table = (
    df_filtered
    .groupby(['crime_type', 'outcome_type'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

# Save to CSV
summary_table.to_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/crime_vs_outcome_type.csv', index=False)

# Show table
print(summary_table)


outcome_type                    crime_type  Negative  Neutral  Positive
0                            Bicycle theft       949       64         3
1                                 Burglary      3382      887        38
2                Criminal damage and arson     11039     1236       113
3                                    Drugs      1552     5375      5829
4                              Other crime      2485     1297       124
5                              Other theft      7699      578        70
6                    Possession of weapons       422      910        17
7                             Public order     12652     2570       142
8                                  Robbery       855      356         1
9                              Shoplifting      6935     2229       150
10                   Theft from the person       793       65         1
11                           Vehicle crime      4699      410        20
12            Violence and sexual offences     46103    10657   

In [13]:
import pandas as pd

# Load your cleaned data
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Extract broader place name (e.g., "Halton" from "Halton 001B")
df['place_name'] = df['lsoa_name'].apply(lambda x: x.split(' ')[0])

# Group by place_name and get crime counts
place_crime_counts = df.groupby('place_name').size().reset_index(name='crime_count')

# Sort descending by crime count
place_crime_counts = place_crime_counts.sort_values(by='crime_count', ascending=False)

# Show the result
print(place_crime_counts)


   place_name  crime_count
3   Liverpool        68160
9      Wirral        25478
4      Sefton        24107
5         St.        17771
2    Knowsley        14917
7        West           36
1      Halton           22
0    Cheshire           17
8       Wigan           16
6  Warrington           11


In [14]:
unique_places = df['place_name'].nunique()
print(f"Number of unique places: {unique_places}")

# And to list them all:
print(df['place_name'].unique())


Number of unique places: 10
['Halton' 'Knowsley' 'Liverpool' 'Sefton' 'St.' 'West' 'Wigan' 'Wirral'
 'Cheshire' 'Warrington']


In [16]:
import pandas as pd

# Load cleaned data
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Define outcome categories
positive_outcomes = [
    "Action to be taken by another organisation",
    "Offender given a drugs possession warning",
    "Suspect charged as part of another case",
    "Offender given a caution",
    "Local resolution"
]

neutral_outcomes = [
    "Status update unavailable",
    "Under investigation",
    "Awaiting court outcome",
    "Court result unavailable"
]

negative_outcomes = [
    "Unable to prosecute suspect",
    "Investigation complete; no suspect identified",
    "Further action is not in the public interest",
    "Further investigation is not in the public interest",
    "Formal action is not in the public interest"
]

# Exclude ASB crimes
df_filtered = df[df['crime_type'] != 'Anti-social behaviour']

# Extract main area name from lsoa_name (before last 5 characters e.g. ' 001B')
df_filtered['area'] = df_filtered['lsoa_name'].str[:-5].str.strip()

# Map last_outcome_category into outcome_type
def map_outcome(outcome):
    if outcome in positive_outcomes:
        return 'Positive'
    elif outcome in neutral_outcomes:
        return 'Neutral'
    elif outcome in negative_outcomes:
        return 'Negative'
    else:
        return 'Unknown'

df_filtered['outcome_type'] = df_filtered['last_outcome_category'].apply(map_outcome)

# Group by area and outcome_type, count occurrences
summary = (
    df_filtered
    .groupby(['area', 'outcome_type'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

# Save to CSV
summary.to_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/outcome_type_by_region.csv', index=False)

# Display summary table
print(summary)


outcome_type                       area  Negative  Neutral  Positive
0             Cheshire West and Chester        10        2         1
1                                Halton         7        4         3
2                              Knowsley      9569     2518       984
3                             Liverpool     44833    12465      4019
4                                Sefton     16469     3951      1201
5                            St. Helens     11779     3231       882
6                            Warrington         7        0         2
7                       West Lancashire        16        1         2
8                                 Wigan         5        5         1
9                                Wirral     16870     4457       989


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['area'] = df_filtered['lsoa_name'].str[:-5].str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['outcome_type'] = df_filtered['last_outcome_category'].apply(map_outcome)


In [21]:
import pandas as pd

# Load cleaned data
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Filter out Anti-social behaviour and any missing outcome
df_filtered = df[(df['crime_type'] != 'Anti-social behaviour') & (df['last_outcome_category'].notnull())]

# Find top 20 LSOAs by total crime count
top20_lsoas = df_filtered['lsoa_name'].value_counts().nlargest(20).index.tolist()

# Filter dataframe to those LSOAs only
df_top20 = df_filtered[df_filtered['lsoa_name'].isin(top20_lsoas)]

# Create pivot table: rows=LSOA, columns=last_outcome_category, values=counts
pivot_table = pd.pivot_table(
    df_top20,
    index='lsoa_name',
    columns='last_outcome_category',
    aggfunc='size',
    fill_value=0
).reset_index()

# Save CSV
pivot_table.to_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/outcome_in_t20_lsoas.csv', index=False)

# Print the pivot table
print(pivot_table)

last_outcome_category        lsoa_name  \
0                        Knowsley 003D   
1                       Liverpool 006C   
2                       Liverpool 014E   
3                       Liverpool 022H   
4                       Liverpool 023D   
5                       Liverpool 031B   
6                       Liverpool 060C   
7                       Liverpool 060D   
8                       Liverpool 060E   
9                       Liverpool 061C   
10                      Liverpool 061F   
11                         Sefton 004H   
12                         Sefton 004I   
13                         Sefton 037D   
14                     St. Helens 012A   
15                     St. Helens 012C   
16                     St. Helens 014C   
17                     St. Helens 024E   
18                         Wirral 016C   
19                         Wirral 016G   

last_outcome_category  Action to be taken by another organisation  \
0                                               

In [22]:
import pandas as pd

# Load cleaned data
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Group by location and count crimes
location_counts = df['location'].value_counts().head(15).reset_index()

# Rename columns for clarity
location_counts.columns = ['location', 'crime_count']

# Print the result
print(location_counts)


                                          location  crime_count
0                          On or near Parking Area         5590
1                           On or near Supermarket         5237
2                         On or near Shopping Area         2819
3                        On or near Petrol Station         2261
4                             On or near Nightclub         1821
5                On or near Sports/Recreation Area          925
6   On or near Further/Higher Educational Building          699
7                              On or near Hospital          696
8                        On or near Police Station          614
9                      On or near Rainford Gardens          508
10                        On or near Oakhouse Park          468
11                          On or near Whitechapel          451
12                       On or near Cazneau Street          451
13                                On or near A5040          398
14                     On or near Back L

In [24]:
import pandas as pd

# Load cleaned data
df = pd.read_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/clean_data_final.csv')

# Exclude ASB crimes
df = df[df['crime_type'] != 'Anti-social behaviour']

# Define top 9 generic locations
top_generic_locations = [
    'On or near Parking Area',
    'On or near Supermarket',
    'On or near Shopping Area',
    'On or near Petrol Station',
    'On or near Nightclub',
    'On or near Sports/Recreation Area',
    'On or near Further/Higher Educational Building',
    'On or near Hospital',
    'On or near Police Station'
]

# Filter for those locations
df_top_locations = df[df['location'].isin(top_generic_locations)]

# Define outcome mapping
positive = [
    'Action to be taken by another organisation',
    'Offender given a drugs possession warning',
    'Suspect charged as part of another case',
    'Offender given a caution',
    'Local resolution'
]

neutral = [
    'Status update unavailable',
    'Under investigation',
    'Awaiting court outcome',
    'Court result unavailable'
]

negative = [
    'Unable to prosecute suspect',
    'Investigation complete; no suspect identified',
    'Further action is not in the public interest',
    'Further investigation is not in the public interest',
    'Formal action is not in the public interest'
]

# Function to map outcomes
def map_outcome(x):
    if x in positive:
        return 'positive'
    elif x in neutral:
        return 'neutral'
    elif x in negative:
        return 'negative'
    else:
        return 'other'

# Create new mapped outcome column
df_top_locations['outcome_type'] = df_top_locations['last_outcome_category'].apply(map_outcome)

# Group by location and outcome_type
location_outcome_counts = df_top_locations.groupby(['location', 'outcome_type']).size().unstack(fill_value=0)

# Reset index for formatting
location_outcome_counts = location_outcome_counts.reset_index()

# Print the table
print(location_outcome_counts)

# Save as CSV
location_outcome_counts.to_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/top9_locations_vs_outcome_type.csv', index=False)

outcome_type                                        location  negative  \
0             On or near Further/Higher Educational Building       449   
1                                        On or near Hospital       457   
2                                       On or near Nightclub      1195   
3                                    On or near Parking Area      3365   
4                                  On or near Petrol Station      1579   
5                                  On or near Police Station       196   
6                                   On or near Shopping Area      1742   
7                          On or near Sports/Recreation Area       560   
8                                     On or near Supermarket      3612   

outcome_type  neutral  positive  
0                 124        66  
1                 156        11  
2                 341       152  
3                1029       367  
4                 310        86  
5                 348        47  
6                 584

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_top_locations['outcome_type'] = df_top_locations['last_outcome_category'].apply(map_outcome)


In [26]:
import pandas as pd

# Example outcome mapping - adjust if you haven't done this yet:
positive_outcomes = [
    "Action to be taken by another organisation",
    "Offender given a drugs possession warning",
    "Suspect charged as part of another case",
    "Offender given a caution",
    "Local resolution"
]

neutral_outcomes = [
    "Status update unavailable",
    "Under investigation",
    "Awaiting court outcome",
    "Court result unavailable"
]

negative_outcomes = [
    "Unable to prosecute suspect",
    "Investigation complete; no suspect identified",
    "Further action is not in the public interest",
    "Further investigation is not in the public interest",
    "Formal action is not in the public interest"
]

# Map the 'last_outcome_category' to custom outcome types
def map_outcome(outcome):
    if outcome in positive_outcomes:
        return 'Positive'
    elif outcome in neutral_outcomes:
        return 'Neutral'
    elif outcome in negative_outcomes:
        return 'Negative'
    else:
        return 'Other'

# Apply mapping
gdf['outcome_category'] = gdf['last_outcome_category'].apply(map_outcome)

# Filter out ASB crimes if not already done
gdf_filtered = gdf[gdf['crime_type'] != 'Anti-social behaviour'].copy()

# Convert 'month' from string to datetime
gdf_filtered['month'] = pd.to_datetime(gdf_filtered['month'])

# Create a formatted month string, e.g. "June 2024"
gdf_filtered['month_str'] = gdf_filtered['month'].dt.strftime('%B %Y')

# Group by month_str and outcome_category, count crimes
month_outcome_counts = (
    gdf_filtered
    .groupby(['month_str', 'outcome_category'])
    .size()
    .reset_index(name='crime_count')
)

# Pivot table so outcome categories become columns
month_outcome_pivot = month_outcome_counts.pivot(index='month_str', columns='outcome_category', values='crime_count').fillna(0)

# Sort by datetime order (reconvert month_str to datetime to sort properly)
month_outcome_pivot.index = pd.to_datetime(month_outcome_pivot.index)
month_outcome_pivot = month_outcome_pivot.sort_index()

# Optional: convert index back to formatted string
month_outcome_pivot.index = month_outcome_pivot.index.strftime('%B %Y')

# Show the table
print(month_outcome_pivot)

# Save to CSV if you want:
month_outcome_pivot.to_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/month_vs_outcome_type.csv')


outcome_category  Negative  Neutral  Positive
month_str                                    
June 2024             9171     1654       729
July 2024             9692     1646       730
August 2024           9025     1603       635
September 2024        8916     1588       599
October 2024          9109     1608       761
November 2024         8607     1731       719
December 2024         8092     1770       653
January 2025          7388     2013       594
February 2025         7363     2068       666
March 2025            8306     2771       772
April 2025            7271     3160       697
May 2025              6625     5022       529


  month_outcome_pivot.index = pd.to_datetime(month_outcome_pivot.index)


In [32]:
# Define outcome groups
positive_outcomes = [
    "Action to be taken by another organisation",
    "Offender given a drugs possession warning",
    "Suspect charged as part of another case",
    "Offender given a caution",
    "Local resolution"
]

neutral_outcomes = [
    "Status update unavailable",
    "Under investigation",
    "Awaiting court outcome",
    "Court result unavailable"
]

negative_outcomes = [
    "Unable to prosecute suspect",
    "Investigation complete; no suspect identified",
    "Further action is not in the public interest",
    "Further investigation is not in the public interest",
    "Formal action is not in the public interest"
]

# Mapping function
def map_outcome_group(outcome):
    if outcome in positive_outcomes:
        return "Positive"
    elif outcome in neutral_outcomes:
        return "Neutral"
    elif outcome in negative_outcomes:
        return "Negative"
    else:
        return "Unknown"

# Filter out Anti-social behaviour
df_filtered = gdf[gdf['crime_type'] != 'Anti-social behaviour'].copy()

# Apply mapping to create new column
df_filtered['outcome_group'] = df_filtered['last_outcome_category'].apply(map_outcome_group)

# Assuming you've already mapped crime_type to crime_group as 'crime_group' column

# Create a pivot table of counts
summary_table = df_filtered.pivot_table(
    index='crime_group',
    columns='outcome_group',
    values='crime_id',  # or any non-null column
    aggfunc='count',
    fill_value=0
)
summary_table.to_csv('/Users/jamesjackson/Documents/liverpool_crime_analysis/crime_group_vs_outcome_group.csv')

print(summary_table)

outcome_group               Negative  Neutral  Positive
crime_group                                            
Drug-Related Crime              1552     5375      5829
Other Crime                     2485     1297       124
Possession of Weapons            422      910        17
Theft / Burglary / Robbery     20613     4179       263
Vehicle Crime                   4699      410        20
Violent / Sexual Offences      57142    11893      1689


In [33]:
# Extract 'area' from lsoa_name (e.g. 'Halton' from 'Halton 001B')
gdf['area'] = gdf['lsoa_name'].str.split().str[0]

# Group by area and crime_group, count occurrences
area_crime_group = (
    gdf.groupby(['area', 'crime_group'])
       .size()
       .unstack(fill_value=0)
       .reset_index()
)

print(area_crime_group)

# Save to CSV
area_crime_group.to_csv(
    '/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/excel-csvs/area_vs_crime_group.csv',
    index=False
)


crime_group        area  Anti-Social Behaviour  Drug-Related Crime  \
0              Cheshire                      4                   2   
1                Halton                      8                   3   
2              Knowsley                   1846                1332   
3             Liverpool                   6843                6854   
4                Sefton                   2486                1676   
5                   St.                   1879                1238   
6            Warrington                      2                   2   
7                  West                     17                   1   
8                 Wigan                      5                   2   
9                Wirral                   3162                1646   

crime_group  Other Crime  Possession of Weapons  Theft / Burglary / Robbery  \
0                      0                      0                           1   
1                      1                      0                        

In [34]:
# Filter to top 9 most common generic locations
top_locations = gdf['location'].value_counts().nlargest(9).index

# Filter the dataset to only those locations
location_filtered = gdf[gdf['location'].isin(top_locations)]

# Group by location and crime group
location_crime_group = (
    location_filtered.groupby(['location', 'crime_group'])
                     .size()
                     .unstack(fill_value=0)
                     .reset_index()
)

print(location_crime_group)

# Save to CSV
location_crime_group.to_csv(
    '/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/excel-csvs/top_locations_vs_crime_group.csv',
    index=False
)


crime_group                                        location  \
0            On or near Further/Higher Educational Building   
1                                       On or near Hospital   
2                                      On or near Nightclub   
3                                   On or near Parking Area   
4                                 On or near Petrol Station   
5                                 On or near Police Station   
6                                  On or near Shopping Area   
7                         On or near Sports/Recreation Area   
8                                    On or near Supermarket   

crime_group  Anti-Social Behaviour  Drug-Related Crime  Other Crime  \
0                               60                 110           10   
1                               72                  15            6   
2                              133                 229           13   
3                              829                 634           73   
4             

In [35]:
import pandas as pd

# Ensure the 'month' column is datetime
gdf['month'] = pd.to_datetime(gdf['month'])

# Create a formatted month column
gdf['formatted_month'] = gdf['month'].dt.strftime('%B %Y')  # e.g. "June 2024"

# Group by formatted month and crime group
month_crime_group = (
    gdf.groupby(['formatted_month', 'crime_group'])
       .size()
       .unstack(fill_value=0)
       .reindex(
           pd.date_range(start="2024-06-01", end="2025-05-01", freq='MS').strftime('%B %Y'),
           fill_value=0
       )
       .reset_index()
       .rename(columns={'index': 'month'})
)

# Print the table
print(month_crime_group)

# Save to CSV
month_crime_group.to_csv(
    '/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/excel-csvs/month_vs_crime_group.csv',
    index=False
)


crime_group           month  Anti-Social Behaviour  Drug-Related Crime  \
0                 June 2024                   1390                1011   
1                 July 2024                   1492                1085   
2               August 2024                   1264                 934   
3            September 2024                   1312                 883   
4              October 2024                   1981                1077   
5             November 2024                   1494                1095   
6             December 2024                   1053                 933   
7              January 2025                   1197                1014   
8             February 2025                   1102                1070   
9                March 2025                   1247                1291   
10               April 2025                   1343                1173   
11                 May 2025                   1377                1190   

crime_group  Other Crime  Possession 

In [36]:
# Get top 20 LSOAs by total crime count
top_20_lsoas = gdf['lsoa_name'].value_counts().nlargest(20).index

# Filter to only top 20
lsoa_filtered = gdf[gdf['lsoa_name'].isin(top_20_lsoas)]

# Group by LSOA and crime group
lsoa_crime_group = (
    lsoa_filtered.groupby(['lsoa_name', 'crime_group'])
                 .size()
                 .unstack(fill_value=0)
                 .reset_index()
)

# Print the table
print(lsoa_crime_group)

# Save to CSV
lsoa_crime_group.to_csv(
    '/Users/jamesjackson/Documents/liverpool_crime_analysis/csv_files/excel-csvs/top_20_lsoas_vs_crime_group.csv',
    index=False
)


crime_group        lsoa_name  Anti-Social Behaviour  Drug-Related Crime  \
0              Knowsley 003D                    146                  69   
1             Liverpool 006C                     35                  52   
2             Liverpool 022H                    120                  48   
3             Liverpool 023D                    115                 223   
4             Liverpool 031B                     74                  87   
5             Liverpool 060C                    527                 816   
6             Liverpool 060D                     83                 433   
7             Liverpool 060E                     78                 121   
8             Liverpool 061C                    198                 478   
9             Liverpool 061F                     66                 121   
10            Liverpool 062G                     86                 140   
11               Sefton 004H                     46                  72   
12               Sefton 0