In [3]:
import requests #call API
import pandas as pd #to change to dataframe
from time import sleep # prevent API server overload in case of blocking the requests

In [4]:
# Query point: Sutton, as (latitude, longitude) in decimal degrees
latitude, longitude = 51.3618, -0.1934

# Months to import, one "YYYY-MM" string per month
months = [
    '2024-09',
    '2024-10',
    '2024-11',
    '2024-12',
    '2025-01',
    '2025-02',
]

In [5]:
def fetch_crime_data(lat, lng, months, timeout=30, pause=1, session=None):
    """Download street-level crime records around a point for several months.

    One GET request per month is sent to the data.police.uk
    ``crimes-street/all-crime`` endpoint. A month whose request fails
    (network error, non-200 status, or a body that is not valid JSON) is
    reported on stdout and skipped, so a single bad month does not discard
    the months that were fetched successfully.

    Parameters
    ----------
    lat, lng : float
        Latitude and longitude of the point to query.
    months : iterable of str
        Months to fetch, each formatted ``YYYY-MM``.
    timeout : float, optional
        Seconds to wait for each response before giving up on that month.
    pause : float, optional
        Seconds to sleep between consecutive requests so the API server is
        not flooded. ``0`` disables the pause.
    session : object, optional
        Anything exposing a ``requests``-compatible
        ``get(url, params=..., timeout=...)`` method, e.g. a
        ``requests.Session``. Defaults to the ``requests`` module itself.

    Returns
    -------
    pandas.DataFrame
        The records of every successfully fetched month, in request order.
        Empty when no month could be fetched.
    """
    endpoint = "https://data.police.uk/api/crimes-street/all-crime"
    http = requests if session is None else session
    months = list(months)  # accept any iterable and know which month is last
    all_data = []

    for position, month in enumerate(months):
        print(f"📅 Fetching data for {month}...")

        try:
            # `params` lets the HTTP client URL-encode the query string;
            # `timeout` stops a stalled connection from hanging the notebook.
            response = http.get(
                endpoint,
                params={"lat": lat, "lng": lng, "date": month},
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Best effort: report the network failure and move on.
            print(f"❌ Failed to fetch data for {month} ({exc})")
        else:
            if response.status_code == 200:
                try:
                    month_data = response.json()
                except ValueError:
                    # JSON decode errors are ValueError subclasses.
                    print(f"❌ Failed to fetch data for {month} (response was not valid JSON)")
                else:
                    all_data.extend(month_data)
                    print(f"✅ {len(month_data)} records fetched.")
            else:
                print(f"❌ Failed to fetch data for {month} (Status code: {response.status_code})")

        # Hold between requests to avoid overloading the API; there is
        # nothing to wait for after the final request.
        if pause and position < len(months) - 1:
            sleep(pause)

    return pd.DataFrame(all_data)


In [6]:
# Pull every configured month for the configured point into one DataFrame
df = fetch_crime_data(lat=latitude, lng=longitude, months=months)

📅 Fetching data for 2024-09...
✅ 457 records fetched.
📅 Fetching data for 2024-10...
✅ 452 records fetched.
📅 Fetching data for 2024-11...
✅ 448 records fetched.
📅 Fetching data for 2024-12...
✅ 362 records fetched.
📅 Fetching data for 2025-01...
✅ 419 records fetched.
📅 Fetching data for 2025-02...
✅ 367 records fetched.


In [7]:
# Flatten the nested `location` entries into three plain columns:
# latitude and longitude as floats, plus the street's name.
def _coordinate(key):
    """Build an extractor returning location[key] as a float, or None for a falsy location."""
    def extract(place):
        return float(place[key]) if place else None
    return extract


def _street_name(place):
    """Return the nested street name, or None when the location is falsy or has no 'street' key."""
    if place and 'street' in place:
        return place['street']['name']
    return None


locations = df['location']
df['lat'] = locations.apply(_coordinate('latitude'))
df['lng'] = locations.apply(_coordinate('longitude'))
df['street_name'] = locations.apply(_street_name)


In [8]:
# Show the first ten records, restricted to the columns of interest
preview_columns = ['category', 'month', 'street_name', 'lat', 'lng']
df.loc[:, preview_columns].head(10)

Unnamed: 0,category,month,street_name,lat,lng
0,anti-social-behaviour,2024-09,On or near Belsize Gardens,51.368715,-0.19193
1,anti-social-behaviour,2024-09,On or near Bridge Road,51.359168,-0.193872
2,anti-social-behaviour,2024-09,On or near Cheam Road,51.360397,-0.203017
3,anti-social-behaviour,2024-09,On or near The Quadrant,51.359019,-0.19066
4,anti-social-behaviour,2024-09,On or near Elm Grove,51.366704,-0.193892
5,anti-social-behaviour,2024-09,On or near Manor Place,51.365706,-0.193313
6,anti-social-behaviour,2024-09,On or near Gem Yard,51.37049,-0.192177
7,anti-social-behaviour,2024-09,On or near Dallas Road,51.358006,-0.21427
8,anti-social-behaviour,2024-09,On or near Stanley Road,51.355252,-0.197128
9,anti-social-behaviour,2024-09,On or near Park Road,51.359398,-0.215336


In [9]:
# Persist the combined records. index=False leaves out the auto-generated
# 0..n-1 row index, which would otherwise be written as an unnamed first
# column and come back as "Unnamed: 0" when the file is read again.
# NOTE(review): the nested `location` values are written as their text
# representation; flatten them first if the CSV must round-trip.
df.to_csv("sutton_crime_data.csv", index=False)

In [14]:
# Tally crimes per street name and keep the five most frequent
# (value_counts already orders the tallies from most to least frequent)
top_streets = (
    df['street_name']
    .value_counts()
    .head(5)
)

# Lay the tallies out as a two-column table for readability
top_streets_df = pd.DataFrame({
    'Street Name': top_streets.index,
    'Number of Crimes': top_streets.values,
})

top_streets_df


Unnamed: 0,Street Name,Number of Crimes
0,On or near Shopping Area,153
1,On or near Supermarket,117
2,On or near High Street,104
3,On or near Petrol Station,69
4,On or near Sutton,52
