# Preamble
Purpose: Downloads and saves reported-crime statistics for Toronto from Open Data Toronto.

Author: Justin Bi

Date: 20 September 2024

Contact: justin.bi@mail.utoronto.ca

In [4]:
import requests
 
# Toronto Open Data is stored in a CKAN instance. Its APIs are documented here:
# https://docs.ckan.org/en/latest/api/

# To hit our API, you'll be making requests to:
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"

# Seconds to wait on each HTTP request. Without a timeout, requests can block
# indefinitely on a stalled connection and hang the kernel.
REQUEST_TIMEOUT = 30

# Number of CSV lines (header included) shown when previewing a datastore dump.
# The full text is still kept in `resource_dump_data`; only the printed output
# is limited so the notebook is not flooded with the entire dataset.
PREVIEW_LINES = 20

# Datasets are called "packages". Each package can contain many "resources"
# To retrieve the metadata for this package and its resources, use the package name in this page's URL:
url = base_url + "/api/3/action/package_show"
params = {"id": "police-annual-statistical-report-reported-crimes"}
response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
# Surface HTTP errors here rather than as a failure while reading the response body.
response.raise_for_status()
package = response.json()

# To get resource data:
for resource in package["result"]["resources"]:

    if resource["datastore_active"]:
        # For datastore_active resources, get all records in CSV format:
        url = base_url + "/datastore/dump/" + resource["id"]
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        resource_dump_data = response.text
        print("\n".join(resource_dump_data.splitlines()[:PREVIEW_LINES]))

        # To selectively pull records and attribute-level metadata:
        url = base_url + "/api/3/action/datastore_search"
        p = {"id": resource["id"]}
        response = requests.get(url, params=p, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        resource_search_data = response.json()["result"]
        print(resource_search_data)
        # This API call has many parameters. They're documented here:
        # https://docs.ckan.org/en/latest/maintaining/datastore.html

    else:
        # To get metadata for non datastore_active resources:
        url = base_url + "/api/3/action/resource_show"
        response = requests.get(
            url, params={"id": resource["id"]}, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        resource_metadata = response.json()
        print(resource_metadata)
        # From here, you can use the "url" attribute to download this file

_id,REPORT_YEAR,DIVISION,CATEGORY,SUBTYPE,COUNT_,COUNT_CLEARED
1,2022,D32,Crimes Against Property,Auto Theft,79,0
2,2023,D12,Crimes Against Property,Break & Enter-House,1,0
3,2014,D13,Crimes Against Property,Auto Theft,7,0
4,2021,NSA,Crimes Against the Person,Sexual Violation,1,0
5,2020,D53,Crimes Against Property,Break & Enter-Apartment,2,0
6,2017,D43,Crimes Against Property,Break & Enter-Apartment,1,0
7,2023,D22,Crimes Against Property,Theft Under $5000,3,0
8,2018,D33,Crimes Against Property,Break & Enter-Other,1,0
9,2014,D55,Crimes Against the Person,Sexual Violation,1,0
10,2022,D43,Crimes Against Property,Theft Over $5000,2,0
11,2021,D23,Crimes Against Property,Break & Enter-Commercial,1,0
12,2016,D52,Crimes Against Property,Break & Enter-House,1,0
13,2017,D11,Crimes Against Property,Auto Theft,1,0
14,2017,D42,Crimes Against Property,Auto Theft,13,0
15,2019,D53,Crimes Against Property,Break & Enter-Apartment,2,0
16,2014,D23,Crimes Against the Person,Robbery-Other,1,0
17,2021,D13,Cr

In [5]:
# Part of this cell is done with ChatGPT
# Download every resource that is NOT datastore_active and write it to a file
# named after the last path segment of its download URL (relative path, so the
# file lands in the current working directory).
for resource in package["result"]["resources"]:
    if resource["datastore_active"]:
        continue

    # Fetch the metadata for the non-datastore resource
    url = base_url + "/api/3/action/resource_show"
    metadata_response = requests.get(url, params={"id": resource["id"]}, timeout=30)
    # Stop on an HTTP error instead of failing later on a missing "result" key.
    metadata_response.raise_for_status()
    resource_metadata = metadata_response.json()

    # Get the download URL
    download_url = resource_metadata["result"]["url"]
    print(f"Downloading file from: {download_url}")

    # Download the file; raise on an HTTP error so an error page is never
    # written to disk and reported as a successful download.
    download_response = requests.get(download_url, timeout=30)
    download_response.raise_for_status()
    file_data = download_response.content

    # Extract the filename from the URL (ignoring any query string) and save it
    file_name = download_url.split("?")[0].split("/")[-1]
    with open(file_name, "wb") as file:
        file.write(file_data)
    print(f"File {file_name} downloaded successfully.")


Downloading file from: https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/police-annual-statistical-report-reported-crimes/resource/900b1303-c7d1-43b8-99e4-d04c7dd4607f/download/reported-crimes.csv
File reported-crimes.csv downloaded successfully.
Downloading file from: https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/police-annual-statistical-report-reported-crimes/resource/3c234f46-6d0e-4514-8dcc-28692142ff49/download/reported-crimes.xml
File reported-crimes.xml downloaded successfully.
Downloading file from: https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/police-annual-statistical-report-reported-crimes/resource/f088237d-86cc-4e75-bf6f-92885e48fd55/download/reported-crimes.json
File reported-crimes.json downloaded successfully.
