In [1]:
import logging
import requests
import pandas as pd

# Configure the root logger: records at INFO level and above are sent both to
# the console stream and to a file.
# NOTE(review): the log file is literally named 'package_id' (created in the
# working directory); that looks like a placeholder rather than an intended
# log file name -- confirm before relying on it.
logging.basicConfig(
    level=logging.INFO,
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('package_id'),
    ],
)

def _ckan_action(url, params, timeout):
    """Call one CKAN action endpoint and return its ``result`` payload, or None on any failure."""
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        logging.error("Request to %s failed: %s", url, exc)
        return None

    if response.status_code != 200:
        return None

    try:
        payload = response.json()
    except ValueError:
        # The body was not valid JSON (e.g. an HTML error page).
        return None

    if not isinstance(payload, dict):
        return None
    return payload.get("result")


def load_data_OpenDataTO(package_id, page_size=32000, timeout=30):
    """
    Load a dataset from the Toronto Open Data API.

    The package metadata is fetched first; the first resource whose format is
    JSON is then read through the CKAN ``datastore_search`` action, page by
    page, until every record has been collected.

    Parameters:
    - package_id (str): The ID (name) of the package containing the dataset.
    - page_size (int): Number of records requested per ``datastore_search``
                       call. Without an explicit limit CKAN returns only the
                       first 100 rows, so the records are paged explicitly.
    - timeout (float): Seconds to wait for each HTTP request before giving up.

    Returns:
    - pd.DataFrame or None: A DataFrame containing the dataset if successfully loaded,
                            or None if an error occurred (network failure,
                            non-200 response, malformed payload, or no JSON
                            resource in the package).
    """
    # Define the base URL for the Toronto Open Data API
    base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"

    # Retrieve the metadata of the package
    package_info = _ckan_action(
        base_url + "/api/3/action/package_show", {"id": package_id}, timeout
    )
    if not isinstance(package_info, dict):
        logging.error("Failed to retrieve metadata from the API.")
        return None

    # Pick the first JSON resource; tolerate resources with a missing/null format
    json_resource = next(
        (
            resource
            for resource in package_info.get("resources") or []
            if (resource.get("format") or "").lower() == "json"
        ),
        None,
    )
    if json_resource is None:
        logging.warning("No JSON resource found in the package.")
        return None

    # Page through the datastore. The offset advances by the number of rows
    # actually received, so a server that caps the page below `page_size`
    # still yields the complete dataset.
    search_url = base_url + "/api/3/action/datastore_search"
    records = []
    while True:
        page = _ckan_action(
            search_url,
            {"id": json_resource["id"], "limit": page_size, "offset": len(records)},
            timeout,
        )
        if not isinstance(page, dict):
            # Returning a partial frame would silently truncate the analysis.
            logging.error("Failed to retrieve records from the API.")
            return None

        batch = page.get("records") or []
        records.extend(batch)

        total = page.get("total")
        if not batch or (total is not None and len(records) >= total):
            break

    df = pd.DataFrame(records)
    logging.info("Toronto Open DataFrame created successfully using CKAN API parameters.")
    return df

# Load the Annual Police Report data into the DataFrame

package_id = "police-annual-statistical-report-arrested-and-charged-persons"

annual_police_report = load_data_OpenDataTO(package_id)
if annual_police_report is None:
    # The loader returns None on failure; calling .head() on it would raise
    # AttributeError, so only report the problem here.
    logging.error("Failed to load the dataset.")
else:
    # Preview the first rows. display() is used because an expression inside
    # an if/else branch is not rendered as the cell output.
    display(annual_police_report.head())

INFO:root:Toronto Open DataFrame created successfully using CKAN API parameters.


Unnamed: 0,_id,ARREST_YEAR,DIVISION,HOOD_158,NEIGHBOURHOOD_158,SEX,AGE_COHORT,AGE_GROUP,CATEGORY,SUBTYPE,ARREST_COUNT
0,1,2019,D14,83,Dufferin Grove (83),Female,25 to 34,Adult,Other Criminal Code Violations,Other,1
1,2,2022,D12,30,Brookhaven-Amesbury (30),Male,<18,Youth,Crimes Against the Person,Assaults,2
2,3,2018,D14,165,Harbourfront-CityPlace (165),Male,18 to 24,Adult,Other Criminal Code Violations,Other,1
3,4,2015,D22,18,New Toronto (18),Male,25 to 34,Adult,Controlled Drugs and Substances Act,Other,3
4,5,2014,D52,78,Kensington-Chinatown (78),Male,25 to 34,Adult,Other Criminal Code Violations,Other,46


In [4]:
# Converting column names to lowercase
def rename_data(annual_police_report):
    """
    Return a copy of the report with every string column name lower-cased.

    Parameters:
    - annual_police_report (pd.DataFrame): The frame whose columns are renamed.
      It is not modified.

    Returns:
    - pd.DataFrame: A new DataFrame with lower-cased column names. Labels that
      are not strings (e.g. integers) are kept unchanged.

    Raises:
    - ValueError: If `annual_police_report` is None (the dataset was not loaded).
    """
    if annual_police_report is None:
        raise ValueError('annual_police_report is None; load the dataset before renaming its columns')

    # str.lower used directly as the mapper raises TypeError on non-string
    # labels, so only strings are lower-cased.
    report = annual_police_report.rename(
        columns=lambda label: label.lower() if isinstance(label, str) else label
    )
    return report
# Show the lower-cased frame as the cell output. The result is not assigned, so
# `annual_police_report` itself keeps its original column names.
# NOTE(review): if later cells expect lower-case columns, store this result in a variable.
rename_data(annual_police_report)

Unnamed: 0,_id,arrest_year,division,hood_158,neighbourhood_158,sex,age_cohort,age_group,category,subtype,arrest_count
0,1,2019,D14,83,Dufferin Grove (83),Female,25 to 34,Adult,Other Criminal Code Violations,Other,1
1,2,2022,D12,30,Brookhaven-Amesbury (30),Male,<18,Youth,Crimes Against the Person,Assaults,2
2,3,2018,D14,165,Harbourfront-CityPlace (165),Male,18 to 24,Adult,Other Criminal Code Violations,Other,1
3,4,2015,D22,18,New Toronto (18),Male,25 to 34,Adult,Controlled Drugs and Substances Act,Other,3
4,5,2014,D52,78,Kensington-Chinatown (78),Male,25 to 34,Adult,Other Criminal Code Violations,Other,46
