# Considerations
This notebook is intended to be used to analyze the data from the `data` folder. 
If you haven't already, please run the `src/config.py` file to set up the environment variables, and then run `scripts/download_data.py` to download the relevant data sources.

In [1]:
# Import OpenCanada (aliased `oc`) and Datasets (aliased `ds`) from Downloaders
from Downloaders import OpenCanada as oc
from Downloaders import Datasets as ds

# Create a TorontoDownloader (from OpenCanada) and a HomelessnessDataset (from
# Datasets); `homeless.get_urls()` is later passed to `downloader.load_pages`.
downloader = oc.TorontoDownloader()
homeless = ds.HomelessnessDataset()

In [2]:
# Fetch the homelessness dataset URLs first, then hand them to the downloader.
homeless_urls = homeless.get_urls()
downloader.load_pages(homeless_urls)

[Downloader] Added 12 urls to the list.


In [8]:
# Inspect the resources on the loaded pages, then download them.
# NOTE(review): presumably get_datasets_info() produces the per-resource listing
# shown in this cell's output (page URL, name, type, size) — confirm in Downloaders.
downloader.get_datasets_info()
# Files go under 'data', the same folder the analysis cells read from ('./data/...').
downloader.download_datasets(output_directory='data')

Loaded 12 pages.
0) Page URL: https://open.toronto.ca/dataset/deaths-of-people-experiencing-homelessness/, Name: Homeless deaths by month, Last Modified: None, Type: CSV, Size: 0.00 MB, URL: https://ckan0.cf.opendata.inter.prod-toronto.ca/datastore/dump/8b2e5ec9-7cee-49cc-a67e-bba38e5077be
1) Page URL: https://open.toronto.ca/dataset/deaths-of-people-experiencing-homelessness/, Name: Homeless deaths by month.csv, Last Modified: 2024-04-17T21:09:06.846517, Type: CSV, Size: 0.00 MB, URL: https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/a7ae08f3-c512-4a88-bb3c-ab40eca50c5e/resource/dc4ec2fa-d655-46ca-af32-f216d26e9804/download/homeless-deaths-by-month.csv
2) Page URL: https://open.toronto.ca/dataset/deaths-of-people-experiencing-homelessness/, Name: Homeless deaths by month.xml, Last Modified: 2024-04-17T21:09:08.113343, Type: XML, Size: 0.01 MB, URL: https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/a7ae08f3-c512-4a88-bb3c-ab40eca50c5e/resource/2ba00593-7c7f-426c-b1dd-e90c96

# Data Analysis

## Homeless Deaths

In [22]:
import pandas as pd

# Read the three raw homeless-deaths tables (by cause, by demographics, by month)
# in one unpacking assignment; the files are read in the order listed.
data_by_cause, data_by_demographics, data_by_month = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/deaths-of-people-experiencing-homelessness/homeless-deaths-by-cause.csv',
        './data/deaths-of-people-experiencing-homelessness/homeless-deaths-by-demographics.csv',
        './data/deaths-of-people-experiencing-homelessness/homeless-deaths-by-month.csv',
    )
)

In [18]:
import os

# Clean the by-cause table and save it under ./cleaned-data.
# Requires `data_by_cause` from the cell that reads the raw CSVs.

# Upper bound applied to `Count`.
# NOTE(review): capping overwrites the real value of every row above the cap, so
# the saved "cleaned" file no longer holds the original counts for those rows —
# confirm this is intended before using the output for totals.
COUNT_CAP = 50

# Standardize `Cause_of_death` values: lower-case, then trim surrounding whitespace
data_by_cause['Cause_of_death'] = data_by_cause['Cause_of_death'].str.lower().str.strip()

# Convert both numeric columns to int in a single call
data_by_cause = data_by_cause.astype({'Year of death': int, 'Count': int})

# Handle outliers: vectorised cap (same result as min(x, COUNT_CAP) per row)
data_by_cause['Count'] = data_by_cause['Count'].clip(upper=COUNT_CAP)

# Save the cleaned dataset; to_csv does not create missing folders, so make
# sure the target directory exists first
cause_output_path = './cleaned-data/cleaned_homeless-deaths-by-cause.csv'
os.makedirs(os.path.dirname(cause_output_path), exist_ok=True)
data_by_cause.to_csv(cause_output_path, index=False)

In [20]:
import os

# Clean the by-demographics table and save it under ./cleaned-data.
# Requires `data_by_demographics` from the cell that reads the raw CSVs.

# Convert both numeric columns to int in a single call
data_by_demographics = data_by_demographics.astype({'Year of death': int, 'Count': int})

# Handle outliers: no outlier handling is applied to this table

# Save the cleaned dataset; to_csv does not create missing folders, so make
# sure the target directory exists first
demographics_output_path = './cleaned-data/cleaned_homeless-deaths-by-demographics.csv'
os.makedirs(os.path.dirname(demographics_output_path), exist_ok=True)
data_by_demographics.to_csv(demographics_output_path, index=False)


In [24]:
import os

# Clean the by-month table and save it under ./cleaned-data.
# Requires `data_by_month` from the cell that reads the raw CSVs.

# Convert both numeric columns to int in a single call
data_by_month = data_by_month.astype({'Year of death': int, 'Count': int})

# Handle outliers: no outlier handling is applied to this table

# Save the cleaned dataset; to_csv does not create missing folders, so make
# sure the target directory exists first
month_output_path = './cleaned-data/cleaned_homeless-deaths-by-month.csv'
os.makedirs(os.path.dirname(month_output_path), exist_ok=True)
data_by_month.to_csv(month_output_path, index=False)
