# Integrate Financial-Year Datasets

## Set Up

Ensure the required library is installed by running the following command in a terminal before executing the notebook:
- `pip install pandas`


Run the following cell first so that the required libraries are imported:

In [1]:
import pandas as pd

## Load Datasets

In [5]:
# Read the three pre-processed CSV extracts (air quality, asthma deaths,
# asthma emergency department presentations) into dataframes.
df_air_quality = pd.read_csv('../air-quality/processed-financial-year.csv')
df_asthma_deaths = pd.read_csv('../health-stats/asthma-deaths/processed.csv')
df_asthma_edp = pd.read_csv('../health-stats/asthma-edp/processed.csv')

# Print each frame's column names so the shared key columns can be checked by eye.
# The air-quality list is printed on the same line as its label; the other two
# use sep="\n" so the list lands on the line below its label.
print("Air Quality Headers:", list(df_air_quality.columns))
print("\nAsthma Deaths Headers:", list(df_asthma_deaths.columns), sep="\n")
print(
    "\nAsthma Emergency Department Presentations Headers:",
    list(df_asthma_edp.columns),
    sep="\n",
)

Air Quality Headers: ['financial year', 'lhd', 'CO ppm', 'NO pphm', 'NO2 pphm', 'OZONE pphm', 'PM10 µg/m³', 'SO2 pphm']

Asthma Deaths Headers:
['lhd', 'financial year', 'rate per 100,000 population']

Asthma Emergency Department Presentations Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population', 'Persons rate per 100,000 population']


## Data Manipulation

Rename columns for clarity.

In [9]:
# Give each dataset's rate column a name that identifies the dataset it came from.

# Asthma deaths: the single rate column.
df_asthma_deaths = df_asthma_deaths.rename(
    {'rate per 100,000 population': 'asthma deaths [rate per 100,000]'},
    axis='columns',
)

# Asthma emergency department presentations: only the 'Persons' rate is renamed.
df_asthma_edp = df_asthma_edp.rename(
    {'Persons rate per 100,000 population': 'asthma edp [rate per 100,000]'},
    axis='columns',
)

Drop the sex-specific (Male/Female) rate columns, keeping only the combined Persons rate.

In [10]:
# Remove every column of df_asthma_edp whose header mentions 'Male' or 'Female'.
# The match is case-sensitive, as in a plain substring test.
df_asthma_edp = df_asthma_edp.drop(
    columns=[
        header
        for header in df_asthma_edp.columns
        if any(label in header for label in ('Male', 'Female'))
    ]
)

# Preview the first rows of the trimmed dataframe.
df_asthma_edp.head()

Unnamed: 0,financial year,lhd,"asthma edp [rate per 100,000]"
0,2014/2015,Central Coast,366.6
1,2015/2016,Central Coast,380.9
2,2016/2017,Central Coast,378.4
3,2017/2018,Central Coast,368.2
4,2018/2019,Central Coast,378.8


## Merge Datasets

Merge the dataframes on 'financial year' and 'lhd' using an inner join, so that only year/district combinations present in all three datasets are kept.

In [14]:
# Inner-join air quality, asthma deaths and asthma EDP on the
# ('financial year', 'lhd') key pair, then order the rows by 'lhd' and,
# within each 'lhd', by 'financial year'.
df_merged = (
    df_air_quality
    .merge(df_asthma_deaths, on=['financial year', 'lhd'], how='inner')
    .merge(df_asthma_edp, on=['financial year', 'lhd'], how='inner')
    .sort_values(by=['lhd', 'financial year'])
)

# Preview the first rows of the merged dataframe.
df_merged.head()

Unnamed: 0,financial year,lhd,CO ppm,NO pphm,NO2 pphm,OZONE pphm,PM10 µg/m³,SO2 pphm,"asthma deaths [rate per 100,000]","asthma edp [rate per 100,000]"
0,2014/2015,Central Coast,0.1,0.233333,0.466667,1.79119,15.158333,0.05,0.675,366.6
1,2015/2016,Central Coast,0.108333,0.216667,0.458333,1.691667,15.375,0.058333,0.725,380.9
2,2016/2017,Central Coast,0.1,0.216667,0.491667,1.775,15.358333,0.058333,0.7,378.4
3,2017/2018,Central Coast,0.108333,0.175,0.441667,1.861183,16.816667,0.075,0.675,368.2
4,2018/2019,Central Coast,0.108333,0.216667,0.416667,1.733333,17.75,0.091667,0.675,378.8


## Output Dataset

In [15]:
# Write the merged dataframe to a CSV file in the current working directory;
# index=False leaves the row index out of the file.
df_merged.to_csv(path_or_buf='data-persons.csv', index=False)