# Integrate Financial Year Datasets

## Set Up

Ensure that the required libraries are installed by running the following command in a terminal before executing the notebook:
- pip install pandas


Run the following cell first so that the required libraries are imported:

In [8]:
import pandas as pd

## Load Datasets

In [9]:
# Read each processed CSV into its own dataframe. The paths are relative to
# this notebook's working directory.

# Air quality measurements.
df_air_quality = pd.read_csv(
    '../../2-nsw-air-quality/data-processed-financial-year.csv'
)

# Asthma deaths.
df_asthma_deaths = pd.read_csv(
    '../../3-nsw-health-stats/respiratory-health/asthma/deaths/data-processed.csv'
)

# Asthma emergency department presentations (edp).
df_asthma_edp = pd.read_csv(
    '../../3-nsw-health-stats/respiratory-health/asthma/emergency-department-presentations/yearly/data-processed-alt.csv'
)

# Asthma hospitalisations.
df_asthma_hospitalisations = pd.read_csv(
    '../../3-nsw-health-stats/respiratory-health/asthma/hospitalisations/data-processed-alt.csv'
)

# Asthma prevalence in children (the directory name is spelt "prevelance" on disk).
df_asthma_children = pd.read_csv(
    '../../3-nsw-health-stats/respiratory-health/asthma/prevelance-in-children/data-processed-alt.csv'
)

# Print a title followed by the column names of every dataframe.
# sep="\n" puts each argument on its own line; the leading "\n" in the later
# titles leaves a blank line between datasets.
print(
    "Air Quality Headers:",
    df_air_quality.columns.tolist(),
    "\nAsthma Deaths Headers:",
    df_asthma_deaths.columns.tolist(),
    "\nAsthma Emergency Department Presentations Headers:",
    df_asthma_edp.columns.tolist(),
    "\nAsthma Hospitalisations Headers:",
    df_asthma_hospitalisations.columns.tolist(),
    "\nAsthma Prevelance in Children Headers:",
    df_asthma_children.columns.tolist(),
    sep="\n",
)

Air Quality Headers:
['financial year', 'lhd', 'CO ppm', 'NO pphm', 'NO2 pphm', 'OZONE pphm', 'PM10 µg/m³', 'SO2 pphm']

Asthma Deaths Headers:
['lhd', 'financial year', 'rate per 100,000 population']

Asthma Emergency Department Presentations Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population']

Asthma Hospitalisations Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population']

Asthma Prevelance in Children Headers:
['lhd', 'financial year', 'per cent']


## Data Manipulation

Rename columns for clarity.

In [10]:
# Give every health measure a dataset-specific column label. The emergency
# department and hospitalisation files use the same two generic headers, so
# they need distinct names before the frames are merged.

# Asthma deaths: single rate column.
df_asthma_deaths = df_asthma_deaths.rename(
    {'rate per 100,000 population': 'asthma deaths [rate per 100,000]'},
    axis='columns',
)

# Asthma emergency department presentations: female / male rate columns.
df_asthma_edp = df_asthma_edp.rename(
    {
        'Female rate per 100,000 population': 'asthma edp [f] [rate per 100,000]',
        'Male rate per 100,000 population': 'asthma edp [m] [rate per 100,000]',
    },
    axis='columns',
)

# Asthma hospitalisations: female / male rate columns.
df_asthma_hospitalisations = df_asthma_hospitalisations.rename(
    {
        'Female rate per 100,000 population': 'asthma hospitalisations [f] [rate per 100,000]',
        'Male rate per 100,000 population': 'asthma hospitalisations [m] [rate per 100,000]',
    },
    axis='columns',
)

# Asthma prevalence in children: percentage column.
# NOTE: the label keeps the existing "prevelance" spelling because it becomes
# a column header in the CSV files written at the end of the notebook.
df_asthma_children = df_asthma_children.rename(
    {'per cent': 'asthma prevelance in children [% of children]'},
    axis='columns',
)

## Merge Datasets

Merge the dataframes on the 'financial year' and 'lhd' columns using an inner join (keeps only the rows present in every dataset).

In [11]:
# Inner-join the air quality data with each asthma dataset in turn, matching
# rows on the 'financial year' and 'lhd' columns. Only key pairs that appear
# in every dataframe survive an inner join.
df_merged_inner = (
    df_air_quality
    .merge(df_asthma_deaths, on=['financial year', 'lhd'], how='inner')
    .merge(df_asthma_edp, on=['financial year', 'lhd'], how='inner')
    .merge(df_asthma_hospitalisations, on=['financial year', 'lhd'], how='inner')
    .merge(df_asthma_children, on=['financial year', 'lhd'], how='inner')
)

# Preview the first rows of the merged dataframe.
df_merged_inner.head()

Unnamed: 0,financial year,lhd,CO ppm,NO pphm,NO2 pphm,OZONE pphm,PM10 µg/m³,SO2 pphm,"asthma deaths [rate per 100,000]","asthma edp [f] [rate per 100,000]","asthma edp [m] [rate per 100,000]","asthma hospitalisations [f] [rate per 100,000]","asthma hospitalisations [m] [rate per 100,000]",asthma prevelance in children [% of children]
0,2014/2015,Central Coast,0.1,0.166667,0.444444,1.922222,16.233333,0.044444,0.675,373.6,354.3,182.2,148.5,15.7
1,2015/2016,Central Coast,0.1,0.188889,0.444444,1.755556,15.833333,0.066667,0.725,405.8,350.7,185.2,157.4,14.4
2,2016/2017,Central Coast,0.1,0.18,0.45,1.87,16.1,0.07,0.7,412.0,339.7,176.7,149.1,12.55
3,2017/2018,Central Coast,0.1,0.128571,0.4,2.028571,17.985714,0.085714,0.675,386.7,344.0,154.4,141.6,11.15
4,2014/2015,Hunter New England,0.246667,0.465115,0.707188,1.991198,22.876567,0.155568,0.825,455.6,445.5,146.7,136.0,16.3


Merge the dataframes on the 'financial year' and 'lhd' columns using an outer join (keeps every row from every dataset, leaving missing values empty).

In [12]:
# Outer-join the air quality data with each asthma dataset in turn, matching
# rows on the 'financial year' and 'lhd' columns. Every key pair found in any
# dataframe is kept; measures a dataset does not provide are left as NaN.
df_merged_outer = (
    df_air_quality
    .merge(df_asthma_deaths, on=['financial year', 'lhd'], how='outer')
    .merge(df_asthma_edp, on=['financial year', 'lhd'], how='outer')
    .merge(df_asthma_hospitalisations, on=['financial year', 'lhd'], how='outer')
    .merge(df_asthma_children, on=['financial year', 'lhd'], how='outer')
)

# Preview the first rows of the merged dataframe.
df_merged_outer.head()

Unnamed: 0,financial year,lhd,CO ppm,NO pphm,NO2 pphm,OZONE pphm,PM10 µg/m³,SO2 pphm,"asthma deaths [rate per 100,000]","asthma edp [f] [rate per 100,000]","asthma edp [m] [rate per 100,000]","asthma hospitalisations [f] [rate per 100,000]","asthma hospitalisations [m] [rate per 100,000]",asthma prevelance in children [% of children]
0,2000/2001,Central Coast,0.065493,0.214184,0.39078,1.762143,15.59507,0.07,,,,,,
1,2000/2001,Hunter New England,0.225,0.642865,0.733538,1.9098,22.291969,0.160705,,,,,,
2,2000/2001,Illawarra Shoalhaven,0.225,0.525,0.7875,1.85,20.091234,0.125,,,,,,
3,2000/2001,Mid North Coast,,0.126667,0.288333,1.569492,,0.045,,,,,,
4,2000/2001,Murrumbidgee,,0.112676,0.35446,1.933028,24.18018,0.065116,,,,,,


In [13]:
# Order both merged dataframes by 'lhd' first and 'financial year' second.
# The generator's loop variable stays local to the expression, so no extra
# name is left behind in the notebook namespace.
df_merged_inner, df_merged_outer = (
    merged.sort_values(by=['lhd', 'financial year'])
    for merged in (df_merged_inner, df_merged_outer)
)

## Output Dataset

In [14]:
# Write the merged dataframes to CSV in the current working directory.
# index=False leaves the dataframe index out of the file.

# Inner-join result.
df_merged_inner.to_csv(path_or_buf='data-merged.csv', index=False)

# Outer-join result.
df_merged_outer.to_csv(path_or_buf='data-merged-alt.csv', index=False)