# Integrate Monthly Datasets

## Set Up

Ensure that the required libraries are available by running the below code in the terminal before execution:
- pip install pandas


Execute the following in the jupyter notebook before execution to ensure that the required libraries are imported:

In [24]:
import pandas as pd

## Load Datasets

In [25]:
# Load data into dataframes.
df_air_quality = pd.read_csv('../../2-nsw-air-quality/data-processed-financial-year.csv')
df_asthma_deaths = pd.read_csv('../../3-nsw-health-stats/respiratory-health/asthma/deaths/data-processed.csv')
df_asthma_edp = pd.read_csv('../../3-nsw-health-stats/respiratory-health/asthma/emergency-department-presentations/yearly/data-processed-alt.csv')
df_asthma_hospitalisations = pd.read_csv('../../3-nsw-health-stats/respiratory-health/asthma/hospitalisations/data-processed-alt.csv')
df_asthma_children = pd.read_csv('../../3-nsw-health-stats/respiratory-health/asthma/prevelance-in-children/data-processed-alt.csv')
df_copd_deaths = pd.read_csv('../../3-nsw-health-stats/respiratory-health/chronic-obstructive-pulmonary-disease/deaths/data-processed-alt.csv')
df_copd_hospitalisations = pd.read_csv('../../3-nsw-health-stats/respiratory-health/chronic-obstructive-pulmonary-disease/hospitalisations/data-processed-alt.csv')
df_iap_deaths = pd.read_csv('../../3-nsw-health-stats/respiratory-health/influenza-and-pneumonia/deaths/data-processed-alt.csv')
df_iap_hospitalisations = pd.read_csv('../../3-nsw-health-stats/respiratory-health/influenza-and-pneumonia/hospitalisations/data-processed-alt.csv')

# View Headers.
print("Air Quality Headers:", df_air_quality.columns.tolist())

print("\nAsthma Deaths Headers:")
print(df_asthma_deaths.columns.tolist())

print("\nAsthma Emergency Department Presentations Headers:")
print(df_asthma_edp.columns.tolist())

print("\nAsthma Hospitalisations Headers:")
print(df_asthma_hospitalisations.columns.tolist())

print("\nAsthma Prevelance in Children Headers:")
print(df_asthma_children.columns.tolist())

print("\nChronic Obstructive Pulmonary Disease Deaths Headers:")
print(df_copd_deaths.columns.tolist())

print("\nChronic Obstructive Pulmonary Disease Hospitalisations Headers:")
print(df_copd_hospitalisations.columns.tolist())

print("\nInfluenza and Pneumonia Deaths Headers:")
print(df_iap_deaths.columns.tolist())

print("\nInfluenza and Pneumonia Hospitalisations Headers:")
print(df_iap_hospitalisations.columns.tolist())

Air Quality Headers: ['financial year', 'lhd', 'CO ppm', 'NO pphm', 'NO2 pphm', 'OZONE pphm', 'PM10 µg/m³', 'SO2 pphm']

Asthma Deaths Headers:
['lhd', 'financial year', 'rate per 100,000 population']

Asthma Emergency Department Presentations Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population']

Asthma Hospitalisations Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population']

Asthma Prevelance in Children Headers:
['lhd', 'financial year', 'per cent']

Chronic Obstructive Pulmonary Disease Deaths Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population']

Chronic Obstructive Pulmonary Disease Hospitalisations Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate per 100,000 population']

Influenza and Pneumonia Deaths Headers:
['financial year', 'lhd', 'Female rate per 100,000 population', 'Male rate

## Data Manipulation

Rename columns for clarity.

In [26]:
# Asthma Deaths
df_asthma_deaths = df_asthma_deaths.rename(columns={
    'rate per 100,000 population': 'asthma deaths [rate per 100,000]',
})

# Asthma Emergency Department Presentations
df_asthma_edp = df_asthma_edp.rename(columns={
    'Female rate per 100,000 population': 'asthma edp [f] [rate per 100,000]',
    'Male rate per 100,000 population': 'asthma edp [m] [rate per 100,000]'
})

# Asthma Hospitalisations
df_asthma_hospitalisations = df_asthma_hospitalisations.rename(columns={
    'Female rate per 100,000 population': 'asthma hospitalisations [f] [rate per 100,000]',
    'Male rate per 100,000 population': 'asthma hospitalisations [m] [rate per 100,000]'
})

# Asthma Prevelance in Children
df_asthma_children = df_asthma_children.rename(columns={
    'per cent': 'asthma prevelance in children [% of children]'
})

# Chronic Obstructive Pulmonary Disease Deaths
df_copd_deaths = df_copd_deaths.rename(columns={
    'Female rate per 100,000 population': 'copd deaths [f] [rate per 100,000]',
    'Male rate per 100,000 population': 'copd deaths [f] [rate per 100,000]'
})

# Chronic Obstructive Pulmonary Disease Hospitalisations
df_copd_hospitalisations = df_copd_hospitalisations.rename(columns={
    'Female rate per 100,000 population': 'copd hospitalisations [f] [rate per 100,000]',
    'Male rate per 100,000 population': 'copd hospitalisations [f] [rate per 100,000]'
})

# Influenza and Pneumonia Deaths
df_iap_deaths = df_iap_deaths.rename(columns={
    'Female rate per 100,000 population': 'iap deaths [f] [rate per 100,000]',
    'Male rate per 100,000 population': 'iap deaths [f] [rate per 100,000]'
})

# Influenza and Pneumonia Hospitalisations
df_iap_hospitalisations = df_iap_hospitalisations.rename(columns={
    'Female rate per 100,000 population': 'iap hospitalisations [f] [rate per 100,000]',
    'Male rate per 100,000 population': 'iap hospitalisations [f] [rate per 100,000]'
})

## Merge Datasets

Merge dataframes on 'date' and 'lhd' columns.

In [27]:
# Merge dataframes on 'date' and 'lhd' columns.
df_merged = df_air_quality
df_merged = pd.merge(df_merged, df_asthma_deaths, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_asthma_edp, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_asthma_hospitalisations, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_asthma_children, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_copd_deaths, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_copd_hospitalisations, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_iap_deaths, on=['financial year', 'lhd'], how='outer')
df_merged = pd.merge(df_merged, df_iap_hospitalisations, on=['financial year', 'lhd'], how='outer')

# Sort the merged dataframe by 'lhd' and 'financial year'.
df_merged = df_merged.sort_values(by=['lhd', 'financial year'])

# View headers of merged dataframe.
df_merged.head()

Unnamed: 0,financial year,lhd,CO ppm,NO pphm,NO2 pphm,OZONE pphm,PM10 µg/m³,SO2 pphm,"asthma deaths [rate per 100,000]","asthma edp [f] [rate per 100,000]",...,"asthma hospitalisations [m] [rate per 100,000]",asthma prevelance in children [% of children],"copd deaths [f] [rate per 100,000]","copd deaths [f] [rate per 100,000].1","copd hospitalisations [f] [rate per 100,000]","copd hospitalisations [f] [rate per 100,000].1","iap deaths [f] [rate per 100,000]","iap deaths [f] [rate per 100,000].1","iap hospitalisations [f] [rate per 100,000]","iap hospitalisations [f] [rate per 100,000].1"
9,2001/2002,Central Coast,,,,,,,,,...,204.6,,,,235.4,348.2,,,198.1,294.6
24,2002/2003,Central Coast,,,,,,,,,...,167.2,,,,228.1,334.0,,,213.2,317.7
39,2003/2004,Central Coast,,,,,,,,,...,154.1,16.7,,,219.0,377.1,,,217.8,308.5
54,2004/2005,Central Coast,,,,,,,,,...,184.7,18.15,,,251.2,390.6,,,190.7,278.8
69,2005/2006,Central Coast,,,,,,,,,...,173.0,18.6,,,237.8,368.5,,,208.5,321.6


## Output Dataset

In [28]:
df_merged.to_csv('data-merged.csv', index=False)