Summary of Health Risks and Sources

- PM10:
    - Health Risks: Respiratory and cardiovascular problems, eye/nose/throat irritation.
    - Sources: Natural (dust storms, wildfires), Human (construction, industrial, vehicles).
- PM2.5:
    - Health Risks: Deep lung penetration, cardiovascular problems, premature death, cancer.
    - Sources: Combustion (vehicles, residential heating, power plants), secondary formation.
- SO2:
    - Health Risks: Respiratory problems, cardiovascular issues; also contributes to acid rain (an environmental impact).
    - Sources: Fossil fuel combustion, volcanic eruptions.
- CO:
    - Health Risks: Oxygen deprivation, exacerbates heart disease.
    - Sources: Incomplete combustion (vehicles, residential heating, industrial), wildfires.
- O3:
    - Health Risks: Respiratory irritation, decreased lung function, premature aging of the lungs.
    - Sources: Secondary pollutant formed from sunlight reacting with hydrocarbons and NOx.
- NO2:
    - Health Risks: Respiratory problems, cardiovascular effects; also a precursor to ozone and PM2.5 formation.
    - Sources: Combustion (vehicles, power plants, industrial), natural sources (lightning, soil).

In [None]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

### Remap Values

In [None]:
# Sample the diverging red-yellow-green matplotlib colormap at evenly spaced
# positions and show the samples as hex strings.
cmap = plt.get_cmap("RdYlGn")
num_colors = 5
# Positions are i / num_colors, i.e. 0.0, 0.2, 0.4, 0.6, 0.8 — the upper end
# of the colormap (1.0) is never sampled.
colors = [cmap(i / num_colors) for i in range(num_colors)]
hex_colors = [mcolors.to_hex(color) for color in colors]
# Bare expression: the notebook displays the list as the cell output.
hex_colors

In [None]:
# CSS declarations keyed by the category labels returned by the
# ``*_to_category`` functions; used to colour cells of a styled DataFrame.
map_color_values = {
    'Good': 'background: #66bd63; color:black;',
    'Moderate': 'background: #d9ef8b; color:black;',
    'Unhealthy for Sensitive Groups': 'background: #fee08b; color:black;',
    'Unhealthy': 'background: #f46d43; color:white;',
    'Very Unhealthy': 'background: #a50026; color:white;',
    # NOTE(review): black text on a black background makes the cell content
    # invisible — presumably intentional to blank out missing values; confirm.
    'No Record': 'background: black; color:black;'
}

In [None]:
def pm10_to_category(value):
    """Return the air-quality category label for a PM10 reading.

    Missing values (anything ``pd.isna`` reports as missing) yield
    ``"No Record"``. Otherwise the first band whose inclusive upper bound is
    not exceeded wins; readings above the last bound are ``"Very Unhealthy"``.
    The unit/scale of ``value`` is not specified here.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label), in ascending order.
    bands = (
        (50, "Good"),
        (100, "Moderate"),
        (150, "Unhealthy for Sensitive Groups"),
        (200, "Unhealthy"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label
    return "Very Unhealthy"

def pm2_5_to_category(value):
    """Return the air-quality category label for a PM2.5 reading.

    Missing values (anything ``pd.isna`` reports as missing) yield
    ``"No Record"``. Otherwise the first band whose inclusive upper bound is
    not exceeded wins; readings above the last bound are ``"Very Unhealthy"``.
    The unit/scale of ``value`` is not specified here.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label), in ascending order.
    bands = (
        (12, "Good"),
        (35, "Moderate"),
        (55, "Unhealthy for Sensitive Groups"),
        (150, "Unhealthy"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label
    return "Very Unhealthy"

def so2_to_category(value):
    """Return the air-quality category label for an SO2 reading.

    Missing values (anything ``pd.isna`` reports as missing) yield
    ``"No Record"``. Otherwise the first band whose inclusive upper bound is
    not exceeded wins; readings above the last bound are ``"Very Unhealthy"``.
    The unit/scale of ``value`` is not specified here.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label), in ascending order.
    bands = (
        (75, "Good"),
        (185, "Moderate"),
        (304, "Unhealthy for Sensitive Groups"),
        (604, "Unhealthy"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label
    return "Very Unhealthy"

def co_to_category(value):
    """Return the air-quality category label for a CO reading.

    Missing values (anything ``pd.isna`` reports as missing) yield
    ``"No Record"``. Otherwise the first band whose inclusive upper bound is
    not exceeded wins; readings above the last bound are ``"Very Unhealthy"``.
    The unit/scale of ``value`` is not specified here.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label), in ascending order.
    bands = (
        (4.4, "Good"),
        (9.4, "Moderate"),
        (12.4, "Unhealthy for Sensitive Groups"),
        (15.4, "Unhealthy"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label
    return "Very Unhealthy"

def o3_to_category(value):
    """Return the air-quality category label for an O3 reading.

    Missing values (anything ``pd.isna`` reports as missing) yield
    ``"No Record"``. Otherwise the first band whose inclusive upper bound is
    not exceeded wins; readings above the last bound are ``"Very Unhealthy"``.
    The unit/scale of ``value`` is not specified here.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label), in ascending order.
    bands = (
        (54, "Good"),
        (70, "Moderate"),
        (85, "Unhealthy for Sensitive Groups"),
        (105, "Unhealthy"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label
    return "Very Unhealthy"

def no2_to_category(value):
    """Return the air-quality category label for an NO2 reading.

    Missing values (anything ``pd.isna`` reports as missing) yield
    ``"No Record"``. Otherwise the first band whose inclusive upper bound is
    not exceeded wins; readings above the last bound are ``"Very Unhealthy"``.
    The unit/scale of ``value`` is not specified here.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label), in ascending order.
    bands = (
        (53, "Good"),
        (100, "Moderate"),
        (360, "Unhealthy for Sensitive Groups"),
        (649, "Unhealthy"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label
    return "Very Unhealthy"

### Load Dataset

In [None]:
# Load the dataset from a CSV in the working directory and parse the 'date'
# column into pandas datetimes so the ``.dt`` accessors used below work.
df= pd.read_csv("df_ispa_dki_all.csv")
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Number of missing values in each column (displayed as the cell output).
df.isna().sum()

### Summary

In [None]:
def get_summary(df, col_place, col_value, mapfunc):
    """Count category occurrences of one column for every place.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is only read, never modified.
    col_place : str
        Name of the column whose distinct values become the output columns.
    col_value : str
        Name of the column whose values are categorised.
    mapfunc : callable
        Applied to every value of ``col_value`` (missing values included) to
        obtain its category label.

    Returns
    -------
    pandas.DataFrame
        Integer counts with one row per category label (index named
        ``col_value``) and one column per distinct place (columns named
        ``col_place``). Combinations that never occur are 0.
    """
    # Categorise once for the whole frame instead of once per place.
    categories = df[col_value].map(mapfunc)

    # Rows are selected with a boolean mask instead of a string-built
    # ``DataFrame.query``: the query string broke on column names that are
    # not valid identifiers and on place names containing a quote, and it
    # never matched non-string places because the value was always quoted.
    # ``to_numpy()`` makes the mask positional, so a non-unique index is fine.
    val_counts = {
        place: categories[(df[col_place] == place).to_numpy()]
        .value_counts()
        .to_dict()
        for place in df[col_place].unique()
    }

    df_val_counts = pd.DataFrame(val_counts)
    df_val_counts.index.name = col_value
    df_val_counts.columns.name = col_place

    # A category absent for a place shows up as NaN after alignment.
    return df_val_counts.fillna(0).astype(int)

In [None]:
# Count of PM2.5 category labels per district (displayed as the cell output).
get_summary(df, 'district', 'pm25', pm2_5_to_category)

### Jakarta Air Quality by Time Period

#### Year-month

In [None]:
# Distinct values of the 'month' column in order of first appearance; used
# below as replacement labels for the month level of the pivot table.
# NOTE(review): presumably month names that first appear in calendar order —
# confirm against the data, since the relabelling relies on that order.
month_remap = df['month'].unique().tolist()

In [None]:
# Mean PM2.5 per station (rows) for every (year, month) pair (columns),
# rounded to one decimal. The month level is the month number taken from
# the parsed 'date' column, not the 'month' column of the dataset.
df_over_year_month = df.pivot_table(
    index=['station'],
    columns=['year',df['date'].dt.month.rename('month')],
    values='pm25',
    aggfunc='mean'
).round(1)

In [None]:
# Inspect the month level of the column MultiIndex before relabelling it.
df_over_year_month.columns.levels[1]

In [None]:
# Replace the month level's labels with the entries of month_remap.
# NOTE(review): this needs month_remap to have as many entries as the level
# has, listed in the same order as the level's month numbers — confirm.
df_over_year_month.columns = df_over_year_month.columns.set_levels(month_remap, level=1)

In [None]:
# Colour each cell with the CSS of its PM2.5 category (missing cells get the
# 'No Record' style) and display the values with one decimal.
df_over_year_month.style.map(lambda x: map_color_values[pm2_5_to_category(x)]).format(precision=1)

#### Monthly Trend in each Year

In [None]:
# Mean PM2.5 for every (year, month number) pair. Transposing and melting
# turns the grouped result into a long table with the columns
# 'year', 'month' and 'value'.
monthly_trend = df.groupby(
    ['year', df['date'].dt.month.rename('month')]
).agg({'pm25':'mean'}).T.melt()
monthly_trend['value'] = monthly_trend['value'].round(2)

In [None]:
# Drop rows with any missing field (e.g. groups whose mean PM2.5 is NaN).
monthly_trend = monthly_trend.dropna(axis=0, how='any')

In [None]:
# Line chart of mean PM2.5 by month number, one line with markers per year.
# The hover template is reset to None and the hover mode is set to 'x'.
fig_monthly_trend = px.line(
    monthly_trend, x='month', y='value', color='year', template='plotly_white', 
    markers=True
).update_traces(hovertemplate=None, textposition="top center")\
.update_layout(xaxis=dict(nticks=20), hovermode='x', title='Monthly Trend in each Year')

In [None]:
# Bare expression: the notebook renders the figure as the cell output.
fig_monthly_trend

#### Day-of-Month Trend in each Year

In [None]:
# Mean PM2.5 for every (year, 'day') pair, reshaped via transpose + melt
# into a long table with the columns 'year', 'day' and 'value'.
# NOTE(review): 'day' is presumably the day of the month — confirm against
# the dataset.
day_of_month_trend = df.groupby(['year', 'day']).agg({'pm25':'mean'}).T.melt()
# Means are rounded to whole numbers here (the monthly trend keeps 2 decimals).
day_of_month_trend['value'] = day_of_month_trend['value'].round(0)

In [None]:
# Drop rows with any missing field (e.g. groups whose mean PM2.5 is NaN).
day_of_month_trend = day_of_month_trend.dropna(axis=0, how='any')

In [None]:
# Line chart of mean PM2.5 by 'day', one line with markers per year.
# The hover template is reset to None and the hover mode is set to 'x'.
fig_day_of_month_trend = px.line(
    day_of_month_trend, x='day', y='value', color='year', template='plotly_white', 
    markers=True
).update_traces(hovertemplate=None, textposition="top center")\
.update_layout(xaxis=dict(nticks=20), hovermode='x', title='Day of Month Trend over Year')

In [None]:
# Bare expression: the notebook renders the figure as the cell output.
fig_day_of_month_trend

#### Day of Week Trend

In [None]:
# Mean PM2.5 for every (year, day-of-week) pair, reshaped via transpose +
# melt into a long table with the columns 'year', 'dow' and 'value'.
# 'dow' comes from pandas' ``dt.day_of_week`` (Monday=0 ... Sunday=6).
day_of_week_trend = df.groupby(['year', df['date'].dt.day_of_week.rename('dow')]).agg({'pm25':'mean'}).T.melt()
# Means are rounded to whole numbers.
day_of_week_trend['value'] = day_of_week_trend['value'].round(0)

In [None]:
# Drop rows with any missing field (e.g. groups whose mean PM2.5 is NaN).
day_of_week_trend = day_of_week_trend.dropna(axis=0, how='any')

In [None]:
# Tick labels for the day-of-week axis; position i labels 'dow' value i,
# so the list starts with Monday to match ``dt.day_of_week``.
remap_dow = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

In [None]:
# Line chart of mean PM2.5 by day of week, one line with markers per year.
# The numeric 'dow' ticks 0-6 are shown as the weekday names in remap_dow.
fig_day_of_week_trend = px.line(
    day_of_week_trend, x='dow', y='value', color='year', template='plotly_white', 
    markers=True
).update_traces(hovertemplate=None, textposition="top center")\
.update_layout(
    xaxis=dict(nticks=20, tickmode = 'array',  tickvals=[0, 1, 2, 3, 4, 5, 6], ticktext=remap_dow), 
    hovermode='x', title='Day of Week Trend over Year')

In [None]:
# Bare expression: the notebook renders the figure as the cell output.
fig_day_of_week_trend