Summary of Health Risks and Sources

- PM10:
    - Health Risks: Respiratory and cardiovascular problems, eye/nose/throat irritation.
    - Sources: Natural (dust storms, wildfires), Human (construction, industrial, vehicles).
- PM2.5:
    - Health Risks: Deep lung penetration, cardiovascular problems, premature death, cancer.
    - Sources: Combustion (vehicles, residential heating, power plants), secondary formation.
- SO2:
    - Health Risks: Respiratory problems, cardiovascular issues. Also an environmental hazard: contributes to acid rain.
    - Sources: Fossil fuel combustion, volcanic eruptions.
- CO:
    - Health Risks: Oxygen deprivation, exacerbates heart disease.
    - Sources: Incomplete combustion (vehicles, residential heating, industrial), wildfires.
- O3:
    - Health Risks: Respiratory irritation, decreased lung function, premature aging of the lungs.
    - Sources: Secondary pollutant formed from sunlight reacting with hydrocarbons and NOx.
- NO2:
    - Health Risks: Respiratory problems, cardiovascular effects. Also a precursor that contributes to ozone and PM2.5 formation.
    - Sources: Combustion (vehicles, power plants, industrial), natural sources (lightning, soil).

In [1]:
import pandas as pd
import plotly.express as px

### Remap Values

In [2]:
# Colour used to render each air-quality category label. The keys are the
# labels returned by the *_to_category functions in this notebook, including
# "No Record" (returned for missing readings) so that styling a table that
# still contains gaps does not fail with a KeyError.
map_color_values = {
    'Good': 'lightgreen',
    'Moderate': 'lightblue',
    'Unhealthy for Sensitive Groups': 'yellow',
    'Unhealthy': 'tomato',
    'Very Unhealthy': 'red',
    'No Record': 'lightgrey',
}

In [16]:
def pm10_to_category(value):
    """Translate a PM10 reading into its air-quality category label.

    Missing readings (anything ``pd.isna`` flags) give "No Record".
    Upper bounds are inclusive: <=50 "Good", <=100 "Moderate",
    <=150 "Unhealthy for Sensitive Groups", <=200 "Unhealthy";
    anything larger is "Very Unhealthy".
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (50, "Good"),
        (100, "Moderate"),
        (150, "Unhealthy for Sensitive Groups"),
        (200, "Unhealthy"),
    )
    for upper, label in bands:
        if value <= upper:
            return label
    return "Very Unhealthy"

def pm2_5_to_category(value):
    """Translate a PM2.5 reading into its air-quality category label.

    Missing readings (anything ``pd.isna`` flags) give "No Record".
    Upper bounds are inclusive: <=12 "Good", <=35 "Moderate",
    <=55 "Unhealthy for Sensitive Groups", <=150 "Unhealthy";
    anything larger is "Very Unhealthy".
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (12, "Good"),
        (35, "Moderate"),
        (55, "Unhealthy for Sensitive Groups"),
        (150, "Unhealthy"),
    )
    for upper, label in bands:
        if value <= upper:
            return label
    return "Very Unhealthy"

def so2_to_category(value):
    """Translate an SO2 reading into its air-quality category label.

    Missing readings (anything ``pd.isna`` flags) give "No Record".
    Upper bounds are inclusive: <=75 "Good", <=185 "Moderate",
    <=304 "Unhealthy for Sensitive Groups", <=604 "Unhealthy";
    anything larger is "Very Unhealthy".
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (75, "Good"),
        (185, "Moderate"),
        (304, "Unhealthy for Sensitive Groups"),
        (604, "Unhealthy"),
    )
    for upper, label in bands:
        if value <= upper:
            return label
    return "Very Unhealthy"

def co_to_category(value):
    """Translate a CO reading into its air-quality category label.

    Missing readings (anything ``pd.isna`` flags) give "No Record".
    Upper bounds are inclusive: <=4.4 "Good", <=9.4 "Moderate",
    <=12.4 "Unhealthy for Sensitive Groups", <=15.4 "Unhealthy";
    anything larger is "Very Unhealthy".

    NOTE(review): these bounds are small compared with the other pollutants'
    scales - confirm the `co` column is recorded in the unit they assume.
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (4.4, "Good"),
        (9.4, "Moderate"),
        (12.4, "Unhealthy for Sensitive Groups"),
        (15.4, "Unhealthy"),
    )
    for upper, label in bands:
        if value <= upper:
            return label
    return "Very Unhealthy"

def o3_to_category(value):
    """Translate an O3 reading into its air-quality category label.

    Missing readings (anything ``pd.isna`` flags) give "No Record".
    Upper bounds are inclusive: <=54 "Good", <=70 "Moderate",
    <=85 "Unhealthy for Sensitive Groups", <=105 "Unhealthy";
    anything larger is "Very Unhealthy".
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (54, "Good"),
        (70, "Moderate"),
        (85, "Unhealthy for Sensitive Groups"),
        (105, "Unhealthy"),
    )
    for upper, label in bands:
        if value <= upper:
            return label
    return "Very Unhealthy"

def no2_to_category(value):
    """Translate an NO2 reading into its air-quality category label.

    Missing readings (anything ``pd.isna`` flags) give "No Record".
    Upper bounds are inclusive: <=53 "Good", <=100 "Moderate",
    <=360 "Unhealthy for Sensitive Groups", <=649 "Unhealthy";
    anything larger is "Very Unhealthy".
    """
    if pd.isna(value):
        return "No Record"

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (53, "Good"),
        (100, "Moderate"),
        (360, "Unhealthy for Sensitive Groups"),
        (649, "Unhealthy"),
    )
    for upper, label in bands:
        if value <= upper:
            return label
    return "Very Unhealthy"

### Load Dataset

In [4]:
# Read the CSV and convert its 'date' column from text to datetime in a
# single chained expression.
df = pd.read_csv("df_ispa_dki_all.csv").assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [None]:
# Number of missing values in each column (axis=0 sums down the rows).
df.isna().sum(axis=0)

### Summary

In [None]:
# Mean 'pm25' per station (rows) and year (columns), rounded to two decimals.
df_over_year = pd.pivot_table(
    df,
    values=['pm25'],
    index=['station'],
    columns=['year'],
    aggfunc='mean',
).round(2)

In [22]:
def get_summary(df, col_place, col_value, mapfunc):
    """Count how many readings fall into each category, per place.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is only read, never modified.
    col_place : str
        Column whose distinct values become the columns of the result.
    col_value : str
        Column holding the readings to categorise.
    mapfunc : callable
        Applied to every reading (missing ones included) to get its label.

    Returns
    -------
    pandas.DataFrame
        Integer counts with one row per label (index named ``col_value``)
        and one column per place (columns named ``col_place``), places in
        order of first appearance. Label/place pairs that never occur are 0.
        Rows whose place is missing are not counted.
    """
    # Label the whole column once, then split the labels by place. Grouping
    # on the column itself (rather than building a query string from each
    # place value) works for any place dtype, for values containing quotes,
    # and for column names that are not valid identifiers.
    categories = df[col_value].map(mapfunc)
    val_counts = {
        place: labels.value_counts().to_dict()
        for place, labels in categories.groupby(df[col_place], sort=False)
    }

    df_val_counts = pd.DataFrame(val_counts)
    df_val_counts.index.name = col_value
    df_val_counts.columns.name = col_place

    # A label absent from a place shows up as NaN after alignment; it means 0.
    return df_val_counts.fillna(0).astype(int)

In [23]:
# Per-district count of PM10 readings in each category.
get_summary(
    df,
    col_place='district',
    col_value='pm10',
    mapfunc=pm10_to_category,
)

district,Jakarta Pusat,Jakarta Utara,Jakarta Timur,Jakarta Selatan,Jakarta Barat
pm10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Moderate,2936,3003,3048,1945,2117
Good,1746,1400,937,2327,1485
No Record,106,176,600,252,197
Unhealthy for Sensitive Groups,2,6,158,2,10
Unhealthy,0,0,8,0,0


In [19]:
# Per-district count of PM2.5 readings in each category.
get_summary(
    df,
    col_place='district',
    col_value='pm25',
    mapfunc=pm2_5_to_category,
)

district,Jakarta Pusat,Jakarta Utara,Jakarta Timur,Jakarta Selatan,Jakarta Barat
pm25,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
No Record,3910.0,3684.0,3656.0,3631.0,3208.0
Unhealthy,667.0,702.0,993.0,719.0,503.0
Unhealthy for Sensitive Groups,167.0,169.0,78.0,117.0,76.0
Moderate,46.0,30.0,6.0,58.0,22.0
Very Unhealthy,0.0,0.0,18.0,0.0,0.0
Good,0.0,0.0,0.0,1.0,0.0


In [21]:
# Per-district count of NO2 readings in each category.
get_summary(
    df,
    col_place='district',
    col_value='no2',
    mapfunc=no2_to_category,
)

district,Jakarta Pusat,Jakarta Utara,Jakarta Timur,Jakarta Selatan,Jakarta Barat
no2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Good,4694,4507,4616,4237,3653
No Record,83,39,106,237,120
Moderate,13,37,26,48,25
Unhealthy for Sensitive Groups,0,2,3,4,11


In [13]:
# Per-district count of CO readings in each category.
get_summary(
    df,
    col_place='district',
    col_value='co',
    mapfunc=co_to_category,
)

district,Jakarta Pusat,Jakarta Utara,Jakarta Timur,Jakarta Selatan,Jakarta Barat
co,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Very Unhealthy,3542,2273,2249,2412,2304
Moderate,421,851,877,761,496
Unhealthy for Sensitive Groups,390,716,683,598,455
Unhealthy,339,660,677,589,398
Good,46,44,89,111,60


In [None]:
# Per-district count of SO2 readings in each category.
get_summary(
    df,
    col_place='district',
    col_value='so2',
    mapfunc=so2_to_category,
)

In [None]:
# Per-district count of O3 readings in each category.
get_summary(
    df,
    col_place='district',
    col_value='o3',
    mapfunc=o3_to_category,
)

In [None]:
# Display the station-by-year table of mean 'pm25' values.
df_over_year

In [None]:
# Fill each station's missing yearly mean with the next available value to its
# right (axis=1 walks across the year columns). The name is rebound instead of
# mutating with inplace=True, so any other reference to the unfilled table is
# left untouched.
# NOTE(review): this shows a later year's mean in place of a missing earlier
# year - confirm that is the intended imputation.
df_over_year = df_over_year.bfill(axis=1)

In [None]:
# Colour each cell by the air-quality category of its value. df_over_year holds
# 'pm25' means, so the PM2.5 thresholds are used (not the PM10 ones). Cells
# whose category has no colour entry (e.g. "No Record" for values that are
# still missing) are left unshaded instead of raising a KeyError.
df_over_year.style.map(
    lambda x: f"background:{map_color_values.get(pm2_5_to_category(x), 'transparent')}"
).format(precision=2)

In [None]:
# Same category colouring without number formatting. The category function is
# called (it was previously subscripted, which raises TypeError), the PM2.5
# thresholds are used because the table holds 'pm25' means, and the lambda
# returns a complete CSS declaration, which Styler.map expects, rather than a
# bare colour name. Categories with no colour entry are left unshaded.
df_over_year.style.map(
    lambda x: f"background-color:{map_color_values.get(pm2_5_to_category(x), 'transparent')}"
)

In [None]:
# Shade each column independently (axis=0) on the "Reds" colour map.
df_over_year.style.background_gradient(
    axis=0,
    cmap="Reds",
)

In [None]:
# Mark the largest value of each column in red and show two decimals.
(
    df_over_year.style
    .highlight_max(color='red')
    .format(precision=2)
)

In [None]:
# Distinct values present in the 'category' column.
df.loc[:, 'category'].unique()