## Beginning with the code analysis


In [172]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [173]:
# One row per AQI category, lowest first. Keeping the three scales side by
# side makes the pairing explicit: entry i of each derived list belongs to
# the same category.
# Concentrations are in µg/m³.
_AQI_TABLE = [
    # (AQI range,  PM2.5 range,     PM10 range)
    ((0, 50),      (0, 12.0),       (0, 54)),      # Good
    ((51, 100),    (12.1, 35.4),    (55, 154)),    # Moderate
    ((101, 150),   (35.5, 55.4),    (155, 254)),   # Unhealthy for Sensitive Groups
    ((151, 200),   (55.5, 150.4),   (255, 354)),   # Unhealthy
    ((201, 300),   (150.5, 250.4),  (355, 424)),   # Very Unhealthy
    ((301, 500),   (250.5, 500.4),  (425, 604)),   # Hazardous
]

# Standard AQI breakpoints for PM2.5 (µg/m³)
pm25_breakpoints = [row[1] for row in _AQI_TABLE]

# Standard AQI breakpoints for PM10 (µg/m³)
pm10_breakpoints = [row[2] for row in _AQI_TABLE]

# Corresponding AQI ranges
aqi_ranges = [row[0] for row in _AQI_TABLE]

In [174]:
def calculate_aqi(pm_value, breakpoints, aqi_bands=None):
    """
    Convert a pollutant concentration to an AQI value by linear interpolation.

    Parameters:
        pm_value: measured concentration, in the same unit as `breakpoints`.
        breakpoints: list of (low, high) concentration bands, sorted in
            ascending order, one per AQI category.
        aqi_bands: list of (low, high) AQI ranges aligned index-by-index with
            `breakpoints`. Defaults to the module-level `aqi_ranges`.

    Returns:
        The AQI as a float, or None when the value is NaN, below the first
        band, or above the last band.

    The bands are written with limited precision (e.g. 12.0 then 12.1), so a
    reading such as 12.05 sits in a gap between two bands. Such a reading is
    assigned the top AQI of the lower band, which is the result obtained by
    truncating the reading to the precision of the table.
    """
    if aqi_bands is None:
        aqi_bands = aqi_ranges

    previous_high_aqi = None
    for (low_pm, high_pm), (low_aqi, high_aqi) in zip(breakpoints, aqi_bands):
        if pm_value < low_pm:
            # Either below the whole scale (no previous band -> None) or in
            # the gap just above the previous band.
            if previous_high_aqi is None:
                return None
            return float(previous_high_aqi)
        if pm_value <= high_pm:
            slope = (high_aqi - low_aqi) / (high_pm - low_pm)
            return slope * (pm_value - low_pm) + low_aqi
        previous_high_aqi = high_aqi

    # Above the highest band, or NaN (every comparison with NaN is False).
    return None

In [175]:
def load_and_prepare_data(file_path):
    """
    Read a CSV into a DataFrame and parse its 'Date' column as datetimes.

    `file_path` is passed straight to `pd.read_csv`. The returned frame has
    the same columns as the file, with 'Date' converted by `pd.to_datetime`.
    """
    raw_frame = pd.read_csv(file_path)
    return raw_frame.assign(Date=lambda frame: pd.to_datetime(frame['Date']))

In [176]:
def _daily_max_aqi(dates, values, breakpoints):
    """
    Return {date: highest AQI seen that day} for one pollutant column.

    Rows with a missing date or reading are skipped. A date maps to None when
    every reading for it fell outside the breakpoint table.
    """
    daily = {}
    for date, value in zip(dates, values):
        if pd.isna(date) or pd.isna(value):
            continue
        aqi_val = calculate_aqi(value, breakpoints)
        current = daily.get(date)
        # First reading for the day is stored as-is (even None); afterwards
        # only a real, larger AQI replaces it.
        if date not in daily or (aqi_val is not None and (current is None or aqi_val > current)):
            daily[date] = aqi_val
    return daily


def compute_aqi(data):
    """
    Compute the daily AQI as the maximum of the PM2.5 and PM10 sub-indices.

    Parameters:
        data: DataFrame with 'Date', 'pm25' and 'pm10' columns.

    Returns:
        dict mapping each date to its AQI, in ascending date order. The value
        is None when no reading for that date could be converted to an AQI
        (for example, every reading was above the top of the scale).
    """
    aqi25 = _daily_max_aqi(data['Date'], data['pm25'], pm25_breakpoints)
    aqi10 = _daily_max_aqi(data['Date'], data['pm10'], pm10_breakpoints)

    # Combine AQI values (taking the maximum of PM2.5 and PM10 for each day).
    # Missing/None sub-indices are dropped explicitly rather than defaulted
    # to 0 or tested for truthiness, so an off-scale day is never reported as
    # AQI 0 and a genuine AQI of 0 is never discarded.
    aqi_combined = {}
    for date in sorted(set(aqi25) | set(aqi10)):
        candidates = [
            value
            for value in (aqi25.get(date), aqi10.get(date))
            if value is not None
        ]
        aqi_combined[date] = max(candidates) if candidates else None

    return aqi_combined

In [177]:
def plot_aqi_time_series(aqi_data, year):
    """
    Plot the AQI time series with horizontal lines for AQI thresholds.

    Parameters:
        aqi_data: dict mapping dates to AQI values.
        year: label inserted into the figure title.

    Two dashed reference lines mark the upper AQI bound of the
    "Unhealthy for Sensitive Groups" (150) and "Unhealthy" (200) categories.
    The figure is displayed with `fig.show()`; nothing is returned.
    """
    # Sort chronologically so the line is drawn left to right and the first
    # and last entries give the x-extent for the threshold lines.
    ordered = sorted(aqi_data.items(), key=lambda item: item[0])
    dates = [date for date, _ in ordered]
    values = [value for _, value in ordered]

    # Create the time series plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=dates,
        y=values,
        mode='lines',
        line=dict(color='royalblue', width=2),
        name='AQI Time Series'
    ))

    # (AQI level, line colour, legend label) for each reference line.
    thresholds = [
        (150, "orange", "Unhealthy for Sensitive Groups"),
        (200, "red", "Unhealthy"),
    ]
    for level, color, label in thresholds:
        line_style = dict(color=color, width=1, dash="dash")
        # With no data there is no x-extent to span, so skip the shape
        # instead of failing on an empty sequence.
        if dates:
            fig.add_shape(
                type="line",
                x0=dates[0],
                x1=dates[-1],
                y0=level,
                y1=level,
                line=line_style,
            )
        # Shapes do not appear in the legend; an empty trace with the same
        # styling provides the legend entry.
        fig.add_trace(go.Scatter(
            x=[None], y=[None],
            mode='lines',
            line=line_style,
            name=label
        ))

    # Update layout
    fig.update_layout(
        title=f'AQI Time Series for {year}',
        xaxis_title='Date',
        yaxis_title='AQI',
        template='plotly_white',
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="right",
            x=1.2
        )
    )

    fig.show()

In [178]:
def analyze_year(file_path, year):
    """
    Run the full AQI pipeline for one data file.

    Loads the CSV at `file_path`, derives the per-date AQI, shows the time
    series plot titled with `year`, and returns the per-date AQI as a
    DataFrame with 'Date' and 'AQI' columns.
    """
    daily_aqi = compute_aqi(load_and_prepare_data(file_path))
    plot_aqi_time_series(daily_aqi, year)

    # Tabulate the (date, AQI) pairs so callers can analyse them further.
    records = list(daily_aqi.items())
    return pd.DataFrame(records, columns=['Date', 'AQI'])

In [179]:
# Plot the 2021 Kathmandu AQI series and keep the per-date AQI table.
aqi_2021 = analyze_year('data/Kathmandu_2021.csv', '2021')

In [180]:
# Plot the 2022 Kathmandu AQI series and keep the per-date AQI table.
aqi_2022 = analyze_year('data/Kathmandu_2022.csv', '2022')

In [183]:
# Plot the 2023 Kathmandu AQI series and keep the per-date AQI table.
aqi_2023 = analyze_year('data/Kathmandu_2023.csv', '2023')

In [184]:
# Plot the Kathmandu AQI series from the combined 2024-2025 file and keep
# the per-date AQI table (the variable name mentions 2024 only).
aqi_2024 = analyze_year('data/Kathmandu_2024_2025.csv', '2024-2025')