## Beginning with the code analysis


In [550]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [551]:
import pandas as pd
import plotly.graph_objects as go

def plot_aqi_time_series(data, year):
    """
    Draw an AQI time series with dashed horizontal AQI threshold lines.

    The 'Date' column of ``data`` is converted to datetime in place, so the
    caller's DataFrame is modified. The plotted value for each row is the
    larger of its 'pm25' and 'pm10' entries.

    Parameters:
    data (pd.DataFrame): DataFrame with 'Date', 'pm25', and 'pm10' columns
    year (int or str): The period being plotted (used in the title only)

    Returns:
    go.Figure: The figure, after it has been displayed with ``fig.show()``
    """
    # Parse the dates up front; this writes back into the caller's DataFrame
    data['Date'] = pd.to_datetime(data['Date'])
    first_day = data['Date'].min()
    last_day = data['Date'].max()

    # Row-wise maximum of the two pollutant columns is what gets plotted
    aqi_values = data[['pm25', 'pm10']].max(axis=1)

    # One entry per threshold: (y level, line colour, legend label)
    thresholds = (
        (51, "orange", "moderate (AQI > 50)"),
        (150, "red", "Unhealthy (AQI > 150)"),
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=data['Date'],
        y=aqi_values,
        mode='lines',
        line=dict(color='royalblue', width=2),
        name='AQI Time Series'
    ))

    # Dashed lines spanning the full date range at each threshold level
    for level, colour, _ in thresholds:
        fig.add_shape(
            type="line",
            x0=first_day,
            x1=last_day,
            y0=level,
            y1=level,
            line=dict(color=colour, width=1, dash="dash"),
        )

    # Empty traces styled like the threshold lines, added for the legend
    for _, colour, label in thresholds:
        fig.add_trace(go.Scatter(
            x=[None], y=[None],
            mode='lines',
            line=dict(color=colour, width=1, dash="dash"),
            name=label
        ))

    legend_position = dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="right",
        x=1.2
    )
    fig.update_layout(
        title=f'AQI Time Series for {year}',
        xaxis_title='Date',
        yaxis_title='AQI',
        template='plotly_white',
        legend=legend_position
    )

    fig.show()
    #fig.write_html(f"Aqi_time_{year}.html")
    return fig



In [552]:
# Read the 2019 Kathmandu CSV into a DataFrame
data_2019 = pd.read_csv('data/Kathmandu_2019.csv')

# Plot it and keep the figure under a name that matches its year
# (it was previously stored as aqi_2021, which the 2021 cell overwrites)
aqi_2019 = plot_aqi_time_series(data_2019, '2019')

In [553]:
# Read the 2020 Kathmandu CSV into a DataFrame
data_2020 = pd.read_csv('data/Kathmandu_2020.csv')

# Plot it and keep the figure under a name that matches its year
# (it was previously stored as aqi_2021, which the 2021 cell overwrites)
aqi_2020 = plot_aqi_time_series(data_2020, '2020')

In [554]:
# Load the 2021 Kathmandu CSV
data_2021 = pd.read_csv(filepath_or_buffer='data/Kathmandu_2021.csv')

# Display the 2021 time series and keep the returned figure
aqi_2021 = plot_aqi_time_series(data=data_2021, year='2021')

In [555]:
# Load the 2022 Kathmandu CSV
data_2022 = pd.read_csv(filepath_or_buffer='data/Kathmandu_2022.csv')

# Display the 2022 time series and keep the returned figure
aqi_2022 = plot_aqi_time_series(data=data_2022, year='2022')

In [556]:
# Load the 2023 Kathmandu CSV
data_2023 = pd.read_csv(filepath_or_buffer='data/Kathmandu_2023.csv')

# Display the 2023 time series and keep the returned figure
aqi_2023 = plot_aqi_time_series(data=data_2023, year='2023')

In [557]:
# Load the combined 2024-2025 Kathmandu CSV
data_2024 = pd.read_csv(filepath_or_buffer='data/Kathmandu_2024_2025.csv')

# Display the 2024-2025 time series and keep the returned figure
aqi_2024 = plot_aqi_time_series(data=data_2024, year='2024-2025')

Now to find the times when the AQI is much higher within a given year

In [558]:
def load_fire_data(path):
    """
    Load a fire-detection CSV and summarise it per acquisition date.

    Each detection gets a weight derived from its 'confidence' value:
    numeric confidences are divided by 100, while text confidences are mapped
    with low -> 0.5, nominal -> 1.0, high -> 1.5 (any other label becomes
    NaN). The daily 'avg_frp' is the weight-weighted mean of 'frp'.

    Parameters:
    path (str): Path to a CSV with 'acq_date', 'confidence', 'frp' and
        'latitude' columns

    Returns:
    pd.DataFrame: One row per acquisition date with columns 'acq_date',
        'fire_count' (number of non-null 'latitude' entries that day) and
        'avg_frp', on a default integer index
    """
    fire_data = pd.read_csv(path)
    fire_data['acq_date'] = pd.to_datetime(fire_data['acq_date'])

    confidence = fire_data['confidence']
    # Branch on "is it numeric" rather than "is the dtype object": text can
    # also arrive as a dedicated string or categorical dtype, which would
    # otherwise reach the division below and raise.
    if pd.api.types.is_numeric_dtype(confidence):
        fire_data['weight'] = confidence / 100.0
    else:
        weight_map = {'low': 0.5, 'nominal': 1.0, 'high': 1.5}
        fire_data['weight'] = confidence.map(weight_map)

    fire_data['weighted_frp'] = fire_data['frp'] * fire_data['weight']

    daily_fires = fire_data.groupby('acq_date').agg(
        fire_count=('latitude', 'count'),
        weighted_frp=('weighted_frp', 'sum'),
        weight=('weight', 'sum'),
    )

    # Weighted mean: sum(frp * weight) / sum(weight) for each day
    daily_fires['avg_frp'] = daily_fires['weighted_frp'] / daily_fires['weight']

    # reset_index() turns 'acq_date' back into a column on a fresh index
    return daily_fires[['fire_count', 'avg_frp']].reset_index()


In [559]:
# Daily fire summaries for 2019-2023, one per yearly MODIS CSV
fire_data_2019 = load_fire_data(path='data/modis_2019_Nepal.csv')
fire_data_2020 = load_fire_data(path='data/modis_2020_Nepal.csv')
fire_data_2021 = load_fire_data(path='data/modis_2021_Nepal.csv')
fire_data_2022 = load_fire_data(path='data/modis_2022_Nepal.csv')
fire_data_2023 = load_fire_data(path='data/modis_2023_Nepal.csv')


In [560]:
def data_merging(data, fire_data):
    """
    Join the AQI readings with the daily fire summary on matching dates.

    Only dates present in both inputs are kept, in the order they appear in
    ``data``. The AQI for a row is the larger of its 'pm25' and 'pm10'
    values; a missing value in one column is ignored in favour of the other.
    Neither input DataFrame is modified.

    Parameters:
    data (pd.DataFrame): AQI readings with 'Date', 'pm25' and 'pm10' columns
    fire_data (pd.DataFrame): Daily fire summary with 'acq_date',
        'fire_count' and 'avg_frp' columns

    Returns:
    pd.DataFrame: Columns 'date', 'aqi', 'fire_count' and 'avg_frp' on a
        default integer index; empty (but with those columns) when no date
        matches
    """
    # Parse the dates here so the join does not depend on an earlier step
    # having already converted 'Date' to datetime.
    aqi_side = pd.DataFrame({
        'date': pd.to_datetime(data['Date']).reset_index(drop=True),
        'aqi': data[['pm25', 'pm10']].max(axis=1).reset_index(drop=True),
    })

    fire_side = fire_data[['acq_date', 'fire_count', 'avg_frp']].copy()
    fire_side['acq_date'] = pd.to_datetime(fire_side['acq_date'])
    # If a date appears more than once, use its first row only, so each AQI
    # row yields at most one output row.
    fire_side = (
        fire_side
        .drop_duplicates(subset='acq_date', keep='first')
        .rename(columns={'acq_date': 'date'})
    )

    # A single inner join replaces scanning the fire table once per AQI row
    return aqi_side.merge(fire_side, on='date', how='inner')
# Pair each year's AQI readings with that year's daily fire summary
merged_data_2019 = data_merging(data=data_2019, fire_data=fire_data_2019)
merged_data_2020 = data_merging(data=data_2020, fire_data=fire_data_2020)
merged_data_2021 = data_merging(data=data_2021, fire_data=fire_data_2021)
merged_data_2022 = data_merging(data=data_2022, fire_data=fire_data_2022)
merged_data_2023 = data_merging(data=data_2023, fire_data=fire_data_2023)
            

In [561]:
def scatter_plot_aqi_forest_fire(data, year):
    """
    Show a scatter plot of fire count against AQI, coloured by average FRP.

    Parameters:
    data (pd.DataFrame): Frame with 'aqi', 'fire_count', 'avg_frp' and
        'date' columns
    year (int or str): Label appended to the plot title

    Returns:
    None: The figure is displayed with ``fig.show()`` and not returned
    """
    axis_labels = {
        'aqi': 'Air Quality Index (AQI)',
        'fire_count': 'Fire Count',
        'avg_frp': 'FRP'
    }

    fig = px.scatter(
        data,
        x='aqi',
        y='fire_count',
        color='avg_frp',            # colour each point by its FRP value
        hover_name='date',          # the date is the hover heading
        color_continuous_scale='OrRd',
        labels=axis_labels,
        title=f'Fire Count vs AQI {year}',
        template='plotly_white'
    )

    # Thin black outline and partial transparency on every marker
    marker_style = dict(line=dict(width=1, color='black'), opacity=0.7)
    fig.update_traces(marker=marker_style)
    fig.update_layout(legend_title='FRP')

    #fig.write_html(f'Forest_aqi_{year}.html')
    fig.show()
# One fire-count-vs-AQI scatter plot per merged year
scatter_plot_aqi_forest_fire(data=merged_data_2019, year=2019)
scatter_plot_aqi_forest_fire(data=merged_data_2020, year=2020)
scatter_plot_aqi_forest_fire(data=merged_data_2021, year=2021)
scatter_plot_aqi_forest_fire(data=merged_data_2022, year=2022)
scatter_plot_aqi_forest_fire(data=merged_data_2023, year=2023)

In [562]:
def load_and_prepare_dataset(df):
    """
    Add calendar columns derived from the 'date' column.

    The 'date' column is converted to datetime and 'year', 'month' and
    'month_name' columns are added. All changes are made on ``df`` itself,
    which is also the object returned.

    Parameters:
    df (pd.DataFrame): Frame with a 'date' column

    Returns:
    pd.DataFrame: The same ``df`` object, with the extra columns
    """
    parsed_dates = pd.to_datetime(df['date'])
    df['date'] = parsed_dates

    calendar = parsed_dates.dt
    df['year'] = calendar.year
    df['month'] = calendar.month
    # Full month name as produced by strftime('%B')
    df['month_name'] = calendar.strftime('%B')
    return df


In [563]:
def combine_datasets(dfs):
    """
    Stack several frames into one and add the calendar columns to the result.

    Parameters:
    dfs (iterable of pd.DataFrame): Frames to concatenate row-wise

    Returns:
    pd.DataFrame: The concatenated frame, renumbered from 0, after being
        passed through ``load_and_prepare_dataset``
    """
    return load_and_prepare_dataset(pd.concat(dfs, ignore_index=True))


In [564]:
def analyze_peak_months_horizontal_barplot(df):
    """
    Show, for each year, the month with the highest mean AQI and that
    month's mean fire count.

    The data is averaged per (year, month_name); for every year the month
    with the largest mean AQI is selected. Mean AQI is drawn as a horizontal
    bar and the mean fire count as a labelled marker on the same x axis.

    Parameters:
    df (pd.DataFrame): Frame with 'year', 'month_name', 'aqi' and
        'fire_count' columns

    Returns:
    None: The figure is displayed with ``fig.show()`` and not returned
    """
    grouped = df.groupby(['year', 'month_name']).agg({
        'aqi': 'mean',
        'fire_count': 'mean'
    }).reset_index()

    # idxmax gives, per year, the row label of the month with the top mean AQI
    peak_months = grouped.loc[grouped.groupby('year')['aqi'].idxmax()].reset_index(drop=True)
    peak_months['label'] = peak_months['month_name'] + " (" + peak_months['year'].astype(str) + ")"

    fig = go.Figure()

    fig.add_trace(go.Bar(
        y=peak_months['label'],
        x=peak_months['aqi'],
        name='Avg AQI',
        orientation='h',
        marker_color='darkorange'
    ))

    fig.add_trace(go.Scatter(
        y=peak_months['label'],
        x=peak_months['fire_count'],
        mode='markers+text',
        name='Avg Fire Count',
        marker=dict(color='blue', size=12),
        text=peak_months['fire_count'].round(1),
        textposition='top right'
    ))

    fig.update_layout(
        title='Yearly Peak AQI Months with Avg Fire Count Overlay',
        xaxis_title='Average AQI',
        yaxis_title='Year (Peak Month)',
        template='plotly_white',
        # Was x=100, which pushes the legend far outside the figure; 0.7 is
        # the position used by the FRP variant of this chart.
        legend=dict(x=0.7, y=0.99)
    )
    #fig.write_html('Bar_plot_of_each_year_highest.html')

    fig.show()

In [565]:
# Add calendar columns to every yearly merge, stack them, then chart the peaks
processed_dfs = list(map(load_and_prepare_dataset, [
    merged_data_2019,
    merged_data_2020,
    merged_data_2021,
    merged_data_2022,
    merged_data_2023,
]))
combined_dfs = combine_datasets(processed_dfs)
analyze_peak_months_horizontal_barplot(combined_dfs)

In [566]:
def analyze_peak_months_horizontal_barplot_frp(df):
    """
    Show, for each year, the month with the highest mean AQI and that
    month's mean FRP.

    The data is averaged per (year, month_name); for every year the month
    with the largest mean AQI is selected. Mean AQI is drawn as a horizontal
    bar and the mean 'avg_frp' as a labelled marker on the same x axis.

    Parameters:
    df (pd.DataFrame): Frame with 'year', 'month_name', 'aqi' and
        'avg_frp' columns

    Returns:
    None: The figure is displayed with ``fig.show()`` and not returned
    """
    monthly_means = df.groupby(['year', 'month_name']).agg({
        'aqi': 'mean',
        'avg_frp': 'mean'
    })
    monthly_means = monthly_means.reset_index()

    # Row label of the highest mean-AQI month within each year
    top_rows = monthly_means.groupby('year')['aqi'].idxmax()
    peak_months = monthly_means.loc[top_rows].reset_index(drop=True)

    year_text = peak_months['year'].astype(str)
    peak_months['label'] = peak_months['month_name'] + " (" + year_text + ")"

    aqi_bars = go.Bar(
        y=peak_months['label'],
        x=peak_months['aqi'],
        name='Avg AQI',
        orientation='h',
        marker_color='darkorange'
    )
    frp_markers = go.Scatter(
        y=peak_months['label'],
        x=peak_months['avg_frp'],
        mode='markers+text',
        name='Avg FRP',
        marker=dict(color='blue', size=12),
        text=peak_months['avg_frp'].round(1),
        textposition='top right'
    )

    fig = go.Figure()
    fig.add_trace(aqi_bars)
    fig.add_trace(frp_markers)

    fig.update_layout(
        title='Yearly Peak AQI Months with Avg FRP Overlay',
        xaxis_title='Average AQI',
        yaxis_title='Year (Peak Month)',
        template='plotly_white',
        legend=dict(x=0.7, y=0.99)
    )

    #fig.write_html('Bar_plot_of_each_year_highest_with_frp.html')
    fig.show()
# Same peak-month chart, overlaying mean FRP instead of mean fire count
analyze_peak_months_horizontal_barplot_frp(df=combined_dfs)