In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

import requests
from bisect import bisect
import datetime

In [3]:
IHi_list = [50, 100, 200, 300, 400, 500]
ILo_list = [0, 51, 101, 201, 301, 401]
BPHi_list = [30, 60, 90, 120, 250, 500]
BPLo_list = [0, 31, 61, 91, 121, 251]

def pm25_aqi(Cp):
    index = bisect(BPLo_list, Cp) - 1
    aqi = ((IHi_list[index] - ILo_list[index]) / (BPHi_list[index] - BPLo_list[index])) * (Cp - BPLo_list[index]) + ILo_list[index]
    return aqi

def data_by_date(start_date, end_date, location_id):
    api_url = f"https://api.openaq.org/v2/measurements?date_from={start_date}&date_to={end_date}&limit=7000&sort=desc&parameter=pm25&location_id={location_id}&order_by=datetime"
    response = requests.get(api_url)
    print(response)

    results = response.json()["results"]
    df = pd.DataFrame(columns=['datetime', 'pm2.5'])

    for i, result in enumerate(results):
        datetime = result["date"]["local"]
        pm25 = result["value"]

        df.loc[i] = [datetime, pm25]

    df.datetime = pd.to_datetime(df.datetime)

    df['month'] = df['datetime'].dt.strftime('%B')
    df['day'] = df['datetime'].dt.strftime('%d')
    df['year'] = df['datetime'].dt.strftime('%Y')
    df['time'] = df['datetime'].dt.strftime('%H:%M:%S')

    df['sub_index'] = df['pm2.5'].map(pm25_aqi)

    return df

def month_wise_year_data(year, location_id):
    start_date = datetime.date(year, 1, 1).strftime('%Y-%m-%d')
    end_date = datetime.date(year, 12, 1).strftime('%Y-%m-%d')

    year_data = data_by_date(start_date, end_date, location_id)
    final_data = year_data.drop(['pm2.5', 'datetime', 'time'], axis=1).pivot_table(values=['day', 'month', 'year'], index='day', columns=['month', 'year'], aggfunc=np.mean)
    final_data.columns = [' '.join(col).rstrip().replace('sub_index', '') for col in final_data.columns.values]

    return final_data

def filt_cols(df, na_num):
    df = df[df.isna().sum()[df.isna().sum()<na_num].index]
    return df

In [17]:
# Maharashtra Pollution Control Board Bandra location_id: 2582
mpcb_id = 2582

df = month_wise_year_data(2016, mpbc_id)
for i in range(2017, 2024):
    df = pd.concat([df, month_wise_year_data(i, 2582)], axis=1)

df = filt_cols(df, 10)
df

<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>








<Response [200]>






Unnamed: 0_level_0,November 2016,October 2016,November 2017,October 2017,September 2017,August 2018,March 2018,October 2018,September 2018,October 2019,September 2019
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,143.411563,37.01367,115.99107,82.582308,32.453758,,,80.368048,46.287442,15.476705,13.232801
2,94.539823,31.826714,125.565085,71.182039,35.873858,,,81.700833,39.954681,32.464099,14.466487
3,76.756964,37.294253,98.914743,74.684251,26.458445,,,84.317105,42.10077,41.381886,11.006491
4,77.091883,27.247797,126.266335,116.704176,66.322592,,,74.770423,40.051472,33.644581,10.595652
5,78.32493,56.677424,76.343009,134.291453,60.403042,,,74.478442,37.538147,44.65202,12.987674
6,68.812416,43.735932,70.842997,253.646724,46.526834,,,80.243174,43.049716,42.284007,13.924202
7,84.751516,27.874058,78.342922,70.587366,70.056825,,,93.407089,43.2613,54.632005,35.931422
8,68.030798,27.763435,89.044816,38.45,64.624957,,,84.418038,44.03796,57.450325,12.45434
9,67.827333,27.298146,89.619156,85.725888,70.959695,,111.603448,77.042672,41.58498,32.882764,19.039866
10,73.419088,49.587306,113.622967,67.347753,72.451573,22.375877,120.068966,70.056363,36.231966,50.156796,23.842832


In [13]:
fig = px.box(df, None, df.columns, title="Daily Average PM2.5 Sub Index Monthly Distribution")
fig.update_xaxes(title = "Months")
fig.update_yaxes(title = "Daily Average PM2.5 Sub Index")


fig.show()

In [41]:
march_2018_mpcb = data_by_date('2018-03-01', '2018-03-31', mpcb_id)
fig = px.line(march_2018_mpcb, x='datetime', y='sub_index', title="PM2.5 Sub Index for March 2018")
fig.update_yaxes(title='PM2.5 Sub Index')
fig.update_xaxes(title='Datetime')
fig.show()

<Response [200]>


In [51]:
mpcb = data_by_date('2018-09-01', '2018-12-31', mpcb_id)
mpcb = mpcb.drop(mpcb[mpcb.sub_index > 500].index)
fig = px.line(mpcb, 'datetime', 'sub_index', title='PM2.5 Sub Index 2015 to 2020')
fig.update_xaxes(title='Datetime')
fig.update_yaxes(title='PM2.5 Sub Index')
fig.show()

<Response [200]>
