# **Air Quality Monitoring Application**

In [1]:
import io
import json
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns

### **Extracting site IDs from JSON**

In [2]:
# Load the site catalogue from disk. NOTE: despite its name, `site_df` is the
# plain Python object returned by json.load (the next cell iterates it and
# reads each entry's 'id'), not a pandas DataFrame.
file_path = r"./site_ids.json"

# JSON is UTF-8 by specification; state the encoding explicitly so decoding
# does not depend on the platform's default locale.
with open(file_path, "r", encoding="utf-8") as file_obj:
    site_df = json.load(file_obj)

In [3]:
# Collect the 'id' field of every site record, preserving file order.
site_ids = [record['id'] for record in site_df]

### **API Link**

In [4]:
# Example request URL for the device-data endpoint; this cell only echoes the
# string. `%7b` / `%7d` are URL-encoded `{` / `}`, so the site id and params
# segments are still placeholders here.
# NOTE(review): the API key is embedded literally in this string (and in the
# saved cell output) — consider redacting before sharing the notebook.
"https://atmos.urbansciences.in/adp/v4/getDeviceDataParam/imei/%7bsite_id%7d/params/%7bparams%7d/startdate/2023-12-29T00:00/enddate/2024-12-31T00:00/ts/mm/avg/15/api/63h3AckbgtY?gaps=1&gap_value=NaN"

'https://atmos.urbansciences.in/adp/v4/getDeviceDataParam/imei/%7bsite_id%7d/params/%7bparams%7d/startdate/2023-12-29T00:00/enddate/2024-12-31T00:00/ts/mm/avg/15/api/63h3AckbgtY?gaps=1&gap_value=NaN'

In [5]:

# The same endpoint written as a template; this cell only echoes the string.
# The {site_id}, {params}, {start_date}, {end_date} and {api_key} placeholders
# match the variables interpolated by the fetch loop in the next code cell.
"http://atmos.urbansciences.in/adp/v4/getDeviceDataParam/imei/{site_id}/params/{params}/startdate/{start_date}/enddate/{end_date}/ts/mm/avg/15/api/{api_key}?gaps=1&gap_value=NaN"

'http://atmos.urbansciences.in/adp/v4/getDeviceDataParam/imei/{site_id}/params/{params}/startdate/{start_date}/enddate/{end_date}/ts/mm/avg/15/api/{api_key}?gaps=1&gap_value=NaN'

In [6]:
# --- Query configuration for the device-data endpoint ---------------------
params = "pm2.5cnc,pm10cnc"
start_date = "2024-01-01T00:00"
# The window ends at 00:00 of the current local day.
end_date = datetime.now().strftime("%Y-%m-%dT00:00")
# Value of the /avg/<timeframe>/ URL segment (presumably the averaging
# interval in minutes — confirm against the API documentation).
timeframe = "15"
# Prefer a key supplied through the environment so the credential does not
# have to live in the notebook.
# NOTE(review): the literal fallback keeps the notebook runnable exactly as
# before, but it is a secret committed to source — rotate it and remove the
# fallback once ATMOS_API_KEY is configured.
api_key = os.environ.get("ATMOS_API_KEY", "63h3AckbgtY")
# Seconds to wait on the server; without a timeout requests.get can block
# forever on a stalled connection. Tune if large responses are slow to start.
REQUEST_TIMEOUT_S = 60

# https rather than http: the key travels in the URL path, and the example
# link earlier in this notebook already uses the https endpoint.
url_template = (
    "https://atmos.urbansciences.in/adp/v4/getDeviceDataParam"
    "/imei/{site_id}/params/{params}"
    "/startdate/{start_date}/enddate/{end_date}"
    "/ts/mm/avg/{timeframe}/api/{api_key}?gaps=1&gap_value=NaN"
)

# --- Download one CSV per site --------------------------------------------
# `dfs` collects one DataFrame per successfully fetched site; failed sites
# are reported and skipped so one bad request does not discard the rest.
dfs = []
for site_id in site_ids:
    url = url_template.format(
        site_id=site_id,
        params=params,
        start_date=start_date,
        end_date=end_date,
        timeframe=timeframe,
        api_key=api_key,
    )

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message contains the
        # request URL, which includes the API key.
        print(f"Failed to fetch data for {site_id} ({type(exc).__name__})")
        continue

    if response.status_code != 200:
        print(f"Failed to fetch data for {site_id} (HTTP {response.status_code})")
        continue

    # Parse the body that was already downloaded instead of handing the URL
    # to pandas, which would fetch every site a second time.
    api_df = pd.read_csv(io.BytesIO(response.content))
    api_df['site_id'] = site_id
    dfs.append(api_df)


In [7]:
# Stack the per-site frames row-wise into one table with a fresh 0..n-1 index,
# then preview the first five rows.
df = pd.concat(objs=dfs, axis=0, ignore_index=True)
df.head(n=5)

Unnamed: 0,dt_time,pm2.5cnc,pm10cnc,deviceid,site_id
0,2024-01-01 00:00:00,120.09,198.98,site_104,site_104
1,2024-01-01 00:15:00,121.28,200.83,site_104,site_104
2,2024-01-01 00:30:00,111.9,183.9,site_104,site_104
3,2024-01-01 00:45:00,113.31,162.37,site_104,site_104
4,2024-01-01 01:00:00,102.87,156.25,site_104,site_104


In [8]:
# Number of missing values in each column.
df.isna().sum(axis="index")

dt_time          0
pm2.5cnc    657804
pm10cnc     667212
deviceid         0
site_id          0
dtype: int64

In [9]:
# Missing values per column as a percentage of the total row count.
df.isna().sum().div(df.shape[0]).mul(100)

dt_time      0.000000
pm2.5cnc    17.051030
pm10cnc     17.294896
deviceid     0.000000
site_id      0.000000
dtype: float64

In [10]:
# Replace the dt_time column with its parsed datetime form.
df['dt_time'] = df['dt_time'].pipe(pd.to_datetime)

In [11]:
# Print the frame's summary (index range, column dtypes, memory usage) to
# check the result of the dt_time conversion in the previous cell.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3857855 entries, 0 to 3857854
Data columns (total 5 columns):
 #   Column    Dtype         
---  ------    -----         
 0   dt_time   datetime64[ns]
 1   pm2.5cnc  float64       
 2   pm10cnc   float64       
 3   deviceid  object        
 4   site_id   object        
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 147.2+ MB


In [13]:
# Persist the combined frame to disk.
# Let pandas write to the path directly: the previous form rendered the whole
# CSV (~3.86M rows) as one in-memory string first, and wrote it through a
# text-mode handle opened without newline="", which can double line endings
# on platforms where the line separator is "\r\n".
# The index is still written as the first column (default index=True) to keep
# the file layout unchanged; it comes back as "Unnamed: 0" when the file is
# re-read with a plain pd.read_csv.
file_path = "data.csv"
df.to_csv(file_path)

In [None]:
# Read data.csv back into `df` (replacing the in-memory frame) and preview
# the first five rows.
# NOTE(review): read_csv is called without parse_dates, so dt_time is not
# re-parsed as datetime by this cell.
df = pd.read_csv(filepath_or_buffer='data.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,dt_time,pm2.5cnc,pm10cnc,deviceid,site_id
0,0,2024-01-01 00:00:00,120.09,198.98,site_104,site_104
1,1,2024-01-01 00:15:00,121.28,200.83,site_104,site_104
2,2,2024-01-01 00:30:00,111.9,183.9,site_104,site_104
3,3,2024-01-01 00:45:00,113.31,162.37,site_104,site_104
4,4,2024-01-01 01:00:00,102.87,156.25,site_104,site_104


In [None]:
# (rows, columns) of the reloaded frame.
df.shape

(3857855, 6)