In [70]:
from datetime import datetime
import json
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import entropy
import requests
import math
import seaborn as sns

%matplotlib inline

In [83]:
# OpenAQ API request URL
aq_requrl = "https://api.openaq.org/v2/averages"

# OpenAQ API request's parameters
aq_reqparams = {
'date_from': '2022-08-31T00:00:00+00:00',
'date_to': '2022-09-01T00:00:00+00:00',
'country_id' : 'UA',
'limit': 1000,
'page': 1,
'offset': 0,
'sort': 'asc',
'spatial': 'country',
'temporal': 'hour',
'group': 'true'
}

aq_resp = requests.get(aq_requrl, aq_reqparams)
aq_resp_json = aq_resp.json()

aq_resp_df = pd.DataFrame(aq_resp_json['results'], columns=['id','hour','parameter','parameterId','average','measurement_count'])    
aq_resp_df


Unnamed: 0,id,hour,parameter,parameterId,average,measurement_count
0,[58378],2022-08-31T00:00:00+00:00,pm10,1,17.3359,64
1,[58378],2022-08-31T00:00:00+00:00,pm25,2,15.4063,64
2,[58378],2022-08-31T00:00:00+00:00,pm1,19,9.9109,64
3,[58378],2022-08-31T00:00:00+00:00,um010,126,0.8269,64
4,[58378],2022-08-31T00:00:00+00:00,um025,130,0.0828,64
...,...,...,...,...,...,...
139,[58378],2022-08-31T23:00:00+00:00,pm25,2,16.7735,49
140,[58378],2022-08-31T23:00:00+00:00,pm1,19,11.3694,49
141,[58378],2022-08-31T23:00:00+00:00,um010,126,1.0018,49
142,[58378],2022-08-31T23:00:00+00:00,um025,130,0.0622,49


In [73]:
aq_resp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 144 non-null    object 
 1   hour               144 non-null    object 
 2   parameter          144 non-null    object 
 3   parameterId        144 non-null    int64  
 4   average            144 non-null    float64
 5   measurement_count  144 non-null    int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 6.9+ KB


In [84]:
aq_resp_pm25_df = aq_resp_df.query('parameterId == 2')
aq_resp_pm25_df

Unnamed: 0,id,hour,parameter,parameterId,average,measurement_count
1,[58378],2022-08-31T00:00:00+00:00,pm25,2,15.4063,64
7,[58378],2022-08-31T01:00:00+00:00,pm25,2,15.8638,47
13,[58378],2022-08-31T02:00:00+00:00,pm25,2,17.1879,58
19,[58378],2022-08-31T03:00:00+00:00,pm25,2,15.717,47
25,[58378],2022-08-31T04:00:00+00:00,pm25,2,17.6019,53
31,[58378],2022-08-31T05:00:00+00:00,pm25,2,13.7753,73
37,[58378],2022-08-31T06:00:00+00:00,pm25,2,10.2679,53
43,[58378],2022-08-31T07:00:00+00:00,pm25,2,5.8089,56
49,[58378],2022-08-31T08:00:00+00:00,pm25,2,4.534,53
55,[58378],2022-08-31T09:00:00+00:00,pm25,2,5.9368,57


In [85]:
aq_resp_pm25_df.shape

(24, 6)

In [86]:
aq_resp_df.query('parameterId == 2').info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24 entries, 1 to 139
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 24 non-null     object 
 1   hour               24 non-null     object 
 2   parameter          24 non-null     object 
 3   parameterId        24 non-null     int64  
 4   average            24 non-null     float64
 5   measurement_count  24 non-null     int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 1.3+ KB


In [87]:
# Using DataFrame.mean() method to get 'average' column average
aq_resp_pm25_24_mean = aq_resp_pm25_df['average'].mean()

print(type(aq_resp_pm25_24_mean))

if aq_resp_pm25_24_mean <= 15.0:
    print('Daily 24-hour mean concentrations of PM2.5 fine particles equal %s <= 15 µg/m³' % format(aq_resp_pm25_24_mean, '.0f'))
else:
    print('Daily 24-hour mean concentrations of PM2.5 fine particles equal %s > 15 µg/m³' % format(aq_resp_pm25_24_mean, ".0f"))


<class 'numpy.float64'>
Daily 24-hour mean concentrations of PM2.5 fine particles equal 9 <= 15 µg/m³


In [90]:
def epa_aqi_pm25_calc(row):
    
    pm25_avg = round(row['average'], 1)
        
    if pm25_avg >= 0.0 and pm25_avg <= 12.0:
        print('1. Good')
        pm25_lo = 0.0
        pm25_hi = 12.0
        aqi_pm25_lo = 0
        aqi_pm25_hi = 50
    elif pm25_avg >= 12.1 and pm25_avg <= 35.4:
        print('2. Moderate')
        epa_aqi_pm25_weight = 2
        pm25_lo = 12.1
        pm25_hi = 35.4
        aqi_pm25_lo = 51
        aqi_pm25_hi = 100
    elif pm25_avg >= 35.5 and pm25_avg <= 55.4:
        print('3. Unhealthy for Sensitive Group')
        pm25_lo = 35.5
        pm25_hi = 55.4
        aqi_pm25_lo = 101
        aqi_pm25_hi = 150
    elif pm25_avg >= 55.5 and pm25_avg <= 150.4:
        print('4. Unhealthy')
        pm25_lo = 55.5
        pm25_hi = 150.4
        aqi_pm25_lo = 151
        aqi_pm25_hi = 200        
    elif pm25_avg >= 150.5 and pm25_avg <= 250.4:
        print('5. Very unhealthy')
        pm25_lo = 150.5
        pm25_hi = 250.4
        aqi_pm25_lo = 201
        aqi_pm25_hi = 300 
    elif pm25_avg >= 250.5 and pm25_avg <= 500.4:
        print('6. Hazardous')
        pm25_lo = 250.5
        pm25_hi = 500.4
        aqi_pm25_lo = 301
        aqi_pm25_hi = 500         
        
    pm25_aqi = (aqi_pm25_hi - aqi_pm25_lo) / (pm25_hi - pm25_lo) * (pm25_avg - pm25_lo) + aqi_pm25_lo
        
    return math.trunc(pm25_aqi)

aq_resp_pm25_df['epa_aqi_pm25'] = aq_resp_pm25_df.apply(epa_aqi_pm25_calc, axis = 1)

print(aq_resp_pm25_df['epa_aqi_pm25'].info())

aq_resp_pm25_df


        
        
        
    

2. Moderate
2. Moderate
2. Moderate
2. Moderate
2. Moderate
2. Moderate
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
2. Moderate
2. Moderate
2. Moderate
<class 'pandas.core.series.Series'>
Int64Index: 24 entries, 1 to 139
Series name: epa_aqi_pm25
Non-Null Count  Dtype
--------------  -----
24 non-null     int64
dtypes: int64(1)
memory usage: 384.0 bytes
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aq_resp_pm25_df['epa_aqi_pm25'] = aq_resp_pm25_df.apply(epa_aqi_pm25_calc, axis = 1)


Unnamed: 0,id,hour,parameter,parameterId,average,measurement_count,epa_aqi_pm25,epa_aqi_pm25_level
1,[58378],2022-08-31T00:00:00+00:00,pm25,2,15.4063,64,57,2
7,[58378],2022-08-31T01:00:00+00:00,pm25,2,15.8638,47,58,2
13,[58378],2022-08-31T02:00:00+00:00,pm25,2,17.1879,58,61,2
19,[58378],2022-08-31T03:00:00+00:00,pm25,2,15.717,47,58,2
25,[58378],2022-08-31T04:00:00+00:00,pm25,2,17.6019,53,62,2
31,[58378],2022-08-31T05:00:00+00:00,pm25,2,13.7753,73,54,2
37,[58378],2022-08-31T06:00:00+00:00,pm25,2,10.2679,53,42,1
43,[58378],2022-08-31T07:00:00+00:00,pm25,2,5.8089,56,24,1
49,[58378],2022-08-31T08:00:00+00:00,pm25,2,4.534,53,18,1
55,[58378],2022-08-31T09:00:00+00:00,pm25,2,5.9368,57,24,1


In [91]:
def epa_aqi_pm25_level_calc(row):
    
    epa_aqi_pm25 = row['epa_aqi_pm25']
    
    if epa_aqi_pm25 >= 0 and epa_aqi_pm25 <= 50:
        print('1. Good')
        epa_aqi_pm25_level = 1
    elif epa_aqi_pm25 >= 51 and epa_aqi_pm25 <= 100:
        print('2. Moderate')
        epa_aqi_pm25_level = 2        
    elif epa_aqi_pm25 >= 101 and epa_aqi_pm25 <= 150:
        print('3. Unhealthy for Sensitive Group')
        epa_aqi_pm25_level = 3        
    elif epa_aqi_pm25 >= 151 and epa_aqi_pm25 <= 200:
        print('4. Unhealthy')
        epa_aqi_pm25_level = 4        
    elif epa_aqi_pm25 >= 201 and epa_aqi_pm25 <= 300:
        print('5. Very Unhealthy')
        epa_aqi_pm25_level = 5 
    elif epa_aqi_pm25 >= 301 and epa_aqi_pm25 <= 500:
        print('6. Hazardous')
        epa_aqi_pm25_level = 6 
      
    return epa_aqi_pm25_level


aq_resp_pm25_df['epa_aqi_pm25_level'] = aq_resp_pm25_df.apply(epa_aqi_pm25_level_calc, axis = 1)

aq_resp_pm25_df


        
        
        
    

2. Moderate
2. Moderate
2. Moderate
2. Moderate
2. Moderate
2. Moderate
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
1. Good
2. Moderate
2. Moderate
2. Moderate


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aq_resp_pm25_df['epa_aqi_pm25_level'] = aq_resp_pm25_df.apply(epa_aqi_pm25_level_calc, axis = 1)


Unnamed: 0,id,hour,parameter,parameterId,average,measurement_count,epa_aqi_pm25,epa_aqi_pm25_level
1,[58378],2022-08-31T00:00:00+00:00,pm25,2,15.4063,64,57,2
7,[58378],2022-08-31T01:00:00+00:00,pm25,2,15.8638,47,58,2
13,[58378],2022-08-31T02:00:00+00:00,pm25,2,17.1879,58,61,2
19,[58378],2022-08-31T03:00:00+00:00,pm25,2,15.717,47,58,2
25,[58378],2022-08-31T04:00:00+00:00,pm25,2,17.6019,53,62,2
31,[58378],2022-08-31T05:00:00+00:00,pm25,2,13.7753,73,54,2
37,[58378],2022-08-31T06:00:00+00:00,pm25,2,10.2679,53,42,1
43,[58378],2022-08-31T07:00:00+00:00,pm25,2,5.8089,56,24,1
49,[58378],2022-08-31T08:00:00+00:00,pm25,2,4.534,53,18,1
55,[58378],2022-08-31T09:00:00+00:00,pm25,2,5.9368,57,24,1


In [92]:
def epa_aqi_pm25_level_entropy_calc(labels, base):
        
    value, counts = np.unique(labels, return_counts=True)
    
    epa_aqi_pm25_level_entropy = entropy(counts, base=base)
    
    return epa_aqi_pm25_level_entropy


epa_aqi_pm25_level_lst = list(aq_resp_pm25_df['epa_aqi_pm25_level'])

print(f'epa_aqi_pm25_level_lst={epa_aqi_pm25_level_lst}', type(epa_aqi_pm25_level_lst), '\n')

epa_aqi_pm25_level_entropy = epa_aqi_pm25_level_entropy_calc(epa_aqi_pm25_level_lst, 2)

print(f'epa_aqi_pm25_level_entropy={epa_aqi_pm25_level_entropy}', type(epa_aqi_pm25_level_entropy), '\n')



epa_aqi_pm25_level_lst=[2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2] <class 'list'> 

epa_aqi_pm25_level_entropy=0.954434002924965 <class 'numpy.float64'> 



In [93]:
# Degree of consistency per unit of mean levels of the average daily concentration of particles PM2.5

# Using DataFrame.mean() method to get 'epa_aqi_pm25_level' column average
aq_resp_pm25_level_mean = aq_resp_pm25_df['epa_aqi_pm25_level'].mean()

aq_resp_pm25_degree_of_consistency = epa_aqi_pm25_level_entropy / aq_resp_pm25_level_mean  

print(f'aq_resp_pm25_degree_of_consistency={aq_resp_pm25_degree_of_consistency}', type(aq_resp_pm25_degree_of_consistency), '\n')

aq_resp_pm25_degree_of_consistency=0.6941338203090655 <class 'numpy.float64'> 

