<a href="https://colab.research.google.com/github/PrzemyslawSarnacki/AirQualityPrediction/blob/master/PredictionNotebook.ipynb" target="_parent">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a> 

In [3]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import openaq
import warnings

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by pandas and
# other libraries — consider narrowing the filter to specific categories.
warnings.simplefilter('ignore')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Set major seaborn aesthetics: "notebook" context, "ticks" style, default font scale
sns.set("notebook", style='ticks', font_scale=1.0)

# Increase the resolution of inline plots (500 DPI).
# NOTE(review): presumably this makes figures large and slow to render — confirm it is intended.
mpl.rcParams['figure.dpi']= 500


In [4]:
# Client for the OpenAQ API; constructed with no arguments.
api = openaq.OpenAQ()

# Request the city catalogue as a DataFrame (at most 10000 entries).
resp = api.cities(df=True, limit=10000)

# Show only the cities whose country code is 'PL'.
resp[resp["country"] == "PL"]

Unnamed: 0,country,name,city,count,locations
2001,PL,Augustów,Augustów,88266,2
2002,PL,Belsk,Belsk,32105,1
2003,PL,Belsk Duży,Belsk Duży,87034,1
2004,PL,Biała,Biała,27573,1
2005,PL,Biała Podlaska,Biała Podlaska,106955,1
...,...,...,...,...,...
2171,PL,Zielonka,Zielonka,98239,1
2172,PL,Złoty Potok,Złoty Potok,133381,1
2173,PL,Żory,Żory,48437,1
2174,PL,Żyrardów,Żyrardów,45537,1


In [5]:
# Cities whose measurements are downloaded and aggregated below.
CITIES = [
    "Warszawa",
    "Kraków",
    "Poznań",
    "Katowice",
    "Białystok",
]

# Pollutant identifiers that must be present in a page of measurements.
PARAMETERS = [
    "pm25",
    "pm10",
    "no2",
    "so2",
    "o3",
    "co",
]

# Station / location names.
# NOTE(review): six entries versus five CITIES, "Kraków-" looks truncated, and
# the Elbląg station has no matching city — TODO confirm the intended values.
LOCATIONS = [
    "Warszawa-Śródmieście",
    "Kraków-",
    "Poznań",
    "Katowice",
    "Białystok-Miejska",
    "WIOŚ Elbląg ul. Bażyńskiego",
]

In [6]:
import unicodedata

def strip_accents(text):
    """Return ``text`` lower-cased with diacritics removed.

    The Polish "ł" is replaced with "l" explicitly before Unicode
    normalisation. The remaining accents are removed by decomposing the
    string (NFKD) and dropping every character in the "Mn" category.

    Args:
        text: the string to normalise.

    Returns:
        The lower-cased string without combining marks, e.g. "Kraków" -> "krakow".
    """
    lowered = text.lower().replace("ł", "l")
    decomposed = unicodedata.normalize('NFKD', lowered)
    kept_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept_chars)


In [7]:
def check_params(measurements=None, parameters=None):
    """Return True when every required parameter appears in a measurements frame.

    Args:
        measurements: DataFrame with a ``parameter`` column. When omitted, the
            module-level ``res`` is used, so existing ``check_params()`` calls
            keep working.
        parameters: iterable of required parameter names. When omitted, the
            module-level ``PARAMETERS`` list is used.

    Returns:
        bool: True if each required name occurs at least once in the
        ``parameter`` column, otherwise False.
    """
    if measurements is None:
        measurements = res
    if parameters is None:
        parameters = PARAMETERS
    # Compute the distinct values once instead of once per required parameter.
    available = set(measurements["parameter"].unique())
    return all(param in available for param in parameters)

# Raw measurements per city, keyed by the accent-stripped, lower-cased city name.
dfs = {}

for city in CITIES:
    print(city)
    city_key = strip_accents(city)
    # Collect the usable pages and concatenate once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame inside a loop is quadratic.
    valid_pages = []
    for page in range(1, 11):
        # Must stay named `res`: check_params() reads this module-level name.
        res = api.measurements(city=city, page=page, limit=10000, df=True)
        has_all_params = check_params()
        print(has_all_params)
        # Keep only pages that contain every parameter listed in PARAMETERS.
        if has_all_params:
            valid_pages.append(res)
    # pd.concat raises on an empty list, so fall back to an empty frame.
    dfs[city_key] = pd.concat(valid_pages) if valid_pages else pd.DataFrame()


Warszawa
True
True
True
True
True
True
True
True
True
True
Kraków
True
True
True
True
True
True
True
True
True
True
Poznań
True
True
True
True
True
True
True
True
True
True
Katowice
True
True
True
True
True
True
True
True
True
True
Białystok
True
True
True
True
True
True
True
True
True
False


In [8]:
# Preview the first rows of the raw measurements stored under the "bialystok" key.
dfs["bialystok"].head()

Unnamed: 0_level_0,location,parameter,value,unit,country,city,date.utc,coordinates.latitude,coordinates.longitude
date.local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-12-22 16:00:00,Białystok-Warszawska,o3,22.0,b'\xc2\xb5g/m\xc2\xb3',PL,Białystok,2020-12-22 15:00:00+00:00,53.129306,23.181744
2020-12-22 16:00:00,Białystok-Miejska,no2,10.1,b'\xc2\xb5g/m\xc2\xb3',PL,Białystok,2020-12-22 15:00:00+00:00,53.126689,23.155869
2020-12-22 16:00:00,Białystok-Miejska,co,417.0,b'\xc2\xb5g/m\xc2\xb3',PL,Białystok,2020-12-22 15:00:00+00:00,53.126689,23.155869
2020-12-22 16:00:00,Białystok-Miejska,bc,0.84,b'\xc2\xb5g/m\xc2\xb3',PL,Białystok,2020-12-22 15:00:00+00:00,53.126689,23.155869
2020-12-22 16:00:00,Białystok-Warszawska,pm10,26.28,b'\xc2\xb5g/m\xc2\xb3',PL,Białystok,2020-12-22 15:00:00+00:00,53.129306,23.181744


In [9]:
# Daily mean of each parameter per city, keyed like `dfs`; one column per parameter.
organized_dfs = {}

for city in CITIES:
    city_key = strip_accents(city)
    city_measurements = dfs[city_key]
    df_avg = pd.DataFrame()
    for param in PARAMETERS:
        is_param = city_measurements["parameter"] == param
        # Select "value" before aggregating: averaging the whole frame also
        # touches the string/bytes columns, which raises TypeError on
        # pandas >= 2.0.
        df_avg[param] = city_measurements.loc[is_param, "value"].resample('D').mean()
    # Store the finished frame once per city rather than once per parameter.
    organized_dfs[city_key] = df_avg
    

In [10]:
# Display the whole dict of per-city daily averages.
# NOTE(review): this dumps every frame; a `.head()` per city would keep the output short.
organized_dfs

{'warszawa':                  pm25       pm10        no2       so2         o3           co
 date.local                                                                   
 2020-05-09  22.713636  48.241250  73.736667  3.100000  18.837143   870.000000
 2020-05-10  16.482353  23.090882  23.814167  5.457083  64.008687   717.166667
 2020-05-11  16.649850  25.342030  24.885432  6.765833  65.633646   815.083333
 2020-05-12   3.088226   3.746377  11.982917  4.730833  57.013125   709.500000
 2020-05-13  12.944483  19.946897  30.575556  3.646667  65.377778   755.333333
 ...               ...        ...        ...       ...        ...          ...
 2020-12-18  37.777281  42.442982  29.465543  6.770435   6.868626  1334.217391
 2020-12-19  36.421228  40.852895  23.945761  4.084348   8.846803  1185.043478
 2020-12-20  30.318833  31.197000  17.121458  4.358333  18.062042   988.041667
 2020-12-21  30.687833  32.746083  23.669479  5.507917  15.765789  1196.083333
 2020-12-22  34.622824  35.597882  23.79

In [16]:
# Print the keys of `organized_dfs`, one per line.
for city_key in organized_dfs.keys():
    print(city_key)

warszawa
krakow
poznan
katowice
bialystok
