<a href="https://colab.research.google.com/github/PrzemyslawSarnacki/AirQualityPrediction/blob/master/update_data.ipynb" target="_parent">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a> 

In [1]:
!pip install py-openaq



In [2]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import openaq
import warnings
from datetime import timedelta

warnings.simplefilter('ignore')

%matplotlib inline

# Set major seaborn asthetics
sns.set("notebook", style='ticks', font_scale=1.0)

# Increase the quality of inline plots
mpl.rcParams['figure.dpi']= 500


In [3]:
# OpenAQ API client; the data refresh cell further down reuses it to fetch
# measurements.
api = openaq.OpenAQ()
# Request the city listing as a DataFrame (limit=10000).
resp = api.cities(df=True, limit=10000)

# Show only the cities located in Poland (country code 'PL')
resp.query("country == 'PL'")

Unnamed: 0,country,name,city,count,locations
151,PL,Augustów,Augustów,161132871.0,2
223,PL,Belsk,Belsk,21770.0,1
224,PL,Belsk Duży,Belsk Duży,125822709.0,2
248,PL,Biała,Biała,94279227.0,1
249,PL,Biała Podlaska,Biała Podlaska,128518539.0,1
...,...,...,...,...,...
2449,PL,Zielonka,Zielonka,158649172.0,1
2454,PL,Złoty Potok,Złoty Potok,184236679.0,1
2456,PL,Żory,Żory,63993420.0,1
2458,PL,Żyrardów,Żyrardów,64404821.0,2


In [4]:
# Cities whose measurements are fetched and stored, one column group per city.
CITIES = [
    "Warszawa",
    "Kraków",
    "Poznań",
    "Katowice",
    "Białystok",
]

# Pollutant parameters requested for every city.
PARAMETERS = [
    'pm25',
    'pm10',
    'no2',
    'so2',
    'o3',
    'co',
]

# Station / location names.
# NOTE(review): not referenced by the cells visible here, and it has six entries
# against five cities ("Kraków-" looks truncated) - confirm before relying on it.
LOCATIONS = [
    "Warszawa-Śródmieście",
    "Kraków-",
    "Poznań",
    "Katowice",
    "Białystok-Miejska",
    "WIOŚ Elbląg ul. Bażyńskiego",
]

In [5]:
import unicodedata

def strip_accents(text):
    """Return ``text`` lower-cased and reduced to unaccented characters.

    The string is lower-cased, "ł" is replaced with "l", the result is
    NFKD-normalized, and every character in Unicode category ``Mn``
    (non-spacing mark) is dropped.
    """
    # "ł" is swapped by hand: NFKD does not split it into "l" plus a mark.
    lowered = text.lower().replace("ł", "l")
    decomposed = unicodedata.normalize('NFKD', lowered)
    kept = [char for char in decomposed if unicodedata.category(char) != 'Mn']
    return ''.join(kept)


In [11]:
# Refresh the stored daily air-quality table: load the published CSV, split it
# into one frame per city, fetch the days after the last stored date from
# OpenAQ, and write the combined table back to "airq_data_2.csv".
data = {}
# Column suffixes used to pick a city's columns out of the flat CSV header:
# "<city>", "<city>.1", ... - one column per entry in PARAMETERS.
indices = [""] + [f".{i}" for i in range(1, len(PARAMETERS))]

df = pd.read_csv(
    "https://raw.githubusercontent.com/PrzemyslawSarnacki/AirQualityPrediction/master/data/airq_data_2.csv",
    index_col=0,
    parse_dates=True,
)

for city in CITIES:
    key = strip_accents(city)
    city_df = pd.DataFrame(df, columns=[f"{key}{item}" for item in indices])
    # The first row carries the parameter names; promote it to the column
    # labels and remove it from the data.
    city_df = city_df.rename(columns=city_df.iloc[0]).drop(city_df.index[0])
    # Fill gaps in the stored history by linear interpolation.
    city_df = city_df.astype(float).interpolate(method="linear")
    city_df.index = pd.to_datetime(city_df.index)
    data[key] = city_df

for city in CITIES:
    key = strip_accents(city)
    # Hoisted out of the parameter loop: data[key] only grows after all
    # parameters of the city have been fetched.
    last_date = data[key].index[-1]
    last_date_str = last_date.strftime("%Y-%m-%d")

    # NOTE: column assignment aligns each parameter on the index set by the
    # first parameter, so days missing from the first one are not added.
    helper_df = pd.DataFrame()
    for param in PARAMETERS:
        res = api.measurements(city=city, parameter=param, date_from=last_date_str, limit=100000, df=True)
        # Select "value" before averaging so non-numeric response columns never
        # reach mean() (a TypeError on pandas >= 2.0).
        helper_df[param] = res["value"].resample('D').mean()

    # The fetch starts at last_date, which is already stored, so the first
    # daily row is skipped (assumes that row is last_date itself).
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    data[key] = pd.concat([data[key], helper_df.iloc[1:]])

city_keys = [strip_accents(city) for city in CITIES]
output = pd.concat([data[key] for key in city_keys], axis=1, keys=city_keys)
output.to_csv("airq_data_2.csv")

In [12]:
# Display the refreshed table; the rendering below elides the middle rows.
output

Unnamed: 0_level_0,warszawa,warszawa,warszawa,warszawa,warszawa,warszawa,krakow,krakow,krakow,krakow,krakow,krakow,poznan,poznan,poznan,poznan,poznan,poznan,katowice,katowice,katowice,katowice,katowice,katowice,bialystok,bialystok,bialystok,bialystok,bialystok,bialystok
Unnamed: 0_level_1,pm25,pm10,no2,so2,o3,co,pm25,pm10,no2,so2,o3,co,pm25,pm10,no2,so2,o3,co,pm25,pm10,no2,so2,o3,co,pm25,pm10,no2,so2,o3,co
2018-11-21,22.680000,26.282667,51.060667,1.424000,44.498000,498.200000,37.879887,41.672523,24.751955,5.212144,27.496200,560.878000,30.127440,30.951780,17.683680,8.351188,45.236780,392.002000,41.560840,42.592660,20.914910,9.770674,23.209040,249.910000,11.200000,12.408000,12.440000,0.900000,41.200000,296.800000
2018-11-22,25.774699,31.172410,39.961690,3.766571,41.121695,544.666667,43.060880,53.281576,31.354529,5.683315,22.508110,737.847174,38.620329,39.688342,17.666757,7.692360,41.097725,454.963333,46.076654,48.480421,31.509104,13.414775,15.636835,303.806250,12.625000,20.359167,19.879167,0.770833,27.695652,320.750000
2018-11-23,32.482812,38.594687,27.500694,2.762292,20.025361,674.000000,65.079628,78.740023,33.692866,5.756335,8.069167,907.656739,62.896750,66.366117,26.162504,10.555977,13.237358,802.932083,67.660546,69.715683,45.788810,16.333717,3.306182,641.056250,19.458333,23.777500,12.904167,0.670833,25.083333,313.291667
2018-11-24,44.751744,52.565233,22.325455,2.641136,6.046485,960.818182,57.657333,60.424491,33.371344,6.685367,3.632129,1327.426458,73.236854,76.834387,27.351296,13.183904,3.780671,1006.536667,54.797967,58.397863,42.874250,12.576402,3.909433,564.147917,28.625000,35.547917,12.383333,0.895833,15.541667,441.541667
2018-11-25,57.130000,67.660972,22.806481,3.836667,3.621494,1395.444444,56.164240,58.125554,29.404584,9.965788,4.432902,1223.761875,85.134404,89.157079,19.223183,11.683726,3.199074,1038.900435,46.836171,48.387150,33.070223,14.028568,6.624483,315.086087,38.375000,50.090417,12.091667,0.895833,2.041667,552.250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-18,38.464364,47.061727,34.776932,6.420909,59.542182,572.818182,41.288734,43.282744,41.185811,5.282256,27.657150,726.007674,54.725520,58.710611,29.065485,11.612762,22.898364,748.637143,38.681641,50.515084,51.928430,14.542214,14.306563,632.779545,21.642857,26.462727,15.168182,1.368182,59.454545,371.500000
2021-02-19,41.226271,49.278305,29.109778,5.962500,56.307447,604.583333,67.134912,71.736301,40.431431,6.176960,26.706743,868.035000,41.851196,44.348504,25.530847,13.286627,26.843250,703.150833,44.369708,61.024787,50.814188,14.329750,15.503637,748.277500,26.750000,31.374167,7.891667,3.958333,59.500000,333.666667
2021-02-20,42.200000,51.962000,47.722500,6.740000,25.485000,790.000000,65.679700,74.765950,55.975150,4.732100,2.079820,954.730000,48.769000,53.920750,51.084900,12.407200,4.699440,1161.700000,70.565100,84.608200,56.763200,14.530100,3.904550,932.020000,45.000000,69.960000,12.500000,3.800000,55.000000,495.000000
2021-02-21,38.821727,50.789545,37.345294,7.416364,49.365322,983.954545,60.642557,63.703705,51.143910,4.345534,20.562133,1033.460000,43.023339,48.047284,21.818083,12.396236,39.284855,686.965909,53.972968,69.300218,48.820223,16.758364,23.838002,772.198636,37.318182,55.420909,15.645455,1.686364,50.363636,565.772727
