<a href="https://colab.research.google.com/github/PrzemyslawSarnacki/AirQualityPrediction/blob/master/update_data.ipynb" target="_parent">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a> 

In [1]:
!pip install py-openaq

Collecting py-openaq
  Downloading https://files.pythonhosted.org/packages/28/8f/80d874a1ea7abee6c788cc6e33fc2ef982be82a9d30156c4ac122ac0ffe2/py-openaq-1.1.0.tar.gz
Building wheels for collected packages: py-openaq
  Building wheel for py-openaq (setup.py) ... [?25l[?25hdone
  Created wheel for py-openaq: filename=py_openaq-1.1.0-cp36-none-any.whl size=9039 sha256=33a1d19ed9c28841a7640fe57ea29f8ad14618a903b8aff112a6f858e342d492
  Stored in directory: /root/.cache/pip/wheels/d1/0a/63/debef9801434e608f1a1b7c3dc5a2d508b293d76c4eefba481
Successfully built py-openaq
Installing collected packages: py-openaq
Successfully installed py-openaq-1.1.0


In [2]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import openaq
import warnings
from datetime import timedelta

warnings.simplefilter('ignore')

%matplotlib inline

# Set major seaborn asthetics
sns.set("notebook", style='ticks', font_scale=1.0)

# Increase the quality of inline plots
mpl.rcParams['figure.dpi']= 500


In [3]:
# OpenAQ API client; reused further down to download measurements.
api = openaq.OpenAQ()

# Fetch the city listing as a DataFrame, requesting with limit=10000.
resp = api.cities(limit=10000, df=True)

# Display only the rows whose country code is 'PL'.
resp.query("country == 'PL'")

Unnamed: 0,country,name,city,count,locations
151,PL,Augustów,Augustów,161132871.0,2
223,PL,Belsk,Belsk,21770.0,1
224,PL,Belsk Duży,Belsk Duży,125822709.0,2
248,PL,Biała,Biała,94279227.0,1
249,PL,Biała Podlaska,Biała Podlaska,128518539.0,1
...,...,...,...,...,...
2449,PL,Zielonka,Zielonka,158649172.0,1
2454,PL,Złoty Potok,Złoty Potok,184236679.0,1
2456,PL,Żory,Żory,63993420.0,1
2458,PL,Żyrardów,Żyrardów,64404821.0,2


In [4]:
# Cities to update. Each name is passed as `city=` to the measurements request
# and, after accent stripping, used as the column prefix / dictionary key.
CITIES = [
    "Warszawa",
    "Kraków",
    "Poznań",
    "Katowice",
    "Białystok",
]

# Pollutant parameters requested for every city (passed as `parameter=`).
PARAMETERS = [
    "pm25",
    "pm10",
    "no2",
    "so2",
    "o3",
    "co",
]

# Station/location names. NOTE(review): not referenced by any visible cell in
# this notebook -- confirm whether it is still needed.
LOCATIONS = [
    "Warszawa-Śródmieście",
    "Kraków-",
    "Poznań",
    "Katowice",
    "Białystok-Miejska",
    "WIOŚ Elbląg ul. Bażyńskiego",
]

In [5]:
import unicodedata

def strip_accents(text):
    """Return a lowercase copy of ``text`` with diacritics removed.

    The text is lowercased, "ł" is replaced with "l" explicitly, the result is
    NFKD-normalized, and every character whose Unicode category is 'Mn'
    (non-spacing mark) is dropped.

    Args:
        text: The string to convert.

    Returns:
        The lowercased string without non-spacing marks.
    """
    lowered = text.lower().replace("ł", "l")
    decomposed = unicodedata.normalize('NFKD', lowered)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)


In [6]:
# Per-city frames, keyed by the accent-stripped lowercase city name.
data = {}

# Column-name suffixes used to pick one city's columns out of the flat CSV
# header: "<city>", "<city>.1", ..., "<city>.5".
# NOTE(review): presumably these suffixes come from pandas de-duplicating the
# repeated city names in the first header row of the exported file -- confirm.
indices = [""] + [f".{i}" for i in range(1, 6)]

# Previously exported dataset; the first column is used as the index.
df = pd.read_csv("https://raw.githubusercontent.com/PrzemyslawSarnacki/AirQualityPrediction/master/data/airq_data_2.csv", index_col=0, parse_dates=True)

# Step 1: split the flat CSV into one numeric, date-indexed frame per city.
for city in CITIES:
    key = strip_accents(city)
    city_df = pd.DataFrame(df, columns=[f"{key}{item}" for item in indices])
    # The first row holds the real column names; promote it to the header
    # and drop it from the data.
    city_df = city_df.rename(columns=city_df.iloc[0]).drop(city_df.index[0])
    # Convert to float and fill gaps by linear interpolation.
    city_df = city_df.astype(float).interpolate(method="linear")
    city_df.index = pd.to_datetime(city_df.index)
    data[key] = city_df

# Step 2: download measurements newer than the last stored day and append
# their daily means to each city's frame.
for city in CITIES:
    key = strip_accents(city)

    # Start the request on the day after the last stored row. This does not
    # depend on the parameter, so it is computed once per city.
    last_date = data[key].index[-1] + timedelta(days=1)
    last_date_str = last_date.strftime("%Y-%m-%d")

    daily_means = {}
    for param in PARAMETERS:
        res = api.measurements(city=city, parameter=param, date_from=last_date_str, limit=100000, df=True)
        # Select the numeric column *before* averaging: averaging the whole
        # frame raises on non-numeric columns in pandas >= 2.0.
        daily_means[param] = res["value"].resample('D').mean()

    # Building the frame from a dict of series unions their date indices.
    # Assigning column by column into an empty frame would instead keep only
    # the dates of the first parameter and silently drop the rest.
    helper_df = pd.DataFrame(daily_means)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    data[key] = pd.concat([data[key], helper_df])

# Step 3: put the city frames side by side under a two-level column header
# (city, parameter) and write the result to disk.
output = pd.concat([data[strip_accents(city)] for city in CITIES], axis=1, keys=[strip_accents(city) for city in CITIES])
output.to_csv("airq_data_2.csv")