In [1]:
import datetime
import io
import os
import re
from os.path import isfile, join

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [5]:
# Query window: the form below asks for 01/01/<start_year> through 01/01/<end_year>.
start_year = "2021"
end_year = "2022"

# Endpoint that receives the POSTed form.
url = "https://sinqlair.carm.es/calidadaire/obtener_datos.aspx?tipo=tablaRedVigilancia"

# Form fields sent as the POST body.
data_dates = {
    'tipoConsulta': "medias_diarias",
    'estacionesSelec': "e_sanbasilio_San Basilio",
    'contaminantesSelec': "NO2-NOX*NOx-O3-PM10-PM25",
    'periodoConsulta': "sel_rango",
    'tipo_dato': "grid",
    'fechaInicioConsulta': "01/01/" + start_year,
    'fechaFinConsulta': "01/01/" + end_year,
}

# Browser-like request headers.
# - The charset was misspelled as "UTF-82"; it is now the valid "UTF-8".
# - No fixed Content-Length: requests derives it from the encoded form body, so a
#   hard-coded value can only disagree with the real payload size.
headers = {
    'Host': "sinqlair.carm.es",
    'User-Agent': "Mozilla/5.0 Gecko/20100101 Firefox/70.0",
    'Accept': "*/*",
    'Accept-Language': "en-US,en;q=0.5",
    'Accept-Encoding': "gzip, deflate, br",
    'X-Requested-With': "XMLHttpRequest",
    'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
    'Origin': "https://sinqlair.carm.es",
    'DNT': "1",
    'Connection': "keep-alive",
    'Referer': "https://sinqlair.carm.es/calidadaire/estaciones/estacion.aspx?San%20Basilio",
    # NOTE(review): hard-coded session id, presumably copied from a browser session
    # and likely to expire -- confirm whether the endpoint actually requires it.
    'Cookie': "ASP.NET_SessionId=qhnag2eedjorexs450mudas1",
}

def tidy_data(data):
    """Turn the raw station table into the tidy frame that is analysed and saved.

    The eight incoming columns are renamed positionally to
    ``fecha, station, limit, NO2, NOX, O3, PM10, PM25``. The ``fecha`` strings are
    parsed with ``str_date`` and expanded into calendar columns, and PM10/PM25 are
    labelled with ``pm_10q`` / ``pm_25q`` / ``quality``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table with exactly eight columns in the order listed above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``DATE, DAY, MONTH, YEAR, HOUR, WEEKDAY,
        WEEKEND, O3, NO2, NOX, PM10, PM10_Q, PM25, PM25_Q, QUALITY`` in which every
        ``'---'`` placeholder has been replaced by ``NaN``.

    Raises
    ------
    ValueError
        If ``data`` does not have eight columns (raised by the column assignment).
    """
    # Work on a copy: the original implementation renamed and extended the caller's
    # frame in place, so calling the function twice on the same object failed.
    tidy = data.copy()
    tidy.columns = ['fecha', 'station', 'limit', 'NO2', 'NOX', 'O3', 'PM10', 'PM25']

    # Calendar features derived from the parsed timestamp.
    tidy['FULL_DATE'] = tidy['fecha'].apply(str_date)
    tidy['DATE'] = tidy['FULL_DATE'].apply(date_date)
    tidy['DAY'] = tidy['FULL_DATE'].apply(day_date)
    tidy['MONTH'] = tidy['FULL_DATE'].apply(month_date)
    tidy['YEAR'] = tidy['FULL_DATE'].apply(year_date)
    tidy['HOUR'] = tidy['FULL_DATE'].apply(hour_date)
    tidy['WEEKDAY'] = tidy['FULL_DATE'].apply(weekday_date)
    tidy['WEEKEND'] = tidy['FULL_DATE'].apply(weekend_date)

    # Category labels; the helpers coerce non-numeric readings themselves, so they
    # run before the '---' placeholders are replaced below.
    tidy['PM10_Q'] = tidy['PM10'].apply(pm_10q)
    tidy['PM25_Q'] = tidy['PM25'].apply(pm_25q)
    tidy['QUALITY'] = tidy['PM10'].apply(quality)

    tidy = tidy[['DATE', 'DAY', 'MONTH', 'YEAR', 'HOUR', 'WEEKDAY', 'WEEKEND',
                 'O3', 'NO2', 'NOX', 'PM10', 'PM10_Q', 'PM25', 'PM25_Q', 'QUALITY']]
    return tidy.replace('---', np.nan)

def str_date(date):
    """Parse a ``dd/mm/YYYY HH:MM:SS`` string into a ``datetime.datetime``."""
    timestamp_format = "%d/%m/%Y %H:%M:%S"
    parsed = datetime.datetime.strptime(date, timestamp_format)
    return parsed

def date_date(date):
    """Format a datetime as a ``dd/mm/YYYY`` string."""
    day_month_year = "%d/%m/%Y"
    formatted = date.strftime(day_month_year)
    return formatted

def day_date(date):
    """Return the day-of-month component of ``date``."""
    day_of_month = date.day
    return day_of_month

def month_date(date):
    """Return the month component of ``date``."""
    month_number = date.month
    return month_number

def year_date(date):
    """Return the year component of ``date``."""
    year_number = date.year
    return year_number

def hour_date(date):
    """Return the hour component of ``date``."""
    hour_of_day = date.hour
    return hour_of_day

def weekday_date(date):
    """Return the ISO weekday number of ``date`` (Monday is 1, Sunday is 7)."""
    # weekday() counts from Monday == 0; shifting by one yields the ISO numbering.
    return date.weekday() + 1

def weekend_date(date):
    """Label ``date`` as ``"Weekend"`` (Saturday or Sunday) or ``"Weekday"``."""
    iso_day = date.isoweekday()
    if iso_day > 5:
        return "Weekend"
    return "Weekday"

def pm_10q(pm10):
    """Map a PM10 reading to its air-quality category label.

    The reading is coerced with ``pd.to_numeric(..., errors='coerce')``; a value
    that cannot be converted becomes NaN, matches no band, and yields ``None``.
    """
    value = pd.to_numeric(pm10, errors='coerce')

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (20, "Buena"),
        (40, "Razonablemente buena"),
        (50, "Regular"),
        (100, "Desfavorable"),
        (150, "Muy desfavorable"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label

    # NOTE(review): "Extremandamente" looks like a misspelling of "Extremadamente";
    # kept verbatim because this label ends up in the exported data.
    if value > 150:
        return "Extremandamente desfavorable"
    return None

def pm_25q(pm25):
    """Map a PM2.5 reading to its air-quality category label.

    The reading is coerced with ``pd.to_numeric(..., errors='coerce')``; a value
    that cannot be converted becomes NaN, matches no band, and yields ``None``.
    """
    value = pd.to_numeric(pm25, errors='coerce')

    # (inclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (10, "Buena"),
        (20, "Razonablemente buena"),
        (25, "Regular"),
        (50, "Desfavorable"),
        (75, "Muy desfavorable"),
    )
    for upper_bound, label in bands:
        if value <= upper_bound:
            return label

    # NOTE(review): "Extremandamente" looks like a misspelling of "Extremadamente";
    # kept verbatim because this label ends up in the exported data.
    if value > 75:
        return "Extremandamente desfavorable"
    return None

def quality(pm10):
    """Classify a PM10 reading as ``"GOOD"`` (at most 50) or ``"BAD"`` (above 50).

    The reading is coerced with ``pd.to_numeric(..., errors='coerce')``; a value
    that cannot be converted becomes NaN and yields ``None``.
    """
    value = pd.to_numeric(pm10, errors='coerce')
    if value > 50:
        return "BAD"
    if value <= 50:
        return "GOOD"
    # NaN compares false on both sides, so no label applies.
    return None


In [6]:
# Download the daily means for the configured station and date range.
# The timeout keeps a stalled connection from hanging the kernel indefinitely.
r = requests.post(url, data=data_dates, headers=headers, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()

# Strip the leading byte-order mark -- both its mis-decoded form ("ï»¿") and the
# properly decoded "\ufeff" -- plus the CRLF sequences, before handing the text
# to the JSON parser.
raw_json = r.text.replace("ï»¿", "").replace("\ufeff", "").replace("\r\n", "")

# read_json expects a path or file-like object; passing the JSON text directly is
# deprecated, so wrap it in a StringIO buffer.
raw_frame = pd.read_json(io.StringIO(raw_json))
data = tidy_data(raw_frame)
data.to_csv("../data/mean-daily-" + start_year + ".csv", index=False, header=True)

In [7]:
# Show the tidied DataFrame as this cell's output.
data

Unnamed: 0,DATE,DAY,MONTH,YEAR,HOUR,WEEKDAY,WEEKEND,O3,NO2,NOX,PM10,PM10_Q,PM25,PM25_Q,QUALITY
0,01/01/2021,1,1,2021,0,5,Weekday,40,27,46,55,Desfavorable,37,Desfavorable,BAD
1,02/01/2021,2,1,2021,0,6,Weekend,39,26,52,13,Buena,12,Razonablemente buena,GOOD
2,03/01/2021,3,1,2021,0,7,Weekend,45,24,45,28,Razonablemente buena,16,Razonablemente buena,GOOD
3,04/01/2021,4,1,2021,0,1,Weekday,32,37,82,30,Razonablemente buena,23,Regular,GOOD
4,05/01/2021,5,1,2021,0,2,Weekday,31,47,105,35,Razonablemente buena,28,Desfavorable,GOOD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,27/12/2021,27,12,2021,0,1,Weekday,13,19,56,40,Razonablemente buena,16,Razonablemente buena,GOOD
361,28/12/2021,28,12,2021,0,2,Weekday,23,16,36,21,Razonablemente buena,12,Razonablemente buena,GOOD
362,29/12/2021,29,12,2021,0,3,Weekday,25,16,31,32,Razonablemente buena,12,Razonablemente buena,GOOD
363,30/12/2021,30,12,2021,0,4,Weekday,9,,,35,Razonablemente buena,21,Regular,GOOD
