In [1]:
import os

import numpy as np
import pandas as pd
from pyaemet import utilities as utl

In [2]:
# Root folder of the repository; every data path below is built as
# HOME + "<relative path>", so HOME must end with a path separator.
# The location can be overridden per machine with the COVID_AQ_HOME
# environment variable; when it is unset the original path is used.
# os.path.join(..., "") appends the trailing separator only if it is missing.
HOME = os.path.join(
    os.environ.get("COVID_AQ_HOME",
                   "/home/jaimedgp/Repositories/covidIncidence-AirQuality/"),
    "",
)

In [3]:
country = "Spain"

# Inside the download query, spaces in the country name are written as "+".
eea_country = country.replace(" ", "+")

# CSV download URL of the EEA "aided" tool. The percent-encoded query selects
# the records whose CountryOrTerritory term equals `eea_country`, in tabular
# display. The fragments are kept short only to respect the line length.
eea_url = "".join((
    "http://aided.apps.eea.europa.eu/tools/download?download_query=http",
    "%3A%2F%2Faided.apps.eea.europa.eu%2F%3Fsource%3D%7B%22query%22%",
    "3A%7B%22bool%22%3A%7B%22must%22%3A%5B%7B%22term%22%3A%7B%22Coun",
    "tryOrTerritory%22%3A%22",
    eea_country,
    "%22%7D%7D%5D%7D%7D%2C%",
    "22display_type%22%3A%22tabular%22%7D&download_format=csv",
))

In [4]:
# Download the EEA table and give the coordinate columns lower-case names.
eea_sites = (
    pd.read_csv(eea_url)
    .rename(columns={"Latitude": "latitude", "Longitude": "longitude"})
)

# The EEA places the San Nicolas station (Las Palmas de Gran Canaria) over the
# sea; overwrite its location with the real coordinates.
eea_sites.loc[
    eea_sites["EoICode"] == "ES2097A", ["latitude", "longitude"]
] = [28.10272, -15.42118]

In [5]:
# Keep the rows of "Background" stations whose area is "Suburban" or "Urban",
# then keep a single row (the first one) for each EoICode.
filter_sites = (
    eea_sites
    .loc[
        eea_sites["StationType"].eq("Background")
        & eea_sites["StationArea"].isin(["Suburban", "Urban"])
    ]
    .drop_duplicates(subset=["EoICode"])
)

In [6]:
# Alternative spellings of the same place, mapped to the single spelling used
# in the rest of the notebook.
duplicate = {
    "La Coruña": "A Coruña",
    "Alacant": "Alicante",
    "València": "Valencia",
    "Castelló": "Castellón",
    "Gerona": "Girona",
}

# `get_site_address` comes from pyaemet; presumably it adds the address columns
# (District, City, Subregion, Region, ...) to each station - TODO confirm.
# The replacement is applied to every cell that exactly equals one of the keys,
# whatever the column.
# NOTE(review): the Region column still shows both "Comunitat Valenciana" and
# "Comunidad Valenciana" in the preview below - confirm whether it should be
# normalised here as well.
location_sites = filter_sites.pipe(utl.get_site_address).replace(duplicate)

In [7]:
# Preview the first five stations together with their address columns.
location_sites.head(n=5)

Unnamed: 0,CountryOrTerritory,Namespace,Network,ResponsibleParty,TimeZone,StationName,EoICode,StationType,StationArea,Pollutant,...,MeasurementEquipment,MeasurementMethod,SamplingEquipment,AnalyticalTechnique,envelope,District,City,Subregion,Region,CountryCode
0,Spain,ES.BDCA.AQD,CCAA Com. Valenciana,"CONSELLERIA DE AGRICULTURA, MEDIO AMBIENTE, CA...",UTC,BENIDORM,ES1675A,Background,Suburban,Nitrogen monoxide (air),...,Unknown,Chemiluminescence,,,http://cdr.eionet.europa.eu/es/eu/aqd/d/envxfin7q,,Benidorm,Alicante,Comunitat Valenciana,ESP
1,Spain,ES.BDCA.AQD,CCAA Com. Valenciana,"CONSELLERIA DE AGRICULTURA, MEDIO AMBIENTE, CA...",UTC,ELX-AGROALIMENTARI,ES1624A,Background,Suburban,Benzo(a)anthracene in PM10 (aerosol),...,,,,Gas chromatography + mass spectrometry (GC-MS),http://cdr.eionet.europa.eu/es/eu/aqd/d/envxfin7q,,Elche,Alicante,Comunidad Valenciana,ESP
2,Spain,ES.BDCA.AQD,CCAA Andalucía,"CONSEJERIA DE AGRICULTURA, GANADERIA, PESCA Y ...",UTC,EL BOTICARIO,ES1786A,Background,Suburban,Ozone (air),...,Unknown,Ultraviolet (UV) photometry,,,http://cdr.eionet.europa.eu/es/eu/aqd/d/envxfin7q,,Almería,Almería,Andalucía,ESP
3,Spain,ES.BDCA.AQD,CCAA Andalucía,"CONSEJERIA DE AGRICULTURA, GANADERIA, PESCA Y ...",UTC,EL EJIDO,ES1549A,Background,Urban,o-Xylene (air),...,,,,Gas chromatography + mass spectrometry (GC-MS),http://cdr.eionet.europa.eu/es/eu/aqd/d/envxfin7q,El Ejido,El Ejido,Almería,Andalucía,ESP
4,Spain,ES.BDCA.AQD,CCAA Extremadura,"CONSEJERIA DE MEDIO AMBIENTE Y RURAL, POLITICA...",UTC,BADAJOZ,ES1601A,Background,Urban,Carbon monoxide (air),...,DASIBI 3008 CO analyser,Non-dispersive infrared spectroscopy (NDIR),,,http://cdr.eionet.europa.eu/es/eu/aqd/d/envxfin7q,,Badajoz,Badajoz,Extremadura,ESP


In [8]:
# Municipality names as written in the INE workbook, mapped to the spelling
# used in the "City" column of the stations table.
cities_names = {
    # Article moved from the end of the name to the front.
    "Corrales de Buelna, Los": "Los Corrales de Buelna",
    "Ejido, El": "El Ejido",
    "Coruña, A": "A Coruña",
    "Línea de la Concepción, La": "La Línea de la Concepción",
    "Hospitalet de Llobregat, L'": "l'Hospitalet de Llobregat",
    "Prat de Llobregat, El": "El Prat de Llobregat",
    "Vall d'Uixó, la": "La Vall d'Uixó",
    "Bisbal d'Empordà, La": "La Bisbal d'Empordà",
    "Palmas de Gran Canaria, Las": "Las Palmas de Gran Canaria",
    "Arboç, L'": "l'Arboç",
    # Bilingual "name/name" entries reduced to a single name.
    "Pamplona/Iruña": "Pamplona",
    "Alicante/Alacant": "Alicante",
    "Elche/Elx": "Elche",
    "Alcoy/Alcoi": "Alcoy",
    "Sagunto/Sagunt": "Sagunto",
    "Olite/Erriberri": "Olite",
    "Agurain/Salvatierra": "Salvatierra",
    "Vilafranca/Villafranca del Cid": "Villafranca del Cid",
    "Altsasu/Alsasua": "Alsasua",
    "Donostia/San Sebastián": "San Sebastián",
    # Other spelling differences (note the trailing space in "Palma ").
    "Palma ": "Palma",
    "València": "Valencia",
    "Castelló de la Plana": "Castellón",
}

# Read the population workbook, skipping its first row, and rewrite every cell
# that exactly matches one of the keys above.
mun_population = (
    pd.read_excel(HOME + "data/ine/pobmun20.xlsx", skiprows=1)
    .replace(cities_names)
)

In [9]:
# Attach the POB20 column to every station by matching the station "City"
# against the municipality name ("NOMBRE"). A left join keeps stations that
# have no match; they end up with a missing POB20.
# NOTE(review): the join key is the name alone, so a name that appears more
# than once in mun_population would repeat the station row - confirm upstream.
info_sites = location_sites.merge(
    mun_population[["NOMBRE", "POB20"]].rename(columns={"NOMBRE": "City"}),
    on="City",
    how="left",
)

In [10]:
# Stations left without a population value after the merge (ideally none).
info_sites.loc[info_sites["POB20"].isna()]

Unnamed: 0,CountryOrTerritory,Namespace,Network,ResponsibleParty,TimeZone,StationName,EoICode,StationType,StationArea,Pollutant,...,MeasurementMethod,SamplingEquipment,AnalyticalTechnique,envelope,District,City,Subregion,Region,CountryCode,POB20


In [11]:
# Write the merged station table to disk, leaving out the DataFrame index.
info_sites.to_csv(
    HOME + "data/background-sub-urban-eea-stations.csv",
    index=False,
)