# Merge All Data

Merge the WorldMet, AEMET and ERA5-Land meteorological data for each selected air quality station.

In [1]:
# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))

In [2]:
# Working directory
# Every relative path used below ("src/...", "data/...") is resolved from
# this directory, so the notebook only runs where this location exists.
setwd("~/Repositories/AirQualityCOVID")

# Source some scripts
# NOTE(review): data.as.datetime(), used further down, is not defined in this
# notebook -- presumably it comes from this file; confirm.
source("src/functions.R")

### Main Variables

In [3]:
# Boundaries of the study period.
# NOTE(review): neither value is referenced again in the visible part of this
# notebook -- confirm whether the merged data should be filtered to this range.
start_dt <- ymd_hms("2013-01-01 00:00:00")
end_dt <- ymd_hms("2020-12-31 00:00:00")

# Air quality sites to process.
# Alternative kept for reference (first sites of the curated list):
#   sites.lv <- levels(as.factor(sites.AQ$site))[1:3]
sites.lv <- c(
    "es0118a", "es1438a",  # big cities (Madrid and Barcelona)
    "es1580a", "es1340a"   # small cities (Santander and Huelva)
)

## Stations Information

In [4]:
#-----------------------------
#    Air Quality Stations
#-----------------------------

# Curated air quality station table; text columns are read as factors.
sites.AQ <- read.csv("data/Curation/checked_AQ.csv",
                     stringsAsFactors = TRUE)

#-----------------------------
#       AEMET Stations
#-----------------------------

# "indicativo" is forced to character so station codes stay text: some are
# purely numeric (e.g. 1082) while others are alphanumeric (e.g. 0200E).
sites.AEMET <- read.csv("data/Curation/checked_AEMET.csv",
                        stringsAsFactors = FALSE,
                        colClasses = c("indicativo" = "character"))

#-----------------------------
#       WorldMet Stations
#-----------------------------

# Curated WorldMet station table; text columns (including "code") are read
# as factors.
sites.WorldMet <- read.csv("data/Curation/checked_WorldMet.csv",
                           stringsAsFactors = TRUE)

In [5]:
# Preview the first rows of the AEMET station table
head(sites.AEMET)

Unnamed: 0_level_0,latitud,provincia,altitud,indicativo,nombre,indsinop,longitud,dist,siteAQ
Unnamed: 0_level_1,<dbl>,<chr>,<int>,<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>
1,43.29806,BIZKAIA,42,1082,BILBAO AEROPUERTO,8025,-2.906389,5.396849,es0041a
2,43.29806,BIZKAIA,42,1082,BILBAO AEROPUERTO,8025,-2.906389,5.756578,es0110a
3,40.41194,MADRID,667,3195,"MADRID, RETIRO",8222,-3.678056,1.137154,es0118a
4,40.41194,MADRID,667,3195,"MADRID, RETIRO",8222,-3.678056,4.417472,es0120a
5,37.41667,SEVILLA,34,5783,SEVILLA AEROPUERTO,8391,-5.879167,7.968651,es0817a
6,37.41667,SEVILLA,34,5783,SEVILLA AEROPUERTO,8391,-5.879167,11.220392,es0890a


## n-Table 

Lookup table relating each air quality station to the codes of its associated AEMET and WorldMet stations.

| siteAQ  | AEMET |   WorldMet   |
|---------|-------|--------------|
| es1580a | 1111  | 080230-99999 |
| es0118a | 3195  | 082230-99999 |
| es1438a | 0200E | 081810-99999 |
| es1340a | 4642E | 083830-99999 |

In [6]:
# Build the lookup table linking every selected air quality site (sites.lv)
# to its AEMET station code ("indicativo") and its WorldMet station code
# ("code"). One row per site; siteAQ and AEMET are stored as factors and
# WorldMet keeps the type it has in sites.WorldMet.
#
# Each site must match exactly one station in both tables. Anything else
# stops with an explicit message, because the merge step builds one file
# path per site from these codes.
station.rows <- lapply(sites.lv, function(st) {
    # %in% never yields NA, so rows with a missing siteAQ cannot slip in.
    aemet.code <- sites.AEMET[sites.AEMET$siteAQ %in% st, "indicativo"]
    worldmet.code <- sites.WorldMet[sites.WorldMet$siteAQ %in% st, "code"]

    if (length(aemet.code) != 1 || length(worldmet.code) != 1) {
        stop("Site '", st, "' must match exactly one AEMET and one WorldMet ",
             "station (found ", length(aemet.code), " and ",
             length(worldmet.code), ")",
             call. = FALSE)
    }

    data.frame(siteAQ = as.factor(st),
               AEMET = as.factor(aemet.code),
               WorldMet = worldmet.code)
})

# A single rbind over all rows; the leading empty data frame keeps the result
# a (zero-column) data frame when sites.lv is empty.
nn.stations <- do.call(rbind, c(list(data.frame()), station.rows))

head(nn.stations)

Unnamed: 0_level_0,siteAQ,AEMET,WorldMet
Unnamed: 0_level_1,<fct>,<fct>,<fct>
1,es0118a,3195,082230-99999
2,es1438a,0200E,081810-99999
3,es1580a,1111,080230-99999
4,es1340a,4642E,083830-99999


## ERA5-Land Data

In [7]:
#-----------------------------
#    ERA5-Land Data
#-----------------------------

folder.ERA5.Land <- "data/Curation/ERA5-Land/"

# Each .rda file is expected to restore an object named `df` into the
# workspace (it is read right after every load() call).
# NOTE(review): confirm the object name stored in the .rda files.

# Relative Humidity
load(paste0(folder.ERA5.Land, "rh_daily_2010_2020_final_stations.rda"))
rh.ERA5.Land <- add_column(data.as.datetime(df, "dates", "ymd"),
                           variable = "RH", .after = "dates")

# Solar radiation
load(paste0(folder.ERA5.Land, "ssrd_daily_2010_2020_final_stations.rda"))
ssrd.ERA5.Land <- add_column(data.as.datetime(df, "dates", "ymd"),
                             variable = "solar.radiation", .after = "dates")

# Long table: both variables stacked, told apart by the "variable" column
ERA5.Land <- rbind(rh.ERA5.Land, ssrd.ERA5.Land)

## Merge Process

In [8]:
# Base folder holding the curated meteorological files; the merge loop reads
# from its "WorldMet/" and "AEMET/" subfolders.
Mto.files <- "data/Curation/"

# Accumulator for the merged meteorological data of all stations
data_Mto <- data.frame()

In [9]:
# For every station in nn.stations: read its WorldMet and AEMET files, join
# them by date (full outer join), attach one column per ERA5-Land variable
# (left join on date), tag the rows with the site id and append the result
# to data_Mto.

# Names of the ERA5-Land variables; computed once since it does not depend
# on the station.
era5.variables <- levels(as.factor(ERA5.Land$variable))

# Per-station tables are collected here and bound in one step after the loop
# instead of growing data_Mto with rbind() on every iteration.
merged.stations <- list()

for (st in levels(nn.stations$siteAQ)) {

    # Without an ERA5-Land column the station cannot be completed. Report it
    # rather than dropping it silently from the final data set.
    if (!(st %in% names(ERA5.Land))) {
        warning("Site '", st, "' has no ERA5-Land column; skipped",
                call. = FALSE)
        next
    }

    code <- nn.stations[nn.stations$siteAQ == st, "WorldMet"]

    data.WorldMet <- read.csv(paste0(Mto.files, "WorldMet/", code, ".csv"),
                              stringsAsFactors = FALSE) %>%
                    data.as.datetime("date", "ymd") %>%
                    select(-"code")

    indicativo <- nn.stations[nn.stations$siteAQ == st, "AEMET"]

    data.AEMET <- read.csv(paste0(Mto.files, "AEMET/", indicativo, ".csv"),
                           stringsAsFactors = FALSE) %>%
                    data.as.datetime("fecha", "ymd") %>%
                    select("fecha", "tmed", "prec",
                           "tmin", "tmax", "presMax", "presMin"
                           )

    # Keep every date present in either source
    data.row <- merge(x = data.WorldMet, y = data.AEMET,
                      by.x = "date", by.y = "fecha", all = TRUE)

    for (vr in era5.variables) {
        data.row <- merge(x = data.row,
                          y = ERA5.Land[ERA5.Land$variable == vr,
                                        c("dates", st)],
                          by.x = "date", by.y = "dates", all.x = TRUE
                         )
        # merge() appends the station column last; rename it to the variable
        names(data.row)[ncol(data.row)] <- vr
    }

    data.row[, "site"] <- st

    # Unnamed list elements: named ones would be used by rbind() as
    # row-name prefixes.
    merged.stations[[length(merged.stations) + 1]] <- data.row
}

data_Mto <- do.call(rbind, c(list(data_Mto), merged.stations))

In [10]:
# NOTE(review): data.row holds only the last station processed by the merge
# loop (es1340a in the output below); head(data_Mto) would preview the full
# merged table instead.
head(data.row)

Unnamed: 0_level_0,date,ws,wd,atmos_pres,tmed,prec,tmin,tmax,presMax,presMin,RH,solar.radiation,site
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,2013-01-01,,,,12.6,0.0,9.1,16.0,1023.5,1017.7,81.2693,113.2557,es1340a
2,2013-01-02,,,,11.3,0.0,5.1,17.5,1026.0,1023.0,74.36909,104.7072,es1340a
3,2013-01-03,,,,10.3,0.0,4.8,15.8,1034.2,1025.8,68.04736,117.4293,es1340a
4,2013-01-04,,,,11.3,0.0,6.7,15.9,1034.2,1031.4,69.8418,121.7122,es1340a
5,2013-01-05,,,,12.1,0.0,5.3,18.9,1033.9,1030.2,79.40657,123.707,es1340a
6,2013-01-06,,,,11.4,0.1,5.7,17.0,1030.8,1026.8,87.14077,115.2333,es1340a


## Save Data

```R
# Store the merged meteorological data together with the station lookup table
save(data_Mto, nn.stations, file="data/meteorology.rda")
```