# Merge All Data

Merge air quality, AEMET climate and ERA5-Land data for each station.

In [1]:
# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))

In [2]:
# Working directory
# NOTE(review): hard-coded, machine-specific path. Every relative path used
# below (source(), read.csv(), write.csv()) is resolved against it, so the
# notebook only runs where the repository is cloned at this location.
setwd("~/Repositories/AirQualityCOVID")

# Load the project's curation helpers.
# Presumably this defines get.AQdata(), group.by.date(), data.as.datetime()
# and pivot.long.table(), which are called later but not defined in this
# notebook -- TODO confirm.
source("src/Curation/functions.R")

### Main Variables

In [3]:
# Study period; both bounds are passed to get.AQdata() in the merge loop.
# lubridate::ymd_hms() parses these as UTC date-times by default.
start_dt <- ymd_hms("2013-01-01 00:00:00")
# NOTE(review): the end bound is midnight at the *start* of 2020-12-31. If
# get.AQdata() filters sub-daily records with `<= end_dt`, the rest of that
# day is excluded -- confirm this is intended.
end_dt <- ymd_hms("2020-12-31 00:00:00")

## Stations Information

In [4]:
#-----------------------------
#    Air Quality Stations
#-----------------------------

# Curated air-quality station table. The columns used below are `site`
# (station code) and `Pollutant`.
# The argument is spelled out in full (`stringsAsFactors`) and uses FALSE
# rather than F: the abbreviated name only worked through partial argument
# matching, and F is an ordinary variable that can be reassigned.
sitesAQ <- read.csv("data/Curation/AirQuality/checked_sitesAQ.csv",
                    stringsAsFactors = FALSE)

#-----------------------------
#       AEMET Stations
#-----------------------------

# Curated AEMET (weather) station table. The columns used below are `siteAQ`
# (code of the associated air-quality station) and `indicativo` (AEMET
# station identifier).
sitesMto <- read.csv("data/Curation/AEMET/checked_sites_AEMET.csv",
                     stringsAsFactors = FALSE)

"cannot open file 'data/Curation/AirQuality/checked_sitesAQ.csv': No such file or directory"


ERROR: Error in file(file, "rt"): cannot open the connection


In [5]:
#--------------------------------
#       Relevant Information
#--------------------------------

# Stations to process: Madrid, Santander and Barcelona.
# Alternative kept for reference (first two sites listed in sitesAQ):
#sites.lv <- levels(as.factor(sitesAQ$site))[1:2]
sites.lv <- c("es0118a", "es1580a", "es1480a")

# Named list with one entry per station code. Each entry holds the pollutants
# listed for that station in sitesAQ and the first AEMET `indicativo`
# associated with it in sitesMto (NA when there is no match).
list.data <- lapply(
    setNames(sites.lv, sites.lv),
    function(site.id) {
        list(pollutants = sitesAQ[sitesAQ$site == site.id, "Pollutant"],
             indicativo = sitesMto[sitesMto$siteAQ == site.id,
                                   "indicativo"][1])
    }
)

ERROR: Error in eval(expr, envir, enclos): object 'sitesAQ' not found


In [None]:
#-----------------------------
#    ERA5-Land Data
#-----------------------------

folder.ERA5.Land <- "data/Curation/ERA5-Land/Values/"

# Each file is read into a table with a `dates` column plus, as used by the
# merge loop, one column per station code. A `variable` tag is inserted right
# after `dates` so both tables can be stacked and later split by variable.
# `stringsAsFactors = FALSE` is spelled out in full (the abbreviated
# `stringsAsFactor=F` relied on partial matching and the reassignable `F`),
# and paste0() replaces paste(..., sep="").

# Relative Humidity
rh.ERA5.Land <- read.csv(paste0(folder.ERA5.Land,
                                "rh_daily_2010_2020_final_stations.csv"),
                         stringsAsFactors = FALSE) %>%
                    data.as.datetime("dates", "ymd") %>%
                    add_column(variable = "RH", .after = "dates")

# Solar radiation
ssrd.ERA5.Land <- read.csv(paste0(folder.ERA5.Land,
                                  "ssrd_daily_2010_2020_final_stations.csv"),
                           stringsAsFactors = FALSE) %>%
                    data.as.datetime("dates", "ymd") %>%
                    add_column(variable = "solar.radiation", .after = "dates")

# Stack both variables; rbind() requires the two tables to share the same
# columns.
ERA5.Land <- rbind(rh.ERA5.Land, ssrd.ERA5.Land)

## Merge Process

In [None]:
# Folders holding the previously downloaded per-station data.
# AQ.files is handed to get.AQdata() as `fileName`; Mto.files is the prefix
# of the AEMET "<indicativo>.csv" files read in the merge loop. Both end in
# "/" because they are concatenated directly with file names.
AQ.files <- "data/Curation/AirQuality/Values/"
Mto.files <- "data/Curation/AEMET/Values/"

In [None]:
# Build one merged table per station and store it in list.data[[st]]$data.
# Each table joins, by date: the station's air-quality data, the AEMET
# observations of its associated weather station, and one column per
# ERA5-Land variable.
for (st in names(list.data)) {
    # Air-quality data for this station. The helpers come from the sourced
    # project script; presumably they aggregate to daily means and reshape
    # so there is one column per pollutant -- TODO confirm.
    dataAQ <- get.AQdata(st, list.data[[st]]$pollutants,
                         start_dt = start_dt, end_dt = end_dt,
                         data.by.file = TRUE, fileName = AQ.files) %>%
                    group.by.date(formulation = value ~ date + site + variable,
                                  dateCl = "date", unit = "day",
                                  FUN = "mean") %>%
                    data.as.datetime("date", "ymd") %>%
                    pivot.long.table(valueCl = "value",
                                     variableCl = "variable")

    # AEMET observations of the associated weather station, restricted to
    # the columns of interest.
    dataMto <- read.csv(paste0(Mto.files,
                               list.data[[st]]$indicativo, ".csv"),
                        stringsAsFactors = FALSE) %>%
                    data.as.datetime("fecha", "ymd") %>%
                    select("fecha", "indicativo",
                           "tmed", "prec", "tmin", "tmax", "dir",
                           "velmedia", "racha", "presMax", "presMin"
                           )

    # Left join: keep every air-quality date even without weather data.
    merge.data <- merge(x = dataAQ, y = dataMto,
                        by.x = "date", by.y = "fecha", all.x = TRUE)

    # Append one column per ERA5-Land variable, taken from this station's
    # column in ERA5.Land.
    for (vr in levels(as.factor(ERA5.Land$variable))) {
        merge.data <- merge(x = merge.data,
                            y = ERA5.Land[ERA5.Land$variable == vr,
                                          c("dates", st)],
                            by.x = "date", by.y = "dates", all.x = TRUE
                           )
        # merge() puts the non-key columns of `y` last, so the last column
        # is the one just added (named after the station); rename it to the
        # variable name.
        names(merge.data)[ncol(merge.data)] <- vr
    }

    # Put the identifying columns first, followed by every measured variable.
    info.nms <- c("date", "site", "indicativo")
    # A logical mask is used instead of `-which(...)`, which selects nothing
    # when there is no match.
    variables.nms <- names(merge.data)[!(names(merge.data) %in% info.nms)]

    # A single column selection with drop = FALSE keeps the result a data
    # frame with its original column names even if only one variable column
    # is present (cbind() of two subsets would degrade it to a vector).
    list.data[[st]]$data <- merge.data[, c(info.nms, variables.nms),
                                       drop = FALSE]
}

## Save Data to CSV

```R
# Write each station's merged table to data/Curation/Values/<site>.csv.
# The path is built with file.path(): the previous
# paste("data/Curation/Values", st, ".csv", sep="") had no separator between
# the folder and the file name, producing e.g.
# "data/Curation/Valueses0118a.csv".
out.dir <- "data/Curation/Values"

# Make sure the output folder exists; this is a no-op when it already does.
dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)

for (st in names(list.data)) {
    write.csv(list.data[[st]]$data,
              file.path(out.dir, paste0(st, ".csv")),
              row.names = FALSE)
}
```