# Merge All Data 

Merge air quality, AEMET climate and ERA5-Land data for each station.

In [1]:
# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))

In [2]:
# Run from the repository root so the relative paths used below resolve
setwd("~/Repositories/AirQualityCOVID")

# Load the project's curation scripts
# NOTE(review): the helpers used later (get.AQdata, group.by.date,
# data.as.datetime, pivot.long.table) are presumably defined here -- confirm
invisible(lapply(c("src/Curation/functions.R",
                   "src/Curation/general1.R"),
                 source))

### Main Variables

In [3]:
# Study period bounds passed to get.AQdata, parsed as UTC timestamps
# NOTE(review): end_dt is midnight at the start of 2020-12-31; if get.AQdata
# keeps only records up to end_dt, the last day may be incomplete -- confirm
start_dt <- ymd_hms("2013-01-01 00:00:00", tz = "UTC")
end_dt   <- ymd_hms("2020-12-31 00:00:00", tz = "UTC")

## Stations Information

In [5]:
#-----------------------------
#    Air Quality Stations
#-----------------------------

# Air-quality station metadata; the `site` and `Pollutant` columns are the
# ones read further down. Text columns are kept as character, not factor.
sitesAQ <- read.csv("data/Curation/AirQuality/checked_sitesAQ.csv",
                    stringsAsFactors = FALSE)

#-----------------------------
#       AEMET Stations
#-----------------------------

# AEMET station metadata; `siteAQ` is matched against the air-quality site
# code and `indicativo` is later used to locate the station's value file.
sitesMto <- read.csv("data/Curation/AEMET/checked_sites_AEMET.csv",
                     stringsAsFactors = FALSE)

In [5]:
#--------------------------------
#       Relevant Information
#--------------------------------

# For every studied site, collect the pollutants listed for it in sitesAQ
# and the AEMET station paired with it in sitesMto. The result is a named
# list: list.data[[site]] has the elements `pollutants` and `indicativo`.
list.data <- list()

#sites.lv <- levels(as.factor(sitesAQ$site))[1:2]
#
# Madrid, Santander and Barcelona stations
sites.lv <- c("es0118a", "es1580a", "es1480a")

for (st in sites.lv) {
    # which() discards NA comparisons; indexing with the raw logical vector
    # would add an NA entry for every row whose site code is missing
    plls <- sitesAQ[which(sitesAQ$site == st), "Pollutant"]
    ind <- sitesMto[which(sitesMto$siteAQ == st), "indicativo"]

    # Report incomplete metadata here, where the cause is obvious, instead
    # of failing later when a file named "NA.csv" cannot be opened
    if (length(plls) == 0) {
        warning("No pollutants found in sitesAQ for site ", st,
                call. = FALSE)
    }
    if (length(ind) == 0) {
        warning("No AEMET station found in sitesMto for site ", st,
                call. = FALSE)
    }

    # Only the first matching AEMET station is kept
    list.data[[st]] <- list(pollutants = plls,
                            indicativo = ind[1])
}

In [6]:
#-----------------------------
#    ERA5-Land Data
#-----------------------------

folder.ERA5.Land <- "data/Curation/ERA5-Land/Values/"

# Both files go through the same steps: read the CSV, convert the `dates`
# column with data.as.datetime, and insert a `variable` column right after
# `dates` so the two tables can be stacked and told apart afterwards.

# Relative Humidity
rh.ERA5.Land <- read.csv(paste0(folder.ERA5.Land,
                                "rh_daily_2010_2020_final_stations.csv"),
                         stringsAsFactors = FALSE) %>%
    data.as.datetime("dates", "ymd") %>%
    add_column(variable = "RH", .after = "dates")

# Solar radiation
ssrd.ERA5.Land <- read.csv(paste0(folder.ERA5.Land,
                                  "ssrd_daily_2010_2020_final_stations.csv"),
                           stringsAsFactors = FALSE) %>%
    data.as.datetime("dates", "ymd") %>%
    add_column(variable = "solar.radiation", .after = "dates")

# Stack both variables; rows are later filtered by `variable` and the
# column named after each site is picked out during the merge
ERA5.Land <- rbind(rh.ERA5.Land, ssrd.ERA5.Land)

## Merge Process

In [7]:
# Folders with the per-station value files used in the merge: AQ.files is
# handed to get.AQdata as `fileName`, Mto.files is the prefix of each AEMET CSV
AQ.files <- "data/Curation/AirQuality/Values/"
Mto.files <- "data/Curation/AEMET/Values/"

In [9]:

# For each site, join the air-quality, AEMET and ERA5-Land values by date
# and store the resulting table in list.data[[st]]$data.
for (st in names(list.data)) {
    # Air-quality values, aggregated by group.by.date (unit = "day",
    # FUN = "mean") and reshaped by pivot.long.table.
    # NOTE(review): the printed result has one column per pollutant, so
    # pivot.long.table seems to widen the table despite its name -- confirm
    dataAQ <- get.AQdata(st, list.data[[st]]$pollutants,
                         start_dt = start_dt, end_dt = end_dt,
                         data.by.file = TRUE, fileName = AQ.files) %>%
        group.by.date(formulation = value ~ date + site + variable,
                      dateCl = "date", unit = "day", FUN = "mean") %>%
        data.as.datetime("date", "ymd") %>%
        pivot.long.table(valueCl = "value", variableCl = "variable")

    # AEMET observations of the station paired with this site, restricted
    # to the meteorological columns of interest.
    # NOTE(review): `prec` shows up as <chr> in the printed table, so it
    # probably needs cleaning before being used as a number -- confirm
    dataMto <- read.csv(paste0(Mto.files,
                               list.data[[st]]$indicativo, ".csv"),
                        stringsAsFactors = FALSE) %>%
        data.as.datetime("fecha", "ymd") %>%
        select("fecha", "indicativo",
               "tmed", "prec", "tmin", "tmax", "dir",
               "velmedia", "racha", "presMax", "presMin")

    # Left join: every air-quality date is kept even without AEMET data
    merge.data <- merge(x = dataAQ, y = dataMto,
                        by.x = "date", by.y = "fecha", all.x = TRUE)

    # Append one column per ERA5-Land variable, taken from this site's
    # column. sort(unique()) gives the same order as factor levels.
    for (vr in sort(unique(ERA5.Land$variable))) {
        merge.data <- merge(x = merge.data,
                            y = ERA5.Land[ERA5.Land$variable == vr,
                                          c("dates", st)],
                            by.x = "date", by.y = "dates", all.x = TRUE)
        # merge() puts the new y column last; rename it from the site code
        # to the variable name
        names(merge.data)[ncol(merge.data)] <- vr
    }

    # Identification columns first, measured variables after them. Logical
    # negation stays correct even if nothing matches, unlike -which(), and
    # a single column index keeps names intact when only one variable is
    # left (cbind() of a dropped vector would not).
    info.nms <- c("date", "site", "indicativo")
    variables.nms <- names(merge.data)[!names(merge.data) %in% info.nms]

    list.data[[st]]$data <- merge.data[, c(info.nms, variables.nms)]
}

In [10]:
# Preview the first rows of the merged table of station es1580a
head(list.data$es1580a$data, n = 6L)

Unnamed: 0_level_0,date,site,indicativo,no,no2,pm10,tmed,prec,tmin,tmax,dir,velmedia,racha,presMax,presMin,RH,solar.radiation
Unnamed: 0_level_1,<date>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2013-01-01,es1580a,1111,3.708333,14.91667,21.125,11.6,55,10.6,12.6,29,7.5,17.2,1019.6,1007.3,88.11917,78.56742
2,2013-01-02,es1580a,1111,20.416667,46.75,21.79167,11.2,7,9.8,12.5,33,5.3,13.1,1028.7,1019.6,86.44662,60.13676
3,2013-01-03,es1580a,1111,13.541667,38.66667,20.95833,10.8,0,8.4,13.2,9,5.8,13.1,1030.6,1028.7,85.19767,48.69664
4,2013-01-04,es1580a,1111,47.25,46.625,30.91667,11.4,0,8.0,14.7,12,2.5,7.8,1031.2,1029.6,83.13043,81.60853
5,2013-01-05,es1580a,1111,19.833333,38.29167,27.95833,9.1,0,7.7,10.5,13,2.8,5.6,1029.7,1027.6,89.72875,89.20845
6,2013-01-06,es1580a,1111,6.375,21.25,20.20833,10.5,0,8.0,13.0,12,3.3,7.8,1027.7,1022.4,90.22318,86.77671


### Save Data to CSV <a id="saveAQ"></a>

```R

# Write each station's merged table to data/Curation/Values/<site>.csv
out.dir <- "data/Curation/Values"

# write.csv does not create missing folders, so make sure the target exists
if (!dir.exists(out.dir)) {
    dir.create(out.dir, recursive = TRUE)
}

for (st in names(list.data)) {
    # file.path supplies the "/" between folder and file name; gluing the
    # pieces with an empty separator would give "data/Curation/Values<site>.csv"
    write.csv(list.data[[st]]$data,
              file.path(out.dir, paste0(st, ".csv")),
              row.names = FALSE)
}
```