# Merge All Data

Merge the WorldMet, AEMET and ERA5-Land meteorological data associated with each air quality station.

In [1]:
# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))

In [2]:
# Working directory
# Every relative path used below ("src/...", "data/...") is resolved
# against this directory.
# NOTE(review): the path is machine-specific; the notebook only runs as-is
# when the repository is cloned at this exact location.
setwd("~/Repositories/AirQualityCOVID")

# Source some scripts 
# data.as.datetime(), used further down, is not defined in this notebook;
# presumably it comes from this file -- TODO confirm.
source("src/functions.R")

### Main Variables

In [3]:
# Bounds of the study period, parsed with lubridate::ymd_hms().
# NOTE(review): neither variable is referenced again in the visible part of
# this notebook, so the merged data is not filtered by them here.
start_dt <- ymd_hms("2013-01-01 00:00:00")
end_dt <- ymd_hms("2020-12-31 00:00:00")

## Stations Information

In [4]:
#-----------------------------
#    Air Quality Stations
#-----------------------------

# Strings are read as factors because the station ids are later iterated
# with levels(sites.AQ$site).
sites.AQ <- read.csv("data/Curation/checked_AQ.csv",
                     stringsAsFactors = TRUE)

#-----------------------------
#       AEMET Stations
#-----------------------------

# `indicativo` is forced to character so that station codes (which mix
# digits and letters and may start with 0) are kept verbatim as text.
sites.AEMET <- read.csv("data/Curation/checked_AEMET.csv",
                        stringsAsFactors = FALSE,
                        colClasses = c("indicativo" = "character"))

#-----------------------------
#       WorldMet Stations
#-----------------------------

sites.WorldMet <- read.csv("data/Curation/checked_WorldMet.csv",
                           stringsAsFactors = TRUE)

In [5]:
# Preview the first rows of the AEMET station table
head(sites.AEMET)

Unnamed: 0_level_0,latitud,provincia,altitud,indicativo,nombre,indsinop,longitud,dist,siteAQ
Unnamed: 0_level_1,<dbl>,<chr>,<int>,<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>
1,40.41194,MADRID,667,3195,"MADRID, RETIRO",8222.0,-3.678056,1.137154,es0118a
2,37.27833,HUELVA,19,4642E,"HUELVA, RONDA ESTE",8383.0,-6.911667,3.467861,es1340a
3,41.41833,BARCELONA,408,0200E,"BARCELONA, FABRA",,2.124167,4.424307,es1438a
4,43.49111,CANTABRIA,52,1111,SANTANDER,8023.0,-3.800556,3.458738,es1580a


## n-Table 

Table relating each air quality station to the codes of its associated AEMET and WorldMet stations.

| siteAQ  | AEMET |   WorldMet   |
|---------|-------|--------------|
| es1580a | 1111  | 080210-99999 |
| es0118a | 3195  | 082230-99999 |
| es1438a | 0200E | 081810-99999 |
| es1340a | 4642E | 083830-99999 |

In [6]:
# Build the lookup table that links every air quality station to the codes
# of its associated AEMET and WorldMet stations (one row per station).
#
# The per-station rows are collected in a list and bound with a single
# rbind() call instead of growing the data frame inside a loop.
nn.rows <- lapply(levels(sites.AQ$site), function(st) {
    data.frame(siteAQ = as.factor(st),
               AEMET = as.factor(sites.AEMET[sites.AEMET$siteAQ == st,
                                             "indicativo"]),
               WorldMet = sites.WorldMet[sites.WorldMet$siteAQ == st,
                                         "code"]
              )
})

# The leading empty data frame keeps the result an (empty) data.frame when
# there are no stations; rbind() ignores it otherwise.
nn.stations <- do.call(rbind, c(list(data.frame()), nn.rows))

head(nn.stations)

Unnamed: 0_level_0,siteAQ,AEMET,WorldMet
Unnamed: 0_level_1,<fct>,<fct>,<fct>
1,es0118a,3195,082230-99999
2,es1340a,4642E,083830-99999
3,es1438a,0200E,081810-99999
4,es1580a,1111,080210-99999


## ERA5-Land Data

In [7]:
#-----------------------------
#    ERA5-Land Data
#-----------------------------

folder.ERA5.Land <- "data/Curation/ERA5-Land/"

# Each .rda file is loaded into the global environment; the code below
# reads the loaded data from an object named `df` (presumably defined by
# the .rda file itself -- TODO confirm).

# Relative Humidity
load(paste0(folder.ERA5.Land, "rh_daily_2010_2020_final_stations.rda"))

rh.ERA5.Land <- add_column(data.as.datetime(df, "dates", "ymd"),
                           variable="RH", .after="dates")

# Solar radiation (this load overwrites the `df` used above)
load(paste0(folder.ERA5.Land, "ssrd_daily_2010_2020_final_stations.rda"))

ssrd.ERA5.Land <- add_column(data.as.datetime(df, "dates", "ymd"),
                             variable="solar.radiation", .after="dates")

# Stack both variables in long format; rows are told apart by `variable`
ERA5.Land <- rbind(rh.ERA5.Land, ssrd.ERA5.Land)

## Merge Process

In [8]:
# Root folder of the curated data; the "WorldMet/" and "AEMET/" station
# files are read from sub-folders of this path
Mto.files <- "data/Curation/"

# Accumulator for the merged meteorological data of all stations
data_Mto <- data.frame()

In [9]:
# Merge, station by station, the WorldMet and AEMET records with the
# ERA5-Land variables, then stack the result of every station on `data_Mto`.
#
# NOTE(review): none of the columns kept below identifies the station
# (`code` is dropped and no site column is added), so rows coming from
# different stations cannot be told apart in `data_Mto`. Confirm whether a
# station column should be appended before the data is saved.

# The ERA5-Land variable names do not depend on the station: compute once.
era5.variables <- levels(as.factor(ERA5.Land$variable))

# One merged data frame per station, bound in a single call after the loop
# instead of growing `data_Mto` with rbind() on every iteration.
station.ids <- levels(nn.stations$siteAQ)
data.per.station <- vector("list", length(station.ids))

for (i in seq_along(station.ids)) {
    st <- station.ids[i]

    # WorldMet file of the station; its `code` column is discarded
    code <- nn.stations[nn.stations$siteAQ == st, "WorldMet"]

    data.WorldMet <- read.csv(paste0(Mto.files, "WorldMet/", code, ".csv"),
                              stringsAsFactors = FALSE) %>%
                    data.as.datetime("date", "ymd") %>%
                    select(-"code")

    # AEMET file of the station, restricted to the variables of interest
    indicativo <- nn.stations[nn.stations$siteAQ == st, "AEMET"]

    data.AEMET <- read.csv(paste0(Mto.files, "AEMET/", indicativo, ".csv"),
                           stringsAsFactors = FALSE) %>%
                    data.as.datetime("fecha", "ymd") %>%
                    select("fecha", "tmed", "prec",
                           "tmin", "tmax", "presMax", "presMin"
                           )

    # Full outer join: keep every date present in either source
    data.row <- merge(x = data.WorldMet, y = data.AEMET,
                      by.x = "date", by.y = "fecha", all = TRUE)

    for (vr in era5.variables) {
        # Left join: only the dates already in `data.row` are kept. The
        # ERA5-Land column selected for this station is named after `st`.
        data.row <- merge(x = data.row,
                          y = ERA5.Land[ERA5.Land$variable == vr,
                                        c("dates", st)],
                          by.x = "date", by.y = "dates", all.x = TRUE
                         )
        # The joined column is the last one: rename it to the variable name
        names(data.row)[ncol(data.row)] <- vr
    }

    data.per.station[[i]] <- data.row
}

# Append to the existing `data_Mto`, as the per-iteration rbind() did
data_Mto <- do.call(rbind, c(list(data_Mto), data.per.station))

In [10]:
# Preview the merged data; `data.row` holds only the last station processed
# by the loop above, not the full `data_Mto`
head(data.row)

Unnamed: 0_level_0,date,ws,wd,atmos_pres,tmed,prec,tmin,tmax,presMax,presMin,RH,solar.radiation
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2013-01-01,5.5188596,276.71826,1020.7,11.6,55,10.6,12.6,1019.6,1007.3,88.11917,78.56742
2,2013-01-02,2.4236842,306.10747,1031.96,11.2,7,9.8,12.5,1028.7,1019.6,86.44662,60.13676
3,2013-01-03,2.2947368,84.23836,1038.16,10.8,0,8.4,13.2,1030.6,1028.7,85.19767,48.69664
4,2013-01-04,0.2657895,126.09937,1038.04,11.4,0,8.0,14.7,1031.2,1029.6,83.13043,81.60853
5,2013-01-05,0.777193,191.7522,1035.86,9.1,0,7.7,10.5,1029.7,1027.6,89.72875,89.20845
6,2013-01-06,1.1710526,100.45407,1031.24,10.5,0,8.0,13.0,1027.7,1022.4,90.22318,86.77671


## Save Data

```R
# Store the merged meteorological data together with the station lookup
# table in a single .rda file
save(data_Mto, nn.stations, file="data/meteorology.rda")
```