In [1]:
library(tidyverse)
library(caret)

── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.0     [32m✔[39m [34mdplyr  [39m 1.0.5
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: lattice


Attaching package: ‘caret’


The following object is masked from ‘package:purrr’:

    lift




In [2]:
# Every relative path used below ("src/...", "data/...") is resolved against
# this working directory, so it has to be set before anything else runs.
# NOTE(review): the path is specific to one machine's home directory.
setwd("~/Repositories/AirQualityCOVID/")

# Load the project's helper script.
# NOTE(review): presumably this is where open.data() and add.yesterday.meteo()
# used in the following cells are defined -- confirm.
source("src/Cross-validation/useData.R")

In [3]:
# Station catalogue restricted to the rows whose pollutant is ozone ("o3").
# To limit the run to a few municipalities, add a further condition such as
#   Municipio %in% c("Madrid", "Gijón")
# to the filter() call.
sites.lv <- filter(
    read.csv("data/Curation/checked_AQ.csv"),
    Pollutant == "o3"
)

In [4]:
# Training data: ozone for every selected site, requested for the window
# 2013-01-01 to 2020-01-01. The result is indexed by site name further down.
# NOTE(review): the test window starts on 2019-12-31; whether `end_dt` is
# inclusive is decided inside open.data() -- confirm the two sets do not share
# days.
train.df <- open.data(
    sites         = sites.lv$site,
    variable      = "o3",
    start_dt      = lubridate::ymd("2013-01-01"),
    end_dt        = lubridate::ymd("2020-01-01"),
    airQuality.fl = "data/all/data_AQ.rda",
    meteo.fl      = "data/all/meteorology.rda"
)

In [5]:
# Test data: same sites, variable and input files as the training set, but
# requested for the window 2019-12-31 to 2021-01-01.
# NOTE(review): starting one day before 2020 presumably gives the lagged
# meteorology added later something to draw on -- confirm.
test.df <- open.data(
    sites         = sites.lv$site,
    variable      = "o3",
    start_dt      = lubridate::ymd("2019-12-31"),
    end_dt        = lubridate::ymd("2021-01-01"),
    airQuality.fl = "data/all/data_AQ.rda",
    meteo.fl      = "data/all/meteorology.rda"
)

In [6]:
# For every site: fit a linear model of o3 on all remaining columns of the
# training frame, predict over the test frame, pass the predictions through
# downscaleR's eqm() together with the training observations and the in-sample
# fitted values, and store site / date / observed / predicted rows.
# The per-site frames are stacked into `results` once, after the loop.
site.names <- names(train.df)

# Collect per-site frames in a preallocated list; calling rbind() on every
# iteration re-copies all rows accumulated so far.
site.results <- vector("list", length(site.names))

for (i in seq_along(site.names)) {
    nm <- site.names[[i]]

    # Training frame: add lagged meteorology, drop incomplete rows, and remove
    # `date` so it is not picked up as a predictor by the `o3 ~ .` formula.
    df.train <- train.df[[nm]] %>%
            add.yesterday.meteo(n.days=2) %>%
            na.omit() %>%
            select(-date)

    # Nothing to fit on: skip the site before doing any work on the test set.
    if (nrow(df.train) == 0) {
        next
    }

    # Test frame: same preparation, but `date` is kept for the output table.
    df.test <- test.df[[nm]] %>%
            add.yesterday.meteo(n.days=2) %>%
            na.omit()

    # Nothing to predict: data.frame() below would fail when combining the
    # length-one site name with zero-length columns, so skip the site.
    if (nrow(df.test) == 0) {
        next
    }

    model <- train(o3 ~.,
               data=df.train,
               method="lm"
              )

    # eqm() is an internal (non-exported) downscaleR function, hence `:::`.
    # Arguments: training observations, fitted values on the training data,
    # and raw predictions on the test data.
    pred <- downscaleR:::eqm(df.train$o3,
                             predict(model),
                             predict(model,
                                     newdata = df.test),
                             n.quantile=99,
                             precip=FALSE, pr.threshold=0,
                             extrapolation=""
                            )

    site.results[[i]] <- data.frame(site = nm,
                                    date = df.test$date,
                                    obs = df.test$o3,
                                    pred = pred
                                   )
}

# Drop the slots of skipped sites and bind everything in a single call. The
# leading empty data.frame() keeps `results` a data frame even when no site
# produced any rows, as the original accumulator did.
results <- do.call(rbind,
                   c(list(data.frame()),
                     Filter(Negate(is.null), site.results)))

In [7]:
# Relative difference of the observation with respect to the prediction, in
# percent: positive when the observed value is above the predicted one.
results[["diff"]] <- 100 * (results[["obs"]] - results[["pred"]]) / results[["pred"]]

In [8]:
# Show the first six rows of the combined table as a quick sanity check.
head(results, n = 6L)

Unnamed: 0_level_0,site,date,obs,pred,diff
Unnamed: 0_level_1,<chr>,<date>,<dbl>,<dbl>,<dbl>
1,es0118a,2020-01-01,5.26625,8.064861,-34.70129
2,es0118a,2020-01-02,4.294583,10.435778,-58.8475
3,es0118a,2020-01-03,1.8875,13.73202,-86.25475
4,es0118a,2020-01-04,25.96,13.684595,89.70236
5,es0118a,2020-01-05,19.327083,10.959504,76.34998
6,es0118a,2020-01-06,11.130833,9.711151,14.6191


In [10]:
# Save the combined table; the file name is assembled from the model tag
# ("lm") and the pollutant tag ("-o3"). Row names are written as well, since
# write.csv() is called with its defaults.
write.csv(
    results,
    file = paste0("data/Analysis/predictions-", "lm", "-o3", ".csv")
)