In [1]:
suppressMessages(library(tidyverse))
suppressMessages(library(caret))

In [2]:
# All relative paths used later in this script are resolved against the
# repository root, so it becomes the working directory first.
setwd("~/Repositories/AirQualityCOVID/")

# Load the project helper scripts; source() evaluates them in the global
# environment by default, so whatever they define is available afterwards.
invisible(lapply(
    c("src/Cross-validation/useData.R",
      "src/Cross-validation/preProcess.R"),
    source
))

In [3]:
# Run settings.
# `variable`: pollutant to analyse; used to pick the sites, to load the data
#             and in the name of the saved predictions file.
# `method`:   model label; used in the name of the saved predictions file.
variable <- "pm2.5"
method <- "lm"

In [4]:
# Curated site table, restricted to the rows whose Pollutant matches the
# variable under study.
sites.lv <- "data/Curation/checked_AQ.csv" %>%
    read.csv() %>%
    filter(Pollutant == variable)

In [5]:
# Training data for every selected site: 2013-01-01 to 2020-01-01.
train.df <- open.data(
    sites         = sites.lv$site,
    variable      = variable,
    start_dt      = lubridate::ymd("2013-01-01"),
    end_dt        = lubridate::ymd("2020-01-01"),
    airQuality.fl = "data/all/data_AQ.rda",
    meteo.fl      = "data/all/meteorology.rda"
)

# Test data for the same sites, starting 2019-12-31. No end date is passed
# (an explicit end of 2021-01-01 was disabled), so open.data falls back to
# whatever it does when `end_dt` is omitted.
# NOTE(review): the test start (2019-12-31) precedes the training end
# (2020-01-01) - confirm whether open.data treats the bounds as inclusive
# and whether this overlap is intended.
test.df <- open.data(
    sites         = sites.lv$site,
    variable      = variable,
    start_dt      = lubridate::ymd("2019-12-31"),
    airQuality.fl = "data/all/data_AQ.rda",
    meteo.fl      = "data/all/meteorology.rda"
)

In [6]:
# Per-site modelling loop.
#
# For every site in `train.df`:
#   1. clean the training data (positive values only, IQR filter, lagged
#      meteorology, complete rows) and drop the date column;
#   2. prepare the test data the same way, minus the IQR filter, keeping the
#      date so predictions can be labelled;
#   3. fit a caret model of `variable` on all remaining columns;
#   4. predict the test period and adjust the predictions with
#      downscaleR's eqm();
#   5. store site, date, observed value and adjusted prediction.
#
# The target column and the caret method come from the `variable` and
# `method` settings so that this cell stays in sync with the output file
# name, which is built from the same two settings.
#
# Result: `results`, a data frame with columns site, date, obs, pred.qq.

# Formula "<variable> ~ ." built once; it does not change between sites.
model.formula <- reformulate(".", response = variable)

# One slot per site; slots of skipped sites stay NULL and are ignored by
# bind_rows(). Collecting in a list avoids re-copying `results` with
# rbind() on every iteration.
site.names <- names(train.df)
site.results <- vector("list", length(site.names))

for (i in seq_along(site.names)) {
    nm <- site.names[[i]]

    df.train <- train.df[[nm]] %>%
            filter(.data[[variable]] > 0) %>%
            filter.IQR(columns=variable, n=5) %>%
            add.yesterday.meteo(n.days=3) %>%
            na.omit() %>%
            select(-date)

    if (nrow(df.train) == 0) {
        message("Skipping site ", nm, ": no usable training rows")
        next
    }

    # A site may be absent from the test list or have no usable test rows;
    # there is nothing to predict in that case.
    if (is.null(test.df[[nm]])) {
        message("Skipping site ", nm, ": no test data")
        next
    }

    df.test <- test.df[[nm]] %>%
            filter(.data[[variable]] > 0) %>%
            add.yesterday.meteo(n.days=3) %>%
            na.omit()

    if (nrow(df.test) == 0) {
        message("Skipping site ", nm, ": no usable test rows")
        next
    }

    model <- train(model.formula,
                   data=df.train,
                   method=method
                  )
    pred <- predict(model,
                    newdata = df.test %>%
                        select(-date, -all_of(variable)))

    # eqm() is not exported by downscaleR, hence `:::`.
    # NOTE(review): arguments are presumably (training observations,
    # training predictions, test predictions) - confirm against the
    # installed downscaleR version, since internal functions can change.
    pred.qq <- downscaleR:::eqm(df.train[[variable]],
                             predict(model),
                             pred,
                             n.quantile=99,
                             precip=FALSE, pr.threshold=0,
                             extrapolation=""
                            )

    site.results[[i]] <- data.frame(site = nm,
                                    date = df.test$date,
                                    obs = df.test[[variable]],
                                    pred.qq = pred.qq
                                   )
}

# Stack all sites at once; as.data.frame() keeps `results` a plain
# data frame even when every site was skipped.
results <- as.data.frame(bind_rows(site.results))

In [7]:
# Persist the predictions; the file name encodes the method and pollutant,
# e.g. "data/Analysis/predictions-lm-pm2.5.rda" for the current settings.
save(
    results,
    file = paste0("data/Analysis/predictions-", method, "-", variable, ".rda")
)