In [1]:
suppressMessages(library(tidyverse))
suppressMessages(library(caret))

In [2]:
# Every path used below is relative to the project checkout, so switch the
# working directory there first.
# NOTE(review): the hard-coded home-relative path ties this script to one
# machine layout; consider launching from the project root instead.
setwd(dir = "~/Repositories/AirQualityCOVID/")

# Load the project's preprocessing helpers; open.data, add.yesterday.meteo
# and filter.IQR used below are presumably defined here — confirm.
source(file = "src/Cross-validation/preprocess.R")

In [3]:
# Table of site/pollutant combinations to model, read from the curated
# air-quality checklist. Site "es1573a" is excluded, and a "<site>-<Pollutant>"
# label is added in the `names` column.
pairs <- read.csv("data/Curation/checked_AQ.csv") %>%
    filter(site != "es1573a") %>%
    select(site, Pollutant) %>%
    mutate(names = paste(site, Pollutant, sep = "-"))

In [4]:
# Load the air-quality and meteorology data for the sites listed in the first
# column of `pairs`, then drop site "es1573a" again on the loaded data.
# NOTE(review): add.yesterday.meteo(n.days = 3) presumably appends lagged
# meteorology from the previous three days — confirm in preprocess.R.
data.df <- open.data(
    pairs[, 1],
    aq.file = "data/all/data_AQ.rda",
    meteo.file = "data/all/meteorology.rda"
) %>%
    add.yesterday.meteo(n.days = 3) %>%
    filter(site != "es1573a")

# Split on 2020-01-01: rows strictly before that date are used for fitting,
# rows on or after it are the period to predict.
train.df <- filter(data.df, date < lubridate::ymd("2020-01-01"))
to.predict.df <- filter(data.df, date >= lubridate::ymd("2020-01-01"))

In [5]:
# One empty slot per site/pollutant pair, labelled with the pair's
# "<site>-<Pollutant>" name.
allmodel <- setNames(vector("list", nrow(pairs)), pairs$names)

In [6]:
# Fit one linear model per site/pollutant pair on the training period and use
# it to predict the held-out period. The per-pair results are collected in a
# preallocated list and bound together once at the end, instead of growing a
# data frame with rbind() on every iteration.
prediction.chunks <- vector(mode = "list", length = nrow(pairs))

# seq_len() yields an empty sequence when `pairs` has no rows, whereas
# 1:nrow(pairs) would iterate over c(1, 0).
for (i in seq_len(nrow(pairs))) {
    # Training rows for this pair. Identifier columns are dropped so that
    # `value ~ .` regresses on the remaining columns only.
    # NOTE(review): filter.IQR(columns = "value", n = 5) presumably removes
    # outliers in `value` based on the inter-quartile range — confirm in
    # preprocess.R.
    slice.train <- train.df %>%
        filter(site == pairs[i, 1],
               variable == pairs[i, 2]) %>%
        filter.IQR(columns = "value", n = 5) %>%
        select(-date, -site, -variable) %>%
        na.omit()

    model <- lm(value ~ ., data = slice.train, na.action = na.omit)

    # Keep the fitted model in the slot preallocated for this pair.
    allmodel[[i]] <- model

    slice.to.predict <- to.predict.df %>%
        filter(site == pairs[i, 1],
               variable == pairs[i, 2])

    pred <- predict(model, newdata = slice.to.predict)

    # rep() keeps the identifier columns the same length as the data columns,
    # so the frame is still well-formed when a pair has no rows to predict.
    n.rows <- nrow(slice.to.predict)
    prediction.chunks[[i]] <- data.frame(
        site = rep(pairs[i, 1], n.rows),
        variable = rep(pairs[i, 2], n.rows),
        date = slice.to.predict$date,
        obs = slice.to.predict$value,
        pred = unname(pred)
    )
}

predictions <- bind_rows(prediction.chunks)

In [7]:
# Persist the observed and predicted values for every pair. Row names are
# omitted; FALSE is spelled out because `F` is an ordinary variable that can
# be reassigned.
write.csv(predictions,
          "data/Results/predictions.csv",
          row.names = FALSE)