# Curation Estaciones Calidad del Aire

Estudiar de qué contaminantes no hay datos suficientes para el estudio en cada estación. Se ha de comprobar que hay datos ($\geq 90\%$) durante el periodo de interés (`1-Enero-2020` <=> `30-Junio-2020`). También se comprueba si hay intervalos largos de tiempo sin datos, utilizando una resolución mínima semanal, mensual y anual.

|   site   |  Pollutant   |   start_yr   |    end_yr    | hv.min  | missing.wk | missing.mnth | missing.yr |
|----------|--------------|--------------|--------------|---------|------------|--------------|------------|
| es0001a  |     no2      |  01-01-2015  |  02-01-2015  |  TRUE   |     34     |      2       |     0      |
| es0001a  |     no       |  01-01-2015  |  02-01-2015  |  TRUE   |     40     |     12       |     1      |
| es0001a  |      o3      |  01-01-2015  |  02-01-2015  |  FALSE  |      4     |      0       |     0      |

In [1]:
setwd("~/Repositories/AirQualityCOVID")

In [2]:
source("src/general.R")

## Funciones para filtrar datos

In [3]:
suppressMessages(library(lubridate))
suppressMessages(library(tidyverse))

In [4]:
# Check whether a series has enough valid data in the period of interest.
#
# Args:
#   dataFrame: data.frame with a `date` column and a `value` column.
#   start_dt:  first day of the period (inclusive).
#   end_dt:    last day of the period (inclusive).
#
# Returns:
#   TRUE when the series starts on/before `start_dt`, ends on/after `end_dt`
#   and at least 90% of the `value` entries inside the period are not NA;
#   FALSE otherwise (including when there are no records at all, or no
#   records inside the period).
have.2020 <- function(dataFrame, start_dt = ymd("2020-01-01"),
                                 end_dt = ymd("2020-06-30")) {

    # No observations at all: the period cannot be covered.
    if (length(dataFrame$date) == 0) {
        return(FALSE)
    }

    # The series must already exist at the start of the period ...
    if (as_date(min(dataFrame$date)) > start_dt) {
        return(FALSE)
    }
    # ... and must still be running at its end.
    if (as_date(max(dataFrame$date)) < end_dt) {
        return(FALSE)
    }

    in.period <- dataFrame$date >= start_dt & dataFrame$date <= end_dt
    values <- dataFrame$value[in.period]

    # Records before and after the period but none inside it: without this
    # guard the ratio below is 0/0 = NaN and the comparison yields NA.
    if (length(values) == 0) {
        return(FALSE)
    }

    # Fraction of non-missing values among the records inside the period.
    amount.data <- sum(!is.na(values)) / length(values)

    amount.data >= 0.9
}

In [103]:
# Count how many periods of length `unit` have no aggregated value between
# `start_dt` and `end_dt`.
#
# Args:
#   dataFrame: data passed on to `group.by.date` (defined outside this
#              notebook cell; presumably needs `date`/`value` columns --
#              TODO confirm against src/general.R).
#   unit:      one of "hour", "day", "week", "month", "year".
#   start_dt:  start of the reference range.
#   end_dt:    end of the reference range.
#
# Returns:
#   Number of expected periods minus the number of aggregated periods whose
#   `x` column is not NA.
get.missing <- function(dataFrame, unit="week",
                        start_dt=ymd("2010-01-01"),
                        end_dt=ymd("2020-12-31")) {

    # Length of every supported unit in seconds; a month is taken as 1/12 of
    # a 365-day year.
    secs.per.day <- 3600*24
    unit.secs <- list("hour" = 3600,
                      "day" = secs.per.day,
                      "week" = secs.per.day*7,
                      "month" = (secs.per.day*365)/12,
                      "year" = secs.per.day*365)

    # Aggregate the series with the mean at the requested resolution.
    aggregated <- group.by.date(dataFrame, unit=unit, FUN=mean)

    # Whole number of units spanned by the (rounded) reference range.
    range.start <- round_date(start_dt, unit=unit)
    range.end <- round_date(end_dt, unit=unit)
    n.units <- as.integer(interval(range.start, range.end) / unit.secs[[unit]])

    n.with.data <- sum(!is.na(aggregated$x))

    # "+ 1" because both end points of the range count as a period.
    n.units - n.with.data + 1
}

## Estaciones de Calidad del Aire para el estudio

In [104]:
# Load the site catalogue. Strings are read as factors so that the distinct
# site codes and pollutant names can be taken from the factor levels below.
# The argument is spelled out in full (`stringsAsFactors`) instead of relying
# on partial argument matching.
all.sites <- read.csv("data/curation/sitesAQ.csv",
                      stringsAsFactors=TRUE)

# Distinct values of the `site` and `variable` columns.
sites.lv <- levels(all.sites$site)
pollutants.lv <- levels(all.sites$variable)

## Información Relevante

In [105]:
# Build the summary table: one row per (site, pollutant) pair that has data,
# with the first/last date, the 2020 coverage flag and the number of missing
# weeks, months and years.
#
# Rows are collected in a list and bound once at the end instead of growing
# the data frame with rbind() on every iteration.
info.rows <- list()

for (st in c("es1480a", "es1225a", "es1580a")) {
    dataAQ <- read.csv(paste0("data/curation/dataAQ/", st, ".csv"))
    for (pll in pollutants.lv) {
        dataAQPLL <- dataAQ[dataAQ$variable == pll, ]
        if (nrow(dataAQPLL) > 0) {
            start_yr <- as_date(min(dataAQPLL$date))
            end_yr <- as_date(max(dataAQPLL$date))

            hv.min <- have.2020(dataAQPLL)

            mss.wk <- get.missing(dataAQPLL, unit="week")
            mss.mnth <- get.missing(dataAQPLL, unit="month")
            mss.yr <- get.missing(dataAQPLL, unit="year")

            # Unnamed arguments keep their variable names as column names.
            new.row <- data.frame(site=st, Pollutant=pll,
                                  start_yr, end_yr,
                                  hv.min, mss.wk, mss.mnth, mss.yr)
            info.rows[[length(info.rows) + 1L]] <- new.row
        }
    }
}

# Newest row first, matching the order produced by prepending each row;
# an empty data frame when no site/pollutant pair had data.
all.info <- if (length(info.rows) > 0) {
    do.call(rbind, rev(info.rows))
} else {
    data.frame()
}

In [106]:
# Display the summary table (one row per site/pollutant pair with data).
all.info

site,Pollutant,start_yr,end_yr,hv.min,mss.wk,mss.mnth,mss.yr
<chr>,<chr>,<date>,<date>,<lgl>,<dbl>,<dbl>,<dbl>
es1580a,pm10,2010-01-01,2020-12-30,True,8,0,0
es1580a,no2,2010-01-01,2020-12-31,True,9,0,0
es1580a,no,2010-01-01,2020-12-31,True,9,0,0
es1225a,pm2.5,2010-01-04,2018-12-29,False,115,24,2
es1225a,pm10,2010-01-02,2018-12-29,False,115,24,2
es1225a,o3,2010-01-01,2020-12-31,True,260,59,4
es1225a,no2,2010-01-01,2020-12-31,True,7,0,0
es1225a,no,2010-01-01,2020-12-31,True,7,0,0
es1480a,pm2.5,2010-01-27,2018-12-30,False,112,25,2
es1480a,pm10,2010-01-04,2018-12-30,False,108,24,2
