# Curación de Estaciones Meteorológicas

Estudiar la cantidad de datos válidos disponibles para cada variable en cada estación y obtener el número de estaciones con los datos mínimos necesarios para el estudio.

In [1]:
# Work from the project directory so the relative paths used below resolve
# (presumably the repository root -- confirm the location on your machine).
setwd("~/Repositories/AirQualityCOVID")

In [2]:
# Load the project's shared R code from src/general.R (its contents are not
# visible in this notebook).
source("src/general.R")

## Funciones para filtrar datos

In [3]:
suppressMessages(library(worldmet))
suppressMessages(library(lubridate))
suppressMessages(library(tidyverse))

In [4]:
# Summarise the sampling resolution and data coverage of one variable.
#
# Args:
#   column:    name (string) of the variable column to inspect.
#   dataFrame: data.frame or tibble holding a date-time column named "date"
#              and the column named by `column`.
#
# Returns:
#   Numeric vector c(resolution, amount):
#     * resolution: smallest gap, in hours, between consecutive rows where
#       both "date" and `column` are non-NA. NA when that gap exceeds 24 hours
#       or when fewer than two such rows exist.
#     * amount: fraction of rows with a non-NA value in `column`, multiplied
#       by the resolution when the resolution is at most 24 hours. NA when
#       fewer than two valid rows exist.
get.info <- function(column, dataFrame) {
    dates <- dataFrame[["date"]]
    values <- dataFrame[[column]]

    # `[[` always yields a vector, so this works for tibbles and for base
    # data.frames alike (`nrow(dataFrame[, column])` is NULL on the latter).
    valid.fraction <- sum(!is.na(values)) / nrow(dataFrame)

    valid.dates <- dates[!is.na(dates) & !is.na(values)]

    # A gap needs at least two observations; without this guard `1:(n - 1)`
    # style indexing would run over c(1, 0) and read non-existent rows.
    if (length(valid.dates) < 2) {
        return(c(NA_real_, NA_real_))
    }

    # Gaps between consecutive valid observations, converted from seconds to
    # hours. Computed in one vectorised step instead of growing a vector.
    gaps <- diff(as.numeric(as.POSIXct(valid.dates))) / 3600
    resol <- min(gaps)

    if (resol > 24) {
        # Coarser than daily: report no resolution and the raw valid fraction.
        c(NA_real_, valid.fraction)
    } else {
        c(resol, resol * valid.fraction)
    }
}

In [5]:
# Data coverage of one variable, scaled by a resolution factor.
#
# Args:
#   dataFrame:  data.frame or tibble with one row per record.
#   column:     name (string) of the column to inspect.
#   resolution: numeric factor the valid fraction is multiplied by.
#
# Returns:
#   resolution * (number of non-NA values in `column`) / (rows in dataFrame).
get.amount <- function(dataFrame, column, resolution) {
    # `[[` and `nrow(dataFrame)` behave the same for tibbles and base
    # data.frames; `nrow(dataFrame[, column])` is NULL on a base data.frame
    # because a single-column `[` drops to a vector.
    resolution * sum(!is.na(dataFrame[[column]])) / nrow(dataFrame)
}

## Obtención de los Datos

In [6]:
# Load the station table. Strings are read as factors so that the distinct
# station codes can be listed with levels() below.
sitesMto <- read.csv("data/curation/sitesMto.csv",
                     stringsAsFactors = TRUE)
# Only the first three station codes are kept for processing.
sites.lv <- levels(sitesMto$code)[1:3]
head(sitesMto)

Unnamed: 0_level_0,usaf,wban,station,ctry,st,call,latitude,longitude,elev.m.,begin,end,code,longr,latr,dist,siteAQ
Unnamed: 0_level_1,<int>,<int>,<fct>,<fct>,<lgl>,<fct>,<dbl>,<dbl>,<dbl>,<fct>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<fct>
1,80250,99999,BILBAO,SP,,LEBB,43.301,-2.911,42.1,1973-01-01,2021-02-06,080250-99999,-0.05080653,0.755745,5.464009,es0041a
2,80800,99999,VITORIA,SP,,LEVT,42.883,-2.724,512.7,1973-01-01,2021-02-06,080800-99999,-0.04754277,0.7484495,45.503738,es0041a
3,80210,99999,SANTANDER,SP,,LEXJ,43.427,-3.82,4.9,1973-01-01,2021-02-06,080210-99999,-0.06667158,0.7579441,73.13704,es0041a
4,80250,99999,BILBAO,SP,,LEBB,43.301,-2.911,42.1,1973-01-01,2021-02-06,080250-99999,-0.05080653,0.755745,5.36354,es0110a
5,80800,99999,VITORIA,SP,,LEVT,42.883,-2.724,512.7,1973-01-01,2021-02-06,080800-99999,-0.04754277,0.7484495,50.995824,es0110a
6,80230,99999,SANTANDER,SP,,,43.483,-3.8,59.0,1973-01-01,2021-02-06,080230-99999,-0.06632251,0.7589215,69.437339,es0110a


In [None]:
# Variables expected in the summary tables. Any of them that a station's
# download does not contain is added below as a zero-filled column.
variables <- c('ws', 'wd', 'air_temp', 'atmos_pres', 'visibility', 'dew_point', 'RH', 
               'ceil_hgt', 'pwc', 'precip', 'cl_1', 'cl_2', 'cl_3', 'cl', 'cl_1_height', 
               'cl_2_height', 'cl_3_height', 'precip_12', 'precip_6')

# Accumulators: one row per station, one column per variable.
all.resolution <- data.frame()
all.amount <- data.frame()

for (st in sites.lv) {
    # Get data from worldmet
    dataMto <- importNOAA(code = st,
                          year = 2020,
                          hourly = TRUE,
                          n.cores = 6
                         )
    
    # Station identifier plus the first and last dates present in the download.
    new.row <- data.frame(site = st, 
                          start_dt = as_date(min(dataMto$date)),
                          end_dt = as_date(max(dataMto$date))
                         )
    
    # The first seven columns are skipped (assumed to be station metadata
    # rather than measurements -- TODO confirm against importNOAA's output).
    data.cols <- names(dataMto)[-(1:7)]
    info.apply <- lapply(data.cols, get.info, dataFrame=dataMto)
    info.df <- data.frame(info.apply)
    names(info.df) <- data.cols
    
    # get.info returns c(resolution, amount), so row 1 holds the resolutions
    # and row 2 the amounts. drop = FALSE keeps a data.frame (and the column
    # name) even when there is only a single data column.
    resolution.row <- cbind(new.row, info.df[1, , drop = FALSE])
    amount.row <- cbind(new.row, info.df[2, , drop = FALSE])
    
    # setdiff() instead of variables[-which(...)]: when nothing matches,
    # which() returns integer(0) and negative-indexing with it selects no
    # elements at all, so no missing column would be filled.
    for (cl in setdiff(variables, names(dataMto))) {
        resolution.row[, cl] <- 0
        amount.row[, cl] <- 0
    }
    
    # New rows are prepended, so the final tables list stations in reverse
    # processing order.
    all.resolution <- rbind(resolution.row, all.resolution)
    all.amount <- rbind(amount.row, all.amount)
}