# Curación de Estaciones Meteorológicas

Estudiar la cantidad de datos válidos disponibles para cada variable en cada estación y obtener el número de estaciones con los datos mínimos necesarios para el estudio.

In [1]:
# Move to the project directory (presumably the repository root -- confirm) so
# that the relative paths used later ("src/general.R", "data/curation/...")
# resolve. NOTE(review): this path is machine-specific.
setwd("~/Repositories/AirQualityCOVID")

In [2]:
# Run the project's src/general.R in this session. What it defines is not
# visible from this notebook.
source("src/general.R")

## Funciones para filtrar datos

In [3]:
suppressMessages(library(worldmet))
suppressMessages(library(lubridate))
suppressMessages(library(tidyverse))

In [100]:
#' Estimate the sampling resolution and data availability of one variable.
#'
#' @param column Name (string) of the variable column in `dataFrame`.
#' @param dataFrame Data frame or tibble holding a `date` column and the
#'   column named by `column`. `date` is converted with `as.numeric()` and the
#'   gaps are divided by 3600, so it is expected to be a date-time measured in
#'   seconds (e.g. POSIXct).
#' @return Numeric vector of length 2, `c(resolution, amount)`:
#'   `resolution` is the smallest gap, in hours, between consecutive rows in
#'   which both `date` and the variable are non-missing; `amount` is
#'   `resolution * (number of non-missing values) / nrow(dataFrame)`.
#'   When fewer than two valid observations exist no gap can be measured and
#'   `c(0, 0)` is returned, the same value the caller uses for variables that
#'   a station does not report at all.
get.info <- function(column, dataFrame) {
    # `[[` extracts a plain vector from both tibbles and base data frames,
    # unlike `[, column]`, whose result type depends on the class.
    dates <- as.numeric(dataFrame[["date"]])
    values <- dataFrame[[column]]

    # Timestamps of the rows where both the date and the value are present.
    valid.dates <- dates[!is.na(dates) & !is.na(values)]
    if (length(valid.dates) < 2) {
        # Avoids min() over an empty vector (Inf plus a warning) and the
        # Inf/NaN amounts that would follow from it.
        return(c(0, 0))
    }

    # Smallest step between consecutive valid observations, seconds -> hours.
    resol <- min(diff(valid.dates)) / 3600
    # Share of rows with data, scaled by the resolution so that a variable
    # reported every `resol` hours without gaps scores close to 1.
    amount <- resol * sum(!is.na(values)) / nrow(dataFrame)

    c(resol, amount)
}

## Obtención de los Datos

In [121]:
# Read the table of meteorological stations. Strings are loaded as factors so
# that the levels of `code` give the distinct station codes.
# (The argument is spelled out in full instead of relying on partial matching.)
sitesMto <- read.csv("data/curation/sitesMto.csv",
                     stringsAsFactors = TRUE)
# Only the first two station codes are kept for processing; head() returns
# fewer elements, instead of NA padding, if the file has fewer than two codes.
sites.lv <- head(levels(sitesMto$code), 2)
head(sitesMto)

Unnamed: 0_level_0,usaf,wban,station,ctry,st,call,latitude,longitude,elev.m.,begin,end,code,longr,latr,dist,siteAQ
Unnamed: 0_level_1,<int>,<int>,<fct>,<fct>,<lgl>,<fct>,<dbl>,<dbl>,<dbl>,<fct>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<fct>
1,80250,99999,BILBAO,SP,,LEBB,43.301,-2.911,42.1,1973-01-01,2021-02-08,080250-99999,-0.05080653,0.755745,5.464009,es0041a
2,80800,99999,VITORIA,SP,,LEVT,42.883,-2.724,512.7,1973-01-01,2021-02-08,080800-99999,-0.04754277,0.7484495,45.503738,es0041a
3,80210,99999,SANTANDER,SP,,LEXJ,43.427,-3.82,4.9,1973-01-01,2021-02-08,080210-99999,-0.06667158,0.7579441,73.13704,es0041a
4,80250,99999,BILBAO,SP,,LEBB,43.301,-2.911,42.1,1973-01-01,2021-02-08,080250-99999,-0.05080653,0.755745,5.36354,es0110a
5,80800,99999,VITORIA,SP,,LEVT,42.883,-2.724,512.7,1973-01-01,2021-02-08,080800-99999,-0.04754277,0.7484495,50.995824,es0110a
6,80230,99999,SANTANDER,SP,,,43.483,-3.8,59.0,1973-01-01,2021-02-08,080230-99999,-0.06632251,0.7589215,69.437339,es0110a


In [122]:
# Variables expected as columns of the final tables. Any of them that a
# station does not provide is added with resolution 0 and amount 0.
variables <- c('ws', 'wd', 'air_temp', 'atmos_pres', 'visibility', 'dew_point', 'RH', 
               'ceil_hgt', 'pwc', 'precip', 'cl_1', 'cl_2', 'cl_3', 'cl', 'cl_1_height', 
               'cl_2_height', 'cl_3_height', 'precip_12', 'precip_6')

# One row per station: sampling resolution (hours) and data amount per variable.
all.resolution <- data.frame()
all.amount <- data.frame()

for (st in sites.lv) {
    # Get the 2020 hourly data for this station from worldmet
    dataMto <- importNOAA(code = st,
                          year = 2020,
                          hourly = TRUE,
                          n.cores = 6
                         )
    new.row <- data.frame(site = st, 
                          start_dt = as_date(min(dataMto$date)),
                          end_dt = as_date(max(dataMto$date))
                         )
    # Variables to analyse: everything after the first six columns
    # (NOTE(review): presumably station metadata and date -- confirm against
    # the importNOAA output), except "pwc", which is never passed to
    # get.info and therefore ends up as 0 through the fill below.
    nm <- setdiff(names(dataMto)[-(1:6)], "pwc")

    # get.info returns c(resolution, amount) per variable, so row 1 of
    # info.df holds the resolutions and row 2 the amounts.
    info.df <- data.frame(lapply(nm, get.info, dataFrame=dataMto))
    names(info.df) <- nm

    # drop = FALSE keeps a data frame (and the column name) even when only
    # one variable is available.
    resolution.row <- cbind(new.row, info.df[1, , drop = FALSE])
    amount.row <- cbind(new.row, info.df[2, , drop = FALSE])

    # Fill the expected variables this station lacks. setdiff() also works
    # when none of them is present, where `variables[-which(...)]` would
    # select nothing and skip the fill.
    for (cl in setdiff(variables, names(amount.row))) {
        resolution.row[, cl] <- 0
        amount.row[, cl] <- 0
    }

    # New rows are prepended, so the final tables list the stations in
    # reverse processing order.
    all.resolution <- rbind(resolution.row, all.resolution)
    all.amount <- rbind(amount.row, all.amount)
}

In [123]:
# Preview the per-station results: first the sampling resolution of each
# variable, then the amount of data available for each variable.
head(all.resolution)
head(all.amount)

Unnamed: 0_level_0,site,start_dt,end_dt,ws,wd,air_temp,atmos_pres,visibility,dew_point,RH,⋯,cl_2,cl_3,cl,cl_1_height,cl_2_height,cl_3_height,precip,pwc,precip_12,precip_6
Unnamed: 0_level_1,<fct>,<date>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,077490-99999,2020-01-01,2020-12-31,1,1,1,1,3,1,1,⋯,3,3,3,3,3,3,1,0,0,0
2,076000-99999,2020-01-01,2020-12-31,1,1,1,1,1,1,1,⋯,2,3,1,1,2,3,1,0,0,0


Unnamed: 0_level_0,site,start_dt,end_dt,ws,wd,air_temp,atmos_pres,visibility,dew_point,RH,⋯,cl_2,cl_3,cl,cl_1_height,cl_2_height,cl_3_height,precip,pwc,precip_12,precip_6
Unnamed: 0_level_1,<fct>,<date>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2,077490-99999,2020-01-01,2020-12-31,0.9943078,0.9891849,0.9792805,0.9792805,0.4965847,0.963684,0.963684,⋯,0.09289617,0.00239071,0.4176913,0.4173497,0.09289617,0.00239071,0.9906648,0,0,0
21,076000-99999,2020-01-01,2020-12-31,0.9242942,0.920082,0.9242942,0.9240665,0.1934199,0.9242942,0.9242942,⋯,0.15255009,0.03278689,0.1727004,0.1727004,0.15255009,0.03278689,0.9198543,0,0,0
