From d758097c9b58a01f77cc65746558166b12602e07 Mon Sep 17 00:00:00 2001 From: cpetinon <168358472+cpetinon@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:03:10 +0200 Subject: [PATCH] Update preprocessing.R --- R/preprocessing.R | 75 ++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index d95f3b3..29a44d0 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1,12 +1,15 @@ +#' @description +#' A short description... #' Retrieve hours with no data and replace incomplete data with NA, #' +#' #' @param enriched_data enriched data.frame containing all the data for all your sensors #' @param date_range Date vector. example: c('2021-01-01','2022-01-01'). Full period if NULL (default). #' @param segments Character vector. Selected road segment, all if NULL (default). #' @param successive_day Integer. Number of day choosen. Default to 2 #' @param uptime_choice Real. Uptime choosen. Default to 0.5 -#' -#' @return enriched_data +#' +#' @return enriched_data #' @export #' #' @import dplyr @@ -19,46 +22,50 @@ #' segment = 'RteVitre-06', #' uptime_choice=0.3, #' successive_day=1) + retrieve_missing_data<- function(enriched_data, date_range = NULL, segments = NULL, uptime_choice=0.5, successive_day=2) { - + if(!is.null(segments)) {enriched_data<-enriched_data[enriched_data$segment_name==segments,]} - + if(!is.null(date_range)) {enriched_data<-enriched_data[enriched_data$day>=date_range[1] & enriched_data$day<= date_range[2],]} - + if(length(enriched_data$car)==0){stop("No data in the selectionned period")} - + else { #Hours with no data - enriched_data<-retrieve_missing_hours(enriched_data,uptime_choice) - + enriched_data<-retrieve_missing_hours(enriched_data,uptime_choice) + #Inactivity Period enriched_data <- enriched_data %>% mutate( - heavy_NA = .data$heavy, - car_NA = .data$car, - bike_NA = .data$bike, - pedestrian_NA = .data$pedestrian - ) + heavy_NA = heavy, + car_NA = car, + bike_NA = bike, + pedestrian_NA = pedestrian + ) replace_inactivity_period(enriched_data,successive_day,uptime_choice) } return(enriched_data) } -#' Retrieve hours with no data +#' @description +#' A short description... +#' Retrieve hours with no data +#' #' #' @param enriched_data enriched data.frame containing all the data for all your sensors #' @param uptime_choice Real. Uptime choosen. Default to 0.5 -#' -#' @return enriched_data +#' +#' @return enriched_data #' @export #' #' @import dplyr @@ -68,35 +75,31 @@ retrieve_missing_data<- function(enriched_data, #' retrieve_missing_hours(traffic) #' retrieve_missing_hours(traffic, #' uptime_choice=0.3) + retrieve_missing_hours<-function(enriched_data,uptime_choice) { - enriched_data$date <- ymd_hms(enriched_data$date) + enriched_data$date <- ymd_hms(enriched_data$date) enriched_data$season <- ifelse(month(enriched_data$date) %in% c(3,4,5), "Spring", ifelse(month(enriched_data$date) %in% c(6,7,8), "Summer", ifelse(month(enriched_data$date) %in% c(9,10,11), "Autumn", "Winter"))) - df_season<-enriched_data %>% - group_by(.data$segment_id, - .data$season, - .data$hour) %>% - summarise(condition=any(.data$car!=0 & .data$uptime>uptime_choice)) + df_season<-enriched_data %>% group_by(segment_id,season,hour) %>% summarise(condition=any(car!=0 & uptime>uptime_choice)) - enriched_data <- enriched_data %>% - semi_join(df_season %>% - filter(.data$condition), - by = c("segment_id","season", "hour") - ) + enriched_data <- enriched_data %>% semi_join(df_season %>% filter(condition), by = c("segment_id","season", "hour")) return(enriched_data) } +#' @description +#' A short description... #' Replace incomplete data with NA, #' +#' #' @param enriched_data enriched data.frame containing all the data for all your sensors #' @param successive_day Integer. Number of day choosen. Default to 2 #' @param uptime_choice Real. Uptime choosen. Default to 0.5 -#' -#' @return enriched_data +#' +#' @return enriched_data #' @export #' #' @import dplyr @@ -107,11 +110,12 @@ retrieve_missing_hours<-function(enriched_data,uptime_choice) #' replace_inactivity_period(traffic, #' uptime_choice=0.3, #' successive_day=1) + replace_inactivity_period<-function (enriched_data,successive_day,uptime_choice) { list_clear_data <- list() seg_id<-unique(enriched_data$segment_id) - + for(id in 1:length(seg_id)) { df_segment<-enriched_data[enriched_data$segment_id==seg_id[id],] @@ -120,25 +124,24 @@ replace_inactivity_period<-function (enriched_data,successive_day,uptime_choice) j=i while ((df_segment$car[i]==0 | df_segment$uptime[i]successive_day) { df_segment <- df_segment %>% - mutate_at(vars(.data$heavy_NA, .data$car_NA, .data$bike_NA, .data$pedestrian_NA), - ~ ifelse(row_number() %in% j:i, NA,.)) + mutate_at(vars(heavy_NA, car_NA, bike_NA,pedestrian_NA), ~ ifelse(row_number() %in% j:i, NA,.)) } } list_clear_data[[id]]<-df_segment } enriched_data<-list_clear_data[[1]] - + if(length(seg_id)>1) { for(i in 2:length(seg_id)) {enriched_data<-rbind(enriched_data,list_clear_data[[i]])} } - + return(enriched_data) }