diff --git a/stop_sensor.R b/stop_sensor.R index 1139eea..dfe0124 100644 --- a/stop_sensor.R +++ b/stop_sensor.R @@ -1,115 +1,94 @@ -stop_sensor<- function(enriched_data, - date_range = NULL, - weekday_choice = NULL, - hour_choice = NULL, - vacation_choice=NULL, - holiday_choice=NULL, - segments = NULL, - uptime_choice=0.5, - successive_day=2, - remove_data=FALSE) -{ - # Input parameter : - # - df_init : Initial Dataframe - # - uptime_choice : Real which define the quality of the data below which we do not need the data - # - successive_day : Integer which define number of successives days above which we do not need the data - # - remove_data : Booleen which choose if we keep the row or not - # Output parameter : - # - df_fin : Dataframe - # Description : - # Remove hours where we have no information as the night or less than 10% of information - # Remove periods when the sensor did not work - # - # Packages : - # - lubridate - # - dplyr - - #Filtrer les données en fonction des demandes de l'utilisateur - if(!is.null(segments)) - {enriched_data<-enriched_data[enriched_data$segment_name==segments,]} - - if(!is.null(date_range)) - {enriched_data<-enriched_data[enriched_data$day>=date[1] & enriched_data$day<= date[2],]} +#' @description +#' A short description... +#' Retrieve hours with no data and replace incomplete data with NA, +#' +#' +#' @param enriched_data enriched data.frame containing all the data for all your sensors +#' @param date_range Date vector. example: c('2021-01-01','2022-01-01'). Full period if NULL (default). +#' @param segments Character vector. Selected road segment, all if NULL (default). +#' @param weekday Character vector. Weekday choosen. Default to the all week. +#' @param uptime Real. Uptime choosen. Default to 0.5. +#' @param successive_day Integer. Number of day choosen. Default to 2 +#' +#' @return enriched_data +#' @export +#' +#' @import dplyr +#' @import lubridate +#' +#' @examples +#' retrieve_missing_data(traffic) +#' retrieve_missing_data(traffic, +#' date_range = c('2022-07-01','2022-09-01'), +#' segment = 'RteVitre-06', +#' uptime_choice=0.3, +#' successive_day=1) - if(length(enriched_data$car)==0){print("Aucune donnée sur la période sélectionnée")} - - else - { - #Créer une colonne pour avoir les saisons - enriched_data$date <- ymd_hms(enriched_data$date) #mettre au format date - enriched_data$season <- ifelse(month(enriched_data$date) %in% c(3,4,5), "Spring", - ifelse(month(enriched_data$date) %in% c(6,7,8), "Summer", - ifelse(month(enriched_data$date) %in% c(9,10,11), "Autumn", "Winter"))) - - - # Remove hours with no information by season - df_season<-enriched_data %>% group_by(segment_id,season,hour) %>% summarise(condition=any(car!=0 & uptime>uptime_choice)) - - enriched_data <- enriched_data %>% - semi_join(df_season %>% filter(condition), by = c("segment_id","season", "hour")) +retrieve_missing_data<- function(enriched_data, + date_range = NULL, + segments = NULL, + uptime_choice=0.5, + successive_day=2) +{ - print(length(enriched_data$car)) + if(!is.null(segments)) + {enriched_data<-enriched_data[enriched_data$segment_name==segments,]} - #Périodes d'inactivités - rm<-c() - rm_fin<-c() + if(!is.null(date_range)) + {enriched_data<-enriched_data[enriched_data$day>=date_range[1] & enriched_data$day<= date_range[2],]} - list_clear_data <- list() - seg_id<-unique(enriched_data$segment_id) + if(length(enriched_data$car)==0){stop("No data in the selectionned period")} - for(id in 1:length(seg_id)) + else { - df_segment<-enriched_data[enriched_data$segment_id==seg_id[id],] - for(i in 1:length(df_segment$car)) - { j=i + #Hours with no data + enriched_data$date <- ymd_hms(enriched_data$date) + enriched_data$season <- ifelse(month(enriched_data$date) %in% c(3,4,5), "Spring", + ifelse(month(enriched_data$date) %in% c(6,7,8), "Summer", + ifelse(month(enriched_data$date) %in% c(9,10,11), "Autumn", "Winter"))) + + df_season<-enriched_data %>% group_by(segment_id,season,hour) %>% summarise(condition=any(car!=0 & uptime>uptime_choice)) + + enriched_data <- enriched_data %>% semi_join(df_season %>% filter(condition), by = c("segment_id","season", "hour")) + + #Inactivity Period + enriched_data <- enriched_data %>% + mutate( + heavy_NA = heavy, + car_NA = car, + bike_NA = bike, + pedestrian_NA = pedestrian + ) + + list_clear_data <- list() + seg_id<-unique(enriched_data$segment_id) + + for(id in 1:length(seg_id)) + { + df_segment<-enriched_data[enriched_data$segment_id==seg_id[id],] + for(i in 1:length(df_segment$car)) + { + j=i while ((df_segment$car[i]==0 | df_segment$uptime[i]successive_day) - {rm_fin<-c(rm_fin,rm)} - - rm<-c()} - - if (is.null(rm_fin)) - {df_segment<-df_segment} - - else - { if(remove_data==TRUE) - {df_segment<-df_segment[-rm_fin,]} + { + df_segment <- df_segment %>% + mutate_at(vars(heavy_NA, car_NA, bike_NA,pedestrian_NA), ~ ifelse(row_number() %in% j:i, NA,.)) + } + } + list_clear_data[[id]]<-df_segment + } + enriched_data<-list_clear_data[[1]] - else - { colonnes_voulues<-c('heavy','car','bike','pedestrian','heavy_lft','heavy_rgt','car_rgt','car_lft','bike_lft','bike_rgt','pedestrian_lft','pedestrian_rgt') - df_segment[rm_fin,colonnes_voulues]=NA} - } - list_clear_data[[id]]<-df_segment - } - df_fin<-list_clear_data[[1]] - - if(length(seg_id)>1) - {for(i in 2:length(seg_id)) - {df_fin<-rbind(df_fin,list_clear_data[[i]])}} - - - #Filtration des données avec les demandes de l'utilisateur - - if(!is.null(weekday_choice)) - { - df_fin$weekday<-tolower(df_fin$weekday) - tolower(weekday_choice) - df_fin<-df_fin %>% filter(weekday %in% weekday_choice) } - - if(!is.null(hour_choice)) - {df_fin<-df_fin %>% filter(hour %in% hour_choice)} - - if(!is.null(vacation_choice)) - {df_fin<-df_fin %>% filter(vacation %in% vacation_choice)} - - if(!is.null(holiday_choice)) - {df_fin<-df_fin %>% filter(holiday %in% holiday_choice)} - - return(df_fin) - } + if(length(seg_id)>1) + { + for(i in 2:length(seg_id)) + {enriched_data<-rbind(enriched_data,list_clear_data[[i]])} + } + } + return(enriched_data) }