Skip to content

Commit

Permalink
Update preprocessing.R
Browse files Browse the repository at this point in the history
  • Loading branch information
cpetinon committed Jun 10, 2024
1 parent 962a867 commit d758097
Showing 1 changed file with 39 additions and 36 deletions.
75 changes: 39 additions & 36 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#' @description
#' A short description...
#' Retrieve hours with no data and replace incomplete data with NA,
#'
#'
#' @param enriched_data enriched data.frame containing all the data for all your sensors
#' @param date_range Date vector. example: c('2021-01-01','2022-01-01'). Full period if NULL (default).
#' @param segments Character vector. Selected road segment, all if NULL (default).
#' @param successive_day Integer. Number of day choosen. Default to 2
#' @param uptime_choice Real. Uptime choosen. Default to 0.5
#'
#' @return enriched_data
#'
#' @return enriched_data
#' @export
#'
#' @import dplyr
Expand All @@ -19,46 +22,50 @@
#' segment = 'RteVitre-06',
#' uptime_choice=0.3,
#' successive_day=1)

retrieve_missing_data<- function(enriched_data,
date_range = NULL,
segments = NULL,
uptime_choice=0.5,
successive_day=2)
{

if(!is.null(segments))
{enriched_data<-enriched_data[enriched_data$segment_name==segments,]}

if(!is.null(date_range))
{enriched_data<-enriched_data[enriched_data$day>=date_range[1] & enriched_data$day<= date_range[2],]}

if(length(enriched_data$car)==0){stop("No data in the selectionned period")}

else
{
#Hours with no data
enriched_data<-retrieve_missing_hours(enriched_data,uptime_choice)

enriched_data<-retrieve_missing_hours(enriched_data,uptime_choice)
#Inactivity Period
enriched_data <- enriched_data %>%
mutate(
heavy_NA = .data$heavy,
car_NA = .data$car,
bike_NA = .data$bike,
pedestrian_NA = .data$pedestrian
)
heavy_NA = heavy,
car_NA = car,
bike_NA = bike,
pedestrian_NA = pedestrian
)

replace_inactivity_period(enriched_data,successive_day,uptime_choice)
}
return(enriched_data)
}

#' Retrieve hours with no data
#' @description
#' A short description...
#' Retrieve hours with no data
#'
#'
#' @param enriched_data enriched data.frame containing all the data for all your sensors
#' @param uptime_choice Real. Uptime choosen. Default to 0.5
#'
#' @return enriched_data
#'
#' @return enriched_data
#' @export
#'
#' @import dplyr
Expand All @@ -68,35 +75,31 @@ retrieve_missing_data<- function(enriched_data,
#' retrieve_missing_hours(traffic)
#' retrieve_missing_hours(traffic,
#' uptime_choice=0.3)

retrieve_missing_hours<-function(enriched_data,uptime_choice)
{
enriched_data$date <- ymd_hms(enriched_data$date)
enriched_data$date <- ymd_hms(enriched_data$date)
enriched_data$season <- ifelse(month(enriched_data$date) %in% c(3,4,5), "Spring",
ifelse(month(enriched_data$date) %in% c(6,7,8), "Summer",
ifelse(month(enriched_data$date) %in% c(9,10,11), "Autumn", "Winter")))

df_season<-enriched_data %>%
group_by(.data$segment_id,
.data$season,
.data$hour) %>%
summarise(condition=any(.data$car!=0 & .data$uptime>uptime_choice))
df_season<-enriched_data %>% group_by(segment_id,season,hour) %>% summarise(condition=any(car!=0 & uptime>uptime_choice))

enriched_data <- enriched_data %>%
semi_join(df_season %>%
filter(.data$condition),
by = c("segment_id","season", "hour")
)
enriched_data <- enriched_data %>% semi_join(df_season %>% filter(condition), by = c("segment_id","season", "hour"))

return(enriched_data)
}

#' @description
#' A short description...
#' Replace incomplete data with NA,
#'
#'
#' @param enriched_data enriched data.frame containing all the data for all your sensors
#' @param successive_day Integer. Number of day choosen. Default to 2
#' @param uptime_choice Real. Uptime choosen. Default to 0.5
#'
#' @return enriched_data
#'
#' @return enriched_data
#' @export
#'
#' @import dplyr
Expand All @@ -107,11 +110,12 @@ retrieve_missing_hours<-function(enriched_data,uptime_choice)
#' replace_inactivity_period(traffic,
#' uptime_choice=0.3,
#' successive_day=1)

replace_inactivity_period<-function (enriched_data,successive_day,uptime_choice)
{
list_clear_data <- list()
seg_id<-unique(enriched_data$segment_id)

for(id in 1:length(seg_id))
{
df_segment<-enriched_data[enriched_data$segment_id==seg_id[id],]
Expand All @@ -120,25 +124,24 @@ replace_inactivity_period<-function (enriched_data,successive_day,uptime_choice)
j=i
while ((df_segment$car[i]==0 | df_segment$uptime[i]<uptime_choice) & i<length(df_segment$car))
{i<-i+1}

diff_days<-abs(as.numeric(difftime(df_segment$day[i], df_segment$day[j], units = "days")))

if(diff_days>successive_day)
{
df_segment <- df_segment %>%
mutate_at(vars(.data$heavy_NA, .data$car_NA, .data$bike_NA, .data$pedestrian_NA),
~ ifelse(row_number() %in% j:i, NA,.))
mutate_at(vars(heavy_NA, car_NA, bike_NA,pedestrian_NA), ~ ifelse(row_number() %in% j:i, NA,.))
}
}
list_clear_data[[id]]<-df_segment
}
enriched_data<-list_clear_data[[1]]

if(length(seg_id)>1)
{
for(i in 2:length(seg_id))
{enriched_data<-rbind(enriched_data,list_clear_data[[i]])}
}

return(enriched_data)
}

0 comments on commit d758097

Please sign in to comment.