In [1]:
library(jsonlite)
library(tidyverse)
# the `here` package is not needed: the data is large and stored in a different location

── [1mAttaching core tidyverse packages[22m ──────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m  masks [34mstats[39m::filter()
[31m✖[39m [34mpurrr[39m::[32mflatten()[39m masks [34mjsonlite[39m::flatten()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m     masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [2]:
library(showtext)
# Register the Google font "Lato" under the family name "lato" ...
font_add_google(name = "Lato", family = "lato")
# ... and switch on showtext rendering for graphics devices.
showtext_auto()

Loading required package: sysfonts

Loading required package: showtextdb



In [3]:
# Folder holding the interpolated-collider JSON exports (machine-specific path).
DATA_FOLDER <- "/mnt/f/big-data/vr_data/Data/preprocessing-pipeline/interpolated-colliders"
# Every file in that folder matching *_interpolatedColliders_*.json.
DATA <- DATA_FOLDER %>%
    file.path("*_interpolatedColliders_*.json") %>%
    Sys.glob()

In [75]:
#' Load the building collider list, one row per building name.
#'
#' Reads the CSV at `file_path`, keeps the index column (named `...1` by
#' readr because its header is empty), the target collider name and the
#' transformed collider centre, and renames them to `ID`, `Name`, `x`, `y`.
#' Only the first row for each `Name` is kept.
#'
#' @param file_path Path to the building collider CSV.
#' @return A tibble with columns `ID`, `Name`, `x` and `y`.
get_buildings <- function(file_path = "../additional_Files/building_collider_list.csv") {
    raw_buildings <- read_csv(file_path)
    # select() renames while selecting, so no separate rename() step is needed
    named_buildings <- select(
        raw_buildings,
        ID = `...1`,
        Name = target_collider_name,
        x = transformed_collidercenter_x,
        y = transformed_collidercenter_y
    )
    distinct(named_buildings, Name, .keep_all = TRUE)
}

#' Build a zero-initialised view counter keyed by building name.
#'
#' @return A named list with one `0` entry per building returned by
#'   `get_buildings()`, plus the two pseudo-buildings `noData` and
#'   `newSession`.
get_building_counter <- function() {
    building_names <- get_buildings() %>% pull(Name)

    building_counter <- list()
    for (building_name in building_names) {
        building_counter[[building_name]] <- 0
    }

    # noData and newSession are treated like buildings by the analysis that
    # runs before the graph is generated, so they get counters as well
    building_counter[["noData"]] <- 0
    building_counter[["newSession"]] <- 0
    building_counter
}

In [4]:
# Display the matched input files.
DATA

In [18]:
# Select the first matched file and display its path.
data_file <- DATA[1]
data_file

In [9]:
# The participant id is the leading "_"-separated token of the file name.
file_name <- basename(data_file)
cur_p <- str_split(file_name, "_")[[1]][1] %>% as.integer()
pid <- cur_p
# NOTE: the group lookup (`groups[pids == pid]`) is done further down, once
# `pids` and `groups` have been defined; doing it here fails on a
# top-to-bottom run because neither vector exists yet.


# json data processing
cur_data <- read_json(data_file)


In [76]:
# Build the zero-initialised per-building counter (this reads the building CSV).
building_counter <- get_building_counter()

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m254[39m [1mColumns: [22m[34m8[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (2): source_collider_name, target_collider_name
[32mdbl[39m (6): ...1, ColliderBoundsCenter.x, ColliderBoundsCenter.y, ColliderBound...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [19]:
# Flatten the parsed JSON records into two parallel vectors: the collider name
# hit by each record and that record's cluster duration.
# NOTE: unlist() drops NULL entries, so a record missing one of the fields
# would make that vector shorter than the other.
interpolated_data <- unlist(
    lapply(cur_data, function(record) record$hitObjectColliderName)
)
durations <- unlist(
    lapply(cur_data, function(record) record$clusterDuration)
)

In [54]:
# remove NH from the data
nh_index <- interpolated_data == "NH"
interpolated_data <- interpolated_data[!nh_index]
durations <- durations[!nh_index]

In [52]:
# Participant ids and their study group, as two parallel vectors:
# groups[i] is the group of pids[i].
pids <- c(
    2002, 2005, 2008, 2009, 2015, 2016, 2017, 2018, 2024, # Control
    2006, 2007, 2013, 2014, 2021, 2020, 2025              # Glaucoma
)
groups <- rep(c("Control", "Glaucoma"), times = c(9, 7))

In [64]:
# Look up the study group of the current participant via the parallel
# `pids` / `groups` vectors.
pid <- cur_p
group <- groups[pids == pid]
# Flag records whose duration is below 250 as noisy
# (unit presumably milliseconds — TODO confirm).
noisy <- durations < 250

In [77]:
#' Running per-building view counts, in viewing order.
#'
#' Row i of the returned tibble holds the building seen at position i and the
#' number of times it has been seen up to and including that position, added
#' to whatever `counter` already held for that building.
#'
#' This replaces a `reduce()` that appended one row per element with
#' `bind_rows()` (quadratic in the number of records) and silently lost every
#' record whose name was missing from the counter: `NULL + 1` is `numeric(0)`,
#' and a tibble built from a zero-length column has zero rows. Names without
#' a counter entry are now counted from 0, with a warning, so the result
#' keeps one row per input element.
#'
#' @param buildings Character vector of viewed building names, in order.
#' @param counter Named list of starting counts, as returned by
#'   `get_building_counter()`.
#' @return A list of two elements: the updated counter, and a tibble with
#'   columns `building` and `view_count`.
count_building_views <- function(buildings, counter) {
    observed <- as.character(unname(buildings))

    unknown <- setdiff(unique(observed), names(counter))
    if (length(unknown) > 0) {
        warning(
            "Colliders missing from the building counter, counted from 0: ",
            paste(unknown, collapse = ", "),
            call. = FALSE
        )
    }
    start_counter <- counter
    start_counter[unknown] <- rep(list(0), length(unknown))

    # Starting count of each record's building; the running index within the
    # building is added on top of it below.
    start_count <- vapply(
        observed,
        function(building_name) start_counter[[building_name]],
        numeric(1),
        USE.NAMES = FALSE
    )
    running <- tibble(building = observed, view_count = start_count) %>%
        mutate(view_count = view_count + row_number(), .by = building)

    final_counter <- start_counter
    totals <- table(observed)
    for (building_name in names(totals)) {
        final_counter[[building_name]] <-
            final_counter[[building_name]] + as.numeric(totals[[building_name]])
    }

    list(final_counter, running)
}

acc_data <- count_building_views(interpolated_data, building_counter)
cur_data_tibble <- acc_data[[2]]

In [78]:
# Sanity check: `noisy` is derived from `durations`, `durations` and
# `interpolated_data` were filtered with the same mask, and `cur_data_tibble`
# is built from `interpolated_data`, so all four sizes are expected to match.
length(noisy)
length(interpolated_data)
length(durations)
nrow(cur_data_tibble)