---
title: "Binary representation table"
format: html
execute:
    eval: true
jupyter: IRkernel
---

In [34]:
library(jsonlite)
library(DT)
library(IRdisplay)
library(htmlwidgets)

In [35]:
config_dir <- "../phenoRankeR/inst/extdata/config"
json_data <- fromJSON(file.path(config_dir, "pheno_blast_col_colors.json"))

In [51]:
user <- "pheno.ranker@playground.com"
run_id <- "20241006015118" #working
input_format <- "bff" #working

# run_id <- "20241006003308" #not working
# input_format <- "json" #not working

user_dir <- file.path("../data/user_data", user)
out_dir <- file.path(user_dir, "output/rankedPatients", run_id)
file_suffix <- "_alignment.csv"

file_path <- file.path(out_dir, paste0(run_id, file_suffix))
print(file_path)

# binary representation of each patient
bin_df <- read.table(
    file_path,
    header = TRUE,
    sep = ";",
    row.names = NULL
)
head(bin_df)

[1] "../data/user_data/pheno.ranker@playground.com/output/rankedPatients/20241006015118/20241006015118_alignment.csv"


Unnamed: 0_level_0,Id,iron.deficiency.anaemia,thyroiditis,acute.bronchitis,asthma,dental.caries,Black.or.Black.British,Asian.or.Asian.British,British,African,⋯,X2021.09.24,Kilogram,BMI,X2021.09.24.1,Kilogram.per.Square.Meter,Height.standing,X2021.09.24.2,Centimeter,female,male
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Id,diseases.ICD10:D50.diseaseCode.id.ICD10:D50,diseases.ICD10:E06.diseaseCode.id.ICD10:E06,diseases.ICD10:J40.diseaseCode.id.ICD10:J40,diseases.ICD10:J45.diseaseCode.id.ICD10:J45,diseases.ICD10:K02.diseaseCode.id.ICD10:K02,ethnicity.id.NCIT:C16352,ethnicity.id.NCIT:C41260,ethnicity.id.NCIT:C41261,ethnicity.id.NCIT:C42331,⋯,measures.LOINC:3141-9.date.2021-09-24,measures.LOINC:3141-9.measurementValue.quantity.unit.id.NCIT:C28252,measures.LOINC:35925-4.assayCode.id.LOINC:35925-4,measures.LOINC:35925-4.date.2021-09-24,measures.LOINC:35925-4.measurementValue.quantity.unit.id.NCIT:C49671,measures.LOINC:8308-9.assayCode.id.LOINC:8308-9,measures.LOINC:8308-9.date.2021-09-24,measures.LOINC:8308-9.measurementValue.quantity.unit.id.NCIT:C49668,sex.id.NCIT:C16576,sex.id.NCIT:C20197
2,T|HG00100,0,0,0,0,0,0,0,0,0,⋯,1,1,1,1,1,1,1,1,1,0
3,R|HG00100,0,0,0,0,0,0,0,0,0,⋯,1,1,1,1,1,1,1,1,1,0
4,R|HG00102,0,0,0,0,0,0,0,0,0,⋯,1,1,1,1,1,1,1,1,1,0
5,R|HG00099,0,0,0,0,0,0,1,0,0,⋯,1,1,1,1,1,1,1,1,1,0
6,R|HG00106,0,0,0,0,0,1,0,0,0,⋯,1,1,1,1,1,1,1,1,1,0


In [52]:
# Function to generate HSLA colors with variable hue
generate_hsla_colors <- function(h_start, h_end, fixed_s = 30, fixed_l = 80, fixed_a = 1) {
  # Create a vector of hues in the range [h_start, h_end]
  hues <- seq(h_start, h_end, by = 1)
  
  # Generate HSLA color strings
  hsla_colors <- paste0("hsla(", hues, ", ", fixed_s, "%, ", fixed_l, "%, ", fixed_a, ")")
  
  return(hsla_colors)
}

hsla_colors <- generate_hsla_colors(1, 360)

In [55]:
json_path_row <- bin_df[1,]

#remove everything after the first dot
top_level_row <- gsub("\\..*", "", json_path_row)
top_level_row[1] <- "top_level"

# add the top level row to the data frame
bin_df2 <- rbind(top_level_row, bin_df)

color_map <- NULL
# color_map <- json_data[[input_format]]
if (is.null(color_map)) {
  print("No color map found, generating random colors")
  top_levels <- unique(top_level_row[-1])
  print("Top levels:")
  print(top_levels)
  colors <- sample(hsla_colors, length(top_levels))
  print("Colors:")
  print(colors)
  # create the color mapping in form of a list
  color_map <- list()
  for (i in 1:length(top_levels)) {
    color_map[top_levels[i]] <- colors[i]
  }
}

print("Color map:")
print(color_map)

# add a new row containing the colors
color_row <- unlist(sapply(
  bin_df2[1,],
  function(x) color_map[[x]]
))
color_row <- c("background-color", color_row)

# add the color row to the data frame
bin_df2 <- rbind(color_row, bin_df2)

head(bin_df2)

[1] "No color map found, generating random colors"
[1] "Top levels:"
[1] "diseases"                  "ethnicity"                
[3] "geographicOrigin"          "id"                       
[5] "interventionsOrProcedures" "measures"                 
[7] "sex"                      
[1] "Colors:"
[1] "hsla(349, 30%, 80%, 1)" "hsla(235, 30%, 80%, 1)" "hsla(330, 30%, 80%, 1)"
[4] "hsla(317, 30%, 80%, 1)" "hsla(284, 30%, 80%, 1)" "hsla(23, 30%, 80%, 1)" 
[7] "hsla(199, 30%, 80%, 1)"
[1] "Color map:"
                 diseases                 ethnicity          geographicOrigin 
 "hsla(349, 30%, 80%, 1)"  "hsla(235, 30%, 80%, 1)"  "hsla(330, 30%, 80%, 1)" 
                       id interventionsOrProcedures                  measures 
 "hsla(317, 30%, 80%, 1)"  "hsla(284, 30%, 80%, 1)"   "hsla(23, 30%, 80%, 1)" 
                      sex 
 "hsla(199, 30%, 80%, 1)" 
[1] "Color map:"
                 diseases                 ethnicity          geographicOrigin 
 "hsla(349, 30%, 80%, 1)"  "hsla(23

ERROR: Error in color_map[[x]]: subscript out of bounds


In [28]:
# extra header row containing the top level value
# while its cells are merged by top level

top_level_to_count <- table(top_level_row[-1])
top_level_to_count

top_level_to_color <- setNames(
  unique(color_row[-1]),
  unique(top_level_row[-1])
)

top_level_to_color

header_row <- paste(
  sapply(names(top_level_to_count), function(name) {
    colspan_value <- top_level_to_count[[name]]
    color <- top_level_to_color[[name]]
    sprintf(
      '<th colspan="%d" style="background-color:%s;">%s</th>',
      colspan_value, color, name
    )
  }),
  collapse = ""  # Collapse into a single string
)
header_row


                 diseases                 ethnicity          geographicOrigin 
                        5                         5                         5 
                       id interventionsOrProcedures                  measures 
                       10                        10                         9 
                      sex 
                        2 

In [29]:
# JavaScript to prepend the header row
headerCallback <- JS(
  "function(thead, data, start, end, display) {",
  sprintf("$(thead).closest('thead').prepend('<tr><th></th>%s</tr>');", header_row),
  "}"
)


# JavaScript to color the columns based on the values in the 2nd row of the data
initComplete <- JS(
  "function(settings, json) {",
  "$('tbody tr').each(function() {",
    # Loop over each column to set background color based on the value in the first row (background-color)
    "for (var i = 0; i < $('tbody tr:eq(0) td').length; i++) {",
      "var color = $('tbody tr:eq(0)').find('td:eq(' + i + ')').text();",
      "console.log(color);",

      # Apply background color to the cell
      "$('tbody tr').find('td:eq(' + i + ')').css('background-color', color);",
      "$('thead th.sorting').eq(i).css('background-color', color);",  # Apply background color to the header

      # hide the row with the colors
      "$('tbody tr').eq(0).hide();",
    "}",
  "});",
  "}"
)

# remove row 2 and 3
bin_df3 <- bin_df2[-c(2, 3),]

dt <- datatable(
  bin_df3,
  rownames = FALSE,
  escape = FALSE,
  options = list(
    headerCallback = headerCallback,
    initComplete = initComplete
  )
)

saved_wiget_path <- "notebooks/saved_widgets"
widget_fn <- "binary_representation.html"

saveWidget(
  dt,
  file.path(
    saved_wiget_path,
    widget_fn
  )
)


In [30]:
# make sure that vscode is running the live server
live_server_path <- "http://localhost:5500/jupyrenv/notebooks/saved_widgets"
iframe_src <- paste0(
  live_server_path,
  "/",
  widget_fn
)

display_html(
  paste0(
    "<iframe src='",
    iframe_src,
    "' width='100%' height='500px'></iframe>"
  )
)