# Imports

In [2]:
print('Loading libraries')
suppressPackageStartupMessages({
    library(data.table)
    library(ggplot2)
    library(ggpubr)
    library(dplyr)
    library(stringr)
    library(rstatix)
    library(tidyr)
    }
)
print('Library load finished')

# Switches describing where this notebook is being run; together they select
# the project root directory (`base`) below.
hpc <- TRUE
work_desktop <- FALSE
work_linux <- TRUE
singularity <- TRUE
prj <- "HCC-CBS-173-Hillman-BFerris-NRG-HN003-Vectra"

# Resolve `base` from the switches:
#   hpc and singularity               -> "/mnt" (project name is not appended)
#   hpc without singularity           -> /ix/rbao/Projects/<prj>
#   not hpc, not work_desktop         -> /home/brian/data/odrive/.../<prj>
#   not hpc, work_desktop, work_linux -> /mnt/d/OneDrive .../<prj>
#   not hpc, work_desktop, no linux   -> D:\OneDrive ...\<prj>
if (hpc && singularity) {
    base <- "/mnt"
} else if (hpc) {
    base <- paste0("/ix/rbao/Projects/", prj)
} else if (!work_desktop) {
    base <- paste0("/home/brian/data/odrive/Internal_Project_Data_2/", prj)
} else if (work_linux) {
    base <- paste0("/mnt/d/OneDrive - University of Pittsburgh/Internal_Project_Data_2/", prj)
} else {
    base <- paste0("D:\\OneDrive - University of Pittsburgh\\Internal_Project_Data_2\\", prj)
}
print(base)

# Input and output directories derived from the project root.
inform.data <- file.path(base, "data", "inform_data_exports")
results <- file.path(base, "results")
print(results)


[1] "Loading libraries"
[1] "Library load finished"
[1] "/mnt"
[1] "/mnt/results"


## Load data

In [3]:
# Read the per-cell measurement table from the results directory and report
# how long the load took.
fn <- 'NRG-HN003-7552156cell-measurements_11col.csv'
start <- Sys.time()
df <- read.csv(file.path(results, fn))
stop <- Sys.time()
# `stop - start` is a difftime whose units are chosen automatically (secs,
# mins, hours, ...), so a load longer than a minute would be printed in
# minutes while still being labelled "seconds". Force the unit explicitly.
print(paste(as.numeric(difftime(stop, start, units = "secs")), 'seconds'))

[1] "49.3918745517731 seconds"


# Subset data and debug necessary table operations

In [5]:
# Keep only the rows belonging to two samples so the table operations below
# can be tried out on a small table first.
demo.samples <- c('RACTIH_Scan1.qptiff', 'RACTKE_Scan1.qptiff')
idx <- is.element(df$Sample.Name, demo.samples)
sub <- df[idx, ]
dim(sub)

In [6]:
# List the column names of the debugging subset.
names(sub)

In [60]:
# Cell-type composition (percent of cells) within each sample x tissue
# compartment, for the two-sample debugging subset.
sub %>%
    # count() is group_by() + summarize(n = n()) + ungroup() in one step and
    # does not emit the "`summarise()` has grouped output" message.
    count(Sample.Name, tissue.compartment, cell.type) %>%
    # Add an explicit n = 0 row for every sample / compartment / cell-type
    # combination that has no cells, so every group lists every cell type.
    complete(Sample.Name, tissue.compartment, cell.type,
             fill = list(n = 0L)) %>%
    group_by(Sample.Name, tissue.compartment) %>%
    # NOTE: a sample with no cells at all in a compartment has sum(n) == 0,
    # so its percentages come out as NaN (0 / 0).
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    # Do not leave the result grouped.
    ungroup()

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.


Sample.Name,tissue.compartment,cell.type,n,percent_density
<chr>,<chr>,<chr>,<int>,<dbl>
RACTIH_Scan1.qptiff,central.tumor,cd8.t,4,0.0354
RACTIH_Scan1.qptiff,central.tumor,other,7191,63.564
RACTIH_Scan1.qptiff,central.tumor,panck,4118,36.4006
RACTIH_Scan1.qptiff,central.tumor,treg,0,0.0
RACTIH_Scan1.qptiff,distal.stroma,cd8.t,0,0.0
RACTIH_Scan1.qptiff,distal.stroma,other,3062,91.5396
RACTIH_Scan1.qptiff,distal.stroma,panck,283,8.4604
RACTIH_Scan1.qptiff,distal.stroma,treg,0,0.0
RACTIH_Scan1.qptiff,inner.tumor.inv.margin,cd8.t,3,0.0133
RACTIH_Scan1.qptiff,inner.tumor.inv.margin,other,11754,52.0342


In [64]:
# Cell-type composition after rolling the tissue compartments up into
# "all.tumor" / "all.stroma", one row per sample (debugging subset).
sub %>%
    # One row per sample / compartment / cell type; count() avoids the
    # "`summarise()` has grouped output" message of group_by() + summarize().
    count(Sample.Name, tissue.compartment, cell.type) %>%
    # Explicit n = 0 rows for combinations with no cells.
    complete(Sample.Name, tissue.compartment, cell.type,
             fill = list(n = 0L)) %>%
    # Map each compartment onto its roll-up region; anything not listed
    # becomes "other" and is dropped at the end.
    mutate(tissue.all.tumor.stroma = case_when(
        tissue.compartment %in% c("central.tumor", 'inner.tumor.inv.margin') ~ "all.tumor",
        tissue.compartment %in% c("distal.stroma", 'outer.tumor.inv.margin') ~ "all.stroma",
        TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma, cell.type) %>%
    # "drop_last" leaves the data grouped by sample and region, which is the
    # denominator needed for the percentage below.
    summarize(n = sum(n), .groups = "drop_last") %>%
    # NOTE: a region with no cells for a sample yields NaN (0 / 0).
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    ungroup() %>%
    select(-n) %>%
    # One column per region_celltype combination.
    pivot_wider(names_from = c("tissue.all.tumor.stroma", "cell.type"),
                values_from = "percent_density") %>%
    # Drops both the "other" region and the "other" cell type.
    select(-matches("other"))
    

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.


Sample.Name,all.stroma_cd8.t,all.stroma_panck,all.stroma_treg,all.tumor_cd8.t,all.tumor_panck,all.tumor_treg
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.0092,9.8999,0.0046,0.0206,44.0977,0.0
RACTKE_Scan1.qptiff,1.5515,24.6287,3.04,0.0413,80.8149,0.0724


In [80]:
# Count cells once per sample / tissue compartment / cell type and add
# explicit n = 0 rows for combinations with no cells. All three density
# tables below start from this table, so the full data set is only
# aggregated a single time instead of three times.
cell.counts <- df %>%
    count(Sample.Name, tissue.compartment, cell.type) %>%
    complete(Sample.Name, tissue.compartment, cell.type,
             fill = list(n = 0L))

# Percent of cells of each type within every sample x region, in wide form.
#
# counts: table with columns Sample.Name, cell.type, n and the region column.
# region: unquoted name of the column that defines the regions.
# Returns one row per sample with one `<region>_<cell.type>` column per
# combination. A region with no cells for a sample yields NaN (0 / 0).
percent_density_wide <- function(counts, region) {
    counts %>%
        group_by(Sample.Name, {{ region }}, cell.type) %>%
        # "drop_last" keeps the sample x region grouping that serves as the
        # denominator of the percentage.
        summarize(n = sum(n), .groups = "drop_last") %>%
        mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
        ungroup() %>%
        select(-n) %>%
        pivot_wider(names_from = c({{ region }}, cell.type),
                    values_from = percent_density)
}

# Calculate density in inner / outer tumor invasive margin
dense.1 <- cell.counts %>%
    percent_density_wide(tissue.compartment) %>%
    # Keep only the invasive-margin compartments and drop the "other" and
    # "panck" cell types.
    select(-matches("other"), -matches("distal"),
           -matches("central"), -matches("panck")) %>%
    tibble::column_to_rownames(var = "Sample.Name")

# Calculate values for all tumor / stroma
dense.2 <- cell.counts %>%
    mutate(tissue.all.tumor.stroma = case_when(
        tissue.compartment %in% c("central.tumor", 'inner.tumor.inv.margin') ~ "all.tumor",
        tissue.compartment %in% c("distal.stroma", 'outer.tumor.inv.margin') ~ "all.stroma",
        TRUE ~ "other")) %>%
    percent_density_wide(tissue.all.tumor.stroma) %>%
    select(-matches("other"), -matches("panck")) %>%
    tibble::column_to_rownames(var = "Sample.Name")

# Calculate values for all tumor invasive margin
dense.3 <- cell.counts %>%
    mutate(tissue.all.invasive.margin = case_when(
        tissue.compartment %in% c('outer.tumor.inv.margin',
                                  'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
        TRUE ~ "other")) %>%
    percent_density_wide(tissue.all.invasive.margin) %>%
    select(-matches("other"), -matches("panck")) %>%
    tibble::column_to_rownames(var = "Sample.Name")

# bind_cols() pairs rows purely by position, so make sure the three tables
# list the same samples in the same order before gluing them together.
stopifnot(
    identical(rownames(dense.1), rownames(dense.3)),
    identical(rownames(dense.1), rownames(dense.2))
)
examp <- bind_cols(dense.1, dense.3, dense.2)
head(examp)

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name',
'tissue.all.invasive.margin'. You can override using the `.groups` argument.


Unnamed: 0_level_0,inner.tumor.inv.margin_cd8.t,inner.tumor.inv.margin_treg,outer.tumor.inv.margin_cd8.t,outer.tumor.inv.margin_treg,all.tumor.invasive.margin_cd8.t,all.tumor.invasive.margin_treg,all.stroma_cd8.t,all.stroma_treg,all.tumor_cd8.t,all.tumor_treg
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.0133,0.0,0.0109,0.0055,0.0122,0.0024,0.0092,0.0046,0.0206,0.0
RACTKE_Scan1.qptiff,0.0588,0.1059,0.5866,1.4068,0.3135,0.7337,1.5515,3.04,0.0413,0.0724
RACTLH_Scan2.qptiff,0.0036,0.0049,0.0377,0.0491,0.0212,0.0276,0.2723,0.1355,0.0031,0.0047
RACTNY_Scan1.qptiff,0.4126,0.1764,0.6771,0.4161,0.5481,0.2992,1.2966,0.3303,0.3981,0.1524
RACTRF_Scan1.qptiff,0.1787,0.7269,0.5579,1.78,0.358,1.2249,0.9067,2.0507,0.1553,0.8345
RACTRU_Scan1.qptiff,2.1653,0.2106,2.0287,1.9874,2.0783,1.3425,1.7533,1.9068,2.0264,0.1981


In [78]:
# Long-format check of the invasive-margin roll-up on the debugging subset:
# counts and percentages per sample x region x cell type, before reshaping.
sub %>%
    # count() avoids the "`summarise()` has grouped output" message of
    # group_by() + summarize(n = n()).
    count(Sample.Name, tissue.compartment, cell.type) %>%
    # Explicit n = 0 rows for combinations with no cells.
    complete(Sample.Name, tissue.compartment, cell.type,
             fill = list(n = 0L)) %>%
    # Inner and outer invasive margin are pooled; everything else is "other".
    mutate(tissue.all.invasive.margin = case_when(
        tissue.compartment %in% c('outer.tumor.inv.margin',
                                  'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
        TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.invasive.margin, cell.type) %>%
    # "drop_last" keeps the sample x region grouping used as denominator.
    summarize(n = sum(n), .groups = "drop_last") %>%
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    ungroup()
# Remaining steps of the dense.3 pipeline, left out here so the intermediate
# long table (including n) can be inspected:
#     select(-n) %>%
#     pivot_wider(names_from = c("tissue.all.invasive.margin", "cell.type"),
#                values_from = "percent_density") %>%
#     select(-matches("other")) %>% select(-matches('panck')) %>%
#     tibble::column_to_rownames(var = "Sample.Name")


[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name',
'tissue.all.invasive.margin'. You can override using the `.groups` argument.


Sample.Name,tissue.all.invasive.margin,cell.type,n,percent_density
<chr>,<chr>,<chr>,<int>,<dbl>
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,cd8.t,5,0.0122
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,other,28220,68.9621
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,panck,12695,31.0232
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,treg,1,0.0024
RACTIH_Scan1.qptiff,other,cd8.t,4,0.0273
RACTIH_Scan1.qptiff,other,other,10253,69.9482
RACTIH_Scan1.qptiff,other,panck,4401,30.0246
RACTIH_Scan1.qptiff,other,treg,0,0.0
RACTKE_Scan1.qptiff,all.tumor.invasive.margin,cd8.t,1814,0.3135
RACTKE_Scan1.qptiff,all.tumor.invasive.margin,other,213502,36.9003


In [35]:
# Toy example: count cars per am x carb and add explicit n = 0 rows for the
# am / carb combinations that do not occur in mtcars.
mtcars %>%
  group_by(am, carb) %>%
  # .groups = "drop" returns an ungrouped table directly, replacing the
  # separate ungroup() and silencing the "grouped output" message.
  summarise(n = n(), .groups = "drop") %>%
  complete(am = unique(am), carb = unique(carb), fill = list(n = 0))

[1m[22m`summarise()` has grouped output by 'am'. You can override using the `.groups`
argument.


am,carb,n
<dbl>,<dbl>,<int>
0,1,3
0,2,6
0,3,3
0,4,7
1,1,4
1,2,4
1,4,3
1,6,1
1,8,1


In [26]:
# Load data
data(mtcars)
# head(mtcars)

mtcars %>%
  group_by(am, carb) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  complete(am = unique(am), carb = unique(carb), fill = list(n = 0)) %>%
  mutate(percent_am = n / sum(n) * 100)

[1m[22m`summarise()` has grouped output by 'am'. You can override using the `.groups`
argument.


am,carb,n,percent_am
<dbl>,<dbl>,<int>,<dbl>
0,1,3,9.375
0,2,6,18.75
0,3,3,9.375
0,4,7,21.875
0,6,0,0.0
0,8,0,0.0
1,1,4,12.5
1,2,4,12.5
1,3,0,0.0
1,4,3,9.375
