# Imports

In [2]:
print('Loading libraries')
suppressPackageStartupMessages({
    library(data.table)
    library(ggplot2)
    library(ggpubr)
    library(dplyr)
    library(stringr)
    library(rstatix)
    library(tidyr)
    }
)
print('Library load finished')

hpc =  TRUE
work_desktop = FALSE
work_linux = TRUE
singularity = TRUE
prj =  "HCC-CBS-173-Hillman-BFerris-NRG-HN003-Vectra"
if (!hpc){
    if (work_desktop){
        if (work_linux){
            base = paste0('/mnt/d/OneDrive - University of Pittsburgh/Internal_Project_Data_2/' ,prj)
        }else{
            base =paste0('D:\\OneDrive - University of Pittsburgh\\Internal_Project_Data_2\\', prj)
        }
    }else{
        base = paste0('/home/brian/data/odrive/Internal_Project_Data_2/',prj)
    }
}else{
    if (singularity){
        base = '/mnt'
    }else{
        base = paste0('/ix/rbao/Projects/', prj)
    }
}
print(base)
inform.data <- file.path(base,'data','inform_data_exports')
results <- file.path(base,'results')
print(results)


[1] "Loading libraries"
[1] "Library load finished"
[1] "/mnt"
[1] "/mnt/results"


## Load data

In [3]:
fn <- 'NRG-HN003-7552156cell-measurements_11col.csv'
start <- Sys.time()
df <- read.csv(file.path(results,fn))
df$pdl1.cell <- 'other'
df$pdl1.cell[df$is.pdl1.cell] = 'pdl1'
stop <- Sys.time()
print(paste((stop-start),'seconds'))

[1] "49.3918745517731 seconds"


In [83]:
head(df)

Unnamed: 0_level_0,X,Sample.Name,fn,Cell.ID,cell.type,all.tumor.stroma,tissue.compartment,pdl1.expression,pd1.expression,combined.pheno,is.pdl1.cell,is.panck.cell
Unnamed: 0_level_1,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<lgl>,<lgl>
1,1,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39799,other,Tumor,inner.tumor.inv.margin,0.125,0.316,other,False,False
2,2,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39800,panck,Tumor,central.tumor,0.36,0.107,CK+,False,True
3,3,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39801,other,Tumor,central.tumor,0.22,0.172,other,False,False
4,4,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39802,panck,Tumor,central.tumor,0.397,0.157,CK+,False,True
5,5,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39803,other,Tumor,central.tumor,0.111,0.235,PD1+,False,False
6,6,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39804,other,Tumor,central.tumor,0.42,0.154,other,False,False


In [89]:
df$pdl1.cell <- 'other'
df$pdl1.cell[df$is.pdl1.cell] = 'pdl1'
df %>% count(pdl1.cell)

pdl1.cell,n
<chr>,<int>
other,5314108
pdl1,2238048


# Subset data and debug necessary table operations

In [92]:
idx <- df$Sample.Name %in% c('RACTIH_Scan1.qptiff','RACTKE_Scan1.qptiff')
sub <- df[idx,]
dim(sub)

In [6]:
colnames(sub)

In [60]:
sub %>% 
    group_by(Sample.Name, tissue.compartment, cell.type) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             cell.type = unique(cell.type), 
             fill = list(n = 0)) %>%    
    group_by(Sample.Name, tissue.compartment) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) 

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.


Sample.Name,tissue.compartment,cell.type,n,percent_density
<chr>,<chr>,<chr>,<int>,<dbl>
RACTIH_Scan1.qptiff,central.tumor,cd8.t,4,0.0354
RACTIH_Scan1.qptiff,central.tumor,other,7191,63.564
RACTIH_Scan1.qptiff,central.tumor,panck,4118,36.4006
RACTIH_Scan1.qptiff,central.tumor,treg,0,0.0
RACTIH_Scan1.qptiff,distal.stroma,cd8.t,0,0.0
RACTIH_Scan1.qptiff,distal.stroma,other,3062,91.5396
RACTIH_Scan1.qptiff,distal.stroma,panck,283,8.4604
RACTIH_Scan1.qptiff,distal.stroma,treg,0,0.0
RACTIH_Scan1.qptiff,inner.tumor.inv.margin,cd8.t,3,0.0133
RACTIH_Scan1.qptiff,inner.tumor.inv.margin,other,11754,52.0342


In [64]:
sub %>% 
    group_by(Sample.Name, tissue.compartment, cell.type) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             cell.type = unique(cell.type), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.tumor.stroma = case_when(tissue.compartment %in% c("central.tumor",'inner.tumor.inv.margin') ~ "all.tumor",
                                tissue.compartment %in% c("distal.stroma",'outer.tumor.inv.margin') ~ "all.stroma",
                                TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma, cell.type) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.tumor.stroma", "cell.type"),
               values_from = "percent_density") %>%
    select(-matches("other"))
    

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.


Sample.Name,all.stroma_cd8.t,all.stroma_panck,all.stroma_treg,all.tumor_cd8.t,all.tumor_panck,all.tumor_treg
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.0092,9.8999,0.0046,0.0206,44.0977,0.0
RACTKE_Scan1.qptiff,1.5515,24.6287,3.04,0.0413,80.8149,0.0724


In [99]:
# # Calculate values for all tumor / stroma
t.dense.all.ts <- df %>%  group_by(Sample.Name, tissue.compartment, cell.type) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             cell.type = unique(cell.type), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.tumor.stroma = case_when(tissue.compartment %in% c("central.tumor",'inner.tumor.inv.margin') ~ "all.tumor",
                                tissue.compartment %in% c("distal.stroma",'outer.tumor.inv.margin') ~ "all.stroma",
                                TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma, cell.type) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.tumor.stroma", "cell.type"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>% select(-matches('panck')) %>%
    tibble::column_to_rownames(var = "Sample.Name")

# # Calculate values for all tumor / stroma
pdl1.dense.all.ts <- df %>%  group_by(Sample.Name, tissue.compartment, pdl1.cell) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             pdl1.cell = unique(pdl1.cell), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.tumor.stroma = case_when(tissue.compartment %in% c("central.tumor",'inner.tumor.inv.margin') ~ "all.tumor",
                                tissue.compartment %in% c("distal.stroma",'outer.tumor.inv.margin') ~ "all.stroma",
                                TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma, pdl1.cell) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.tumor.stroma", "pdl1.cell"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>% select(-matches('panck')) %>%
    tibble::column_to_rownames(var = "Sample.Name")

# Calculate density in inner / outer tumor invasive margin
t.dense.in.out <- df %>% 
    group_by(Sample.Name, tissue.compartment, cell.type) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             cell.type = unique(cell.type), 
             fill = list(n = 0)) %>%    
    group_by(Sample.Name, tissue.compartment) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.compartment", "cell.type"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>%  select(-matches("distal")) %>%  
    select(-matches("central")) %>% select(-matches("panck")) %>%
    tibble::column_to_rownames(var = "Sample.Name")

pdl1.dense.in.out <- df %>% group_by(Sample.Name, tissue.compartment, pdl1.cell) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             pdl1.cell = unique(pdl1.cell), 
             fill = list(n = 0)) %>%    
    group_by(Sample.Name, tissue.compartment) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.compartment", "pdl1.cell"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>%  select(-matches("distal")) %>%  
    select(-matches("central")) %>% select(-matches("panck")) %>%
    tibble::column_to_rownames(var = "Sample.Name")



# # Calculate values for all tumor invasive margin
t.dense.all.tiv <- df %>%  group_by(Sample.Name, tissue.compartment, cell.type) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             cell.type = unique(cell.type), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.invasive.margin = case_when(tissue.compartment %in% c('outer.tumor.inv.margin',
                                                                         'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
                                                  TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.invasive.margin, cell.type) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.invasive.margin) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4)) %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.invasive.margin", "cell.type"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>% select(-matches('panck')) %>%
    tibble::column_to_rownames(var = "Sample.Name")

pdl1.dense.all.tiv <- df %>%  group_by(Sample.Name, tissue.compartment, pdl1.cell) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             pdl1.cell = unique(pdl1.cell), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.invasive.margin = case_when(tissue.compartment %in% c('outer.tumor.inv.margin',
                                                                         'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
                                                  TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.invasive.margin, pdl1.cell) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.invasive.margin) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4))  %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.invasive.margin", "pdl1.cell"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>% select(-matches('panck')) %>%
    tibble::column_to_rownames(var = "Sample.Name")


examp <- bind_cols(t.dense.all.ts, pdl1.dense.all.ts,
                   t.dense.in.out, pdl1.dense.in.out,
                   t.dense.all.tiv, pdl1.dense.all.tiv) %>%
        rename_with(~paste0(.,'.density'),everything())
head(examp)

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name',
'tissue.all.invasive.margin

Unnamed: 0_level_0,all.stroma_cd8.t.density,all.stroma_treg.density,all.tumor_cd8.t.density,all.tumor_treg.density,all.stroma_pdl1.density,all.tumor_pdl1.density,inner.tumor.inv.margin_cd8.t.density,inner.tumor.inv.margin_treg.density,outer.tumor.inv.margin_cd8.t.density,outer.tumor.inv.margin_treg.density,inner.tumor.inv.margin_pdl1.density,outer.tumor.inv.margin_pdl1.density,all.tumor.invasive.margin_cd8.t.density,all.tumor.invasive.margin_treg.density,all.tumor.invasive.margin_pdl1.density
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.0092,0.0046,0.0206,0.0,2.4819,1.581,0.0133,0.0,0.0109,0.0055,1.7752,2.6947,0.0122,0.0024,2.1871
RACTKE_Scan1.qptiff,1.5515,3.04,0.0413,0.0724,36.9659,68.6116,0.0588,0.1059,0.5866,1.4068,65.9033,49.1546,0.3135,0.7337,57.8206
RACTLH_Scan2.qptiff,0.2723,0.1355,0.0031,0.0047,3.9697,8.7931,0.0036,0.0049,0.0377,0.0491,8.4694,4.4657,0.0212,0.0276,6.4075
RACTNY_Scan1.qptiff,1.2966,0.3303,0.3981,0.1524,0.9533,3.7857,0.4126,0.1764,0.6771,0.4161,3.7269,0.9665,0.5481,0.2992,2.3128
RACTRF_Scan1.qptiff,0.9067,2.0507,0.1553,0.8345,1.5253,14.2386,0.1787,0.7269,0.5579,1.78,9.6759,2.0191,0.358,1.2249,6.0553
RACTRU_Scan1.qptiff,1.7533,1.9068,2.0264,0.1981,8.9148,23.9201,2.1653,0.2106,2.0287,1.9874,22.9721,14.1146,2.0783,1.3425,17.3296


# PDL1 density

In [82]:
colnames(sub)

In [98]:
sub %>%  group_by(Sample.Name, tissue.compartment, pdl1.cell) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             pdl1.cell = unique(pdl1.cell), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.invasive.margin = case_when(tissue.compartment %in% c('outer.tumor.inv.margin',
                                                                         'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
                                                  TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.invasive.margin, pdl1.cell) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.invasive.margin) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4))  %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.invasive.margin", "pdl1.cell"),
               values_from = "percent_density") %>%
    select(-matches("other")) %>% select(-matches('panck')) %>%
    tibble::column_to_rownames(var = "Sample.Name")

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name',
'tissue.all.invasive.margin'. You can override using the `.groups` argument.


Unnamed: 0_level_0,all.tumor.invasive.margin_pdl1
Unnamed: 0_level_1,<dbl>
RACTIH_Scan1.qptiff,2.1871
RACTKE_Scan1.qptiff,57.8206


In [94]:
sub %>%  group_by(Sample.Name, tissue.compartment, cell.type) %>% 
    summarize(n=n()) %>%
    ungroup() %>%
    complete(Sample.Name = unique(Sample.Name), 
             tissue.compartment = unique(tissue.compartment), 
             cell.type = unique(cell.type), 
             fill = list(n = 0)) %>%    
    mutate(tissue.all.invasive.margin = case_when(tissue.compartment %in% c('outer.tumor.inv.margin',
                                                                         'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
                                                  TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.invasive.margin, cell.type) %>%
    summarize(n=sum(n)) %>%
    group_by(Sample.Name, tissue.all.invasive.margin) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits=4))# %>%
    # select(-n) %>%
    # pivot_wider(names_from = c("tissue.all.invasive.margin", "cell.type"),
    #            values_from = "percent_density") %>%
    # select(-matches("other")) %>% select(-matches('panck')) %>%
    # tibble::column_to_rownames(var = "Sample.Name")


[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name',
'tissue.all.invasive.margin'. You can override using the `.groups` argument.


Sample.Name,tissue.all.invasive.margin,cell.type,n,percent_density
<chr>,<chr>,<chr>,<int>,<dbl>
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,cd8.t,5,0.0122
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,other,28220,68.9621
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,panck,12695,31.0232
RACTIH_Scan1.qptiff,all.tumor.invasive.margin,treg,1,0.0024
RACTIH_Scan1.qptiff,other,cd8.t,4,0.0273
RACTIH_Scan1.qptiff,other,other,10253,69.9482
RACTIH_Scan1.qptiff,other,panck,4401,30.0246
RACTIH_Scan1.qptiff,other,treg,0,0.0
RACTKE_Scan1.qptiff,all.tumor.invasive.margin,cd8.t,1814,0.3135
RACTKE_Scan1.qptiff,all.tumor.invasive.margin,other,213502,36.9003


In [35]:
mtcars %>%
  group_by(am, carb) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  complete(am = unique(am), carb = unique(carb), fill = list(n = 0))

[1m[22m`summarise()` has grouped output by 'am'. You can override using the `.groups`
argument.


am,carb,n
<dbl>,<dbl>,<int>
0,1,3
0,2,6
0,3,3
0,4,7
1,1,4
1,2,4
1,4,3
1,6,1
1,8,1


In [26]:
# Load data
data(mtcars)
# head(mtcars)

mtcars %>%
  group_by(am, carb) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  complete(am = unique(am), carb = unique(carb), fill = list(n = 0)) %>%
  mutate(percent_am = n / sum(n) * 100)

[1m[22m`summarise()` has grouped output by 'am'. You can override using the `.groups`
argument.


am,carb,n,percent_am
<dbl>,<dbl>,<int>,<dbl>
0,1,3,9.375
0,2,6,18.75
0,3,3,9.375
0,4,7,21.875
0,6,0,0.0
0,8,0,0.0
1,1,4,12.5
1,2,4,12.5
1,3,0,0.0
1,4,3,9.375
