# Imports

In [2]:
print('Loading libraries')
suppressPackageStartupMessages({
    library(data.table)
    library(ggplot2)
    library(ggpubr)
    library(dplyr)
    library(stringr)
    library(rstatix)
    library(tidyr)
    }
)
print('Library load finished')

# Switches describing where the notebook is running; they decide which
# directory is used as the project root.
hpc <- TRUE
work_desktop <- FALSE
work_linux <- TRUE
singularity <- TRUE
prj <- "HCC-CBS-173-Hillman-BFerris-NRG-HN003-Vectra"

# Resolve the project root. On the HPC the singularity flag selects between
# '/mnt' and the cluster project directory; off the HPC the desktop/linux
# flags select between the OneDrive locations and the local home directory.
if (hpc && singularity) {
    base <- '/mnt'
} else if (hpc) {
    base <- paste0('/ix/rbao/Projects/', prj)
} else if (!work_desktop) {
    base <- paste0('/home/brian/data/odrive/Internal_Project_Data_2/', prj)
} else if (work_linux) {
    base <- paste0('/mnt/d/OneDrive - University of Pittsburgh/Internal_Project_Data_2/', prj)
} else {
    base <- paste0('D:\\OneDrive - University of Pittsburgh\\Internal_Project_Data_2\\', prj)
}
print(base)

# Input and output directories derived from the project root.
inform.data <- file.path(base, 'data', 'inform_data_exports')
results <- file.path(base, 'results')
print(results)


[1] "Loading libraries"
[1] "Library load finished"
[1] "/mnt"
[1] "/mnt/results"


## Load data

In [134]:
# Load the per-cell measurement table and add two derived label columns.
fn <- 'NRG-HN003-7552156cell-measurements_11col.csv'
start <- Sys.time()
df <- read.csv(file.path(results, fn))

# pdl1.cell: 'all.pdl1' for every cell flagged by is.pdl1.cell, else 'other'.
df$pdl1.cell <- 'other'
df$pdl1.cell[df$is.pdl1.cell] <- 'all.pdl1'

# panck.pdl1.cell: splits the is.pdl1.cell rows into those also flagged by
# is.panck.cell and those whose combined phenotype contains neither 'CK+'
# nor 'CD3+'.
df$panck.pdl1.cell <- 'other'
df$panck.pdl1.cell[df$is.panck.cell & df$is.pdl1.cell] <- 'panck.pdl1'
# fixed = TRUE matches the literal text. As a regular expression the '+' is a
# quantifier, so 'CK+' would also match "CK" with no plus sign after it.
idx <- !(grepl('CK+', df$combined.pheno, fixed = TRUE) |
         grepl('CD3+', df$combined.pheno, fixed = TRUE))
df$panck.pdl1.cell[idx & df$is.pdl1.cell] <- 'panckneg.cd3neg.pdl1'

# Force the unit to seconds: a plain difftime switches to minutes/hours for
# longer runs, which would make the "seconds" label wrong.
elapsed <- difftime(Sys.time(), start, units = 'secs')
print(paste(as.numeric(elapsed), 'seconds'))
head(df)

[1] "48.0321016311646 seconds"


Unnamed: 0_level_0,X,Sample.Name,fn,Cell.ID,cell.type,all.tumor.stroma,tissue.compartment,pdl1.expression,pd1.expression,combined.pheno,is.pdl1.cell,is.panck.cell,pdl1.cell,panck.pdl1.cell
Unnamed: 0_level_1,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<lgl>,<lgl>,<chr>,<chr>
1,1,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39799,other,Tumor,inner.tumor.inv.margin,0.125,0.316,other,False,False,other,other
2,2,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39800,panck,Tumor,central.tumor,0.36,0.107,CK+,False,True,other,other
3,3,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39801,other,Tumor,central.tumor,0.22,0.172,other,False,False,other,other
4,4,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39802,panck,Tumor,central.tumor,0.397,0.157,CK+,False,True,other,other
5,5,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39803,other,Tumor,central.tumor,0.111,0.235,PD1+,False,False,other,other
6,6,RACTIH_Scan1.qptiff,"RACTIH_Scan1_[15509,42225]_cell_seg_data.txt",39804,other,Tumor,central.tumor,0.42,0.154,other,False,False,other,other


In [195]:
# Rebuild the derived label columns on the already loaded table.
# pdl1.cell: 'all.pdl1' for every cell flagged by is.pdl1.cell, else 'other'.
df$pdl1.cell <- 'other'
df$pdl1.cell[df$is.pdl1.cell] <- 'all.pdl1'

# panck.pdl1.cell: is.pdl1.cell rows that are also is.panck.cell, and
# is.pdl1.cell rows whose phenotype contains neither 'CK+' nor 'CD3+'.
df$panck.pdl1.cell <- 'other'
df$panck.pdl1.cell[df$is.panck.cell & df$is.pdl1.cell] <- 'panck.pdl1'
# fixed = TRUE matches the literal text. As a regular expression the '+' is a
# quantifier, so 'CK+' would also match "CK" with no plus sign after it.
idx <- !(grepl('CK+', df$combined.pheno, fixed = TRUE) |
         grepl('CD3+', df$combined.pheno, fixed = TRUE))
df$panck.pdl1.cell[idx & df$is.pdl1.cell] <- 'panckneg.cd3neg.pdl1'

# Number of cells carrying each label.
df %>% count(panck.pdl1.cell)

panck.pdl1.cell,n
<chr>,<int>
other,5478651
panck.pdl1,1123403
panckneg.cd3neg.pdl1,950102


# Subset data and debug necessary table operations

In [136]:
# Keep only the rows of two scans so the table operations below can be
# tried out on a small table; the last line shows its dimensions.
idx <- is.element(df$Sample.Name,
                  c('RACTIH_Scan1.qptiff', 'RACTKE_Scan1.qptiff'))
sub <- df[idx, ]
dim(sub)

In [6]:
# List the columns available in the two-scan subset.
names(sub)

In [60]:
# Percent of each cell type within every sample / tissue compartment of the
# subset. count() replaces group_by() + summarize() + ungroup() and avoids the
# implicit-grouping message; combinations that never occur get n = 0.
sub %>%
    count(Sample.Name, tissue.compartment, cell.type) %>%
    complete(Sample.Name = unique(Sample.Name),
             tissue.compartment = unique(tissue.compartment),
             cell.type = unique(cell.type),
             fill = list(n = 0)) %>%
    group_by(Sample.Name, tissue.compartment) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    # Return an ungrouped table so later verbs are not silently grouped.
    ungroup()

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.


Sample.Name,tissue.compartment,cell.type,n,percent_density
<chr>,<chr>,<chr>,<int>,<dbl>
RACTIH_Scan1.qptiff,central.tumor,cd8.t,4,0.0354
RACTIH_Scan1.qptiff,central.tumor,other,7191,63.564
RACTIH_Scan1.qptiff,central.tumor,panck,4118,36.4006
RACTIH_Scan1.qptiff,central.tumor,treg,0,0.0
RACTIH_Scan1.qptiff,distal.stroma,cd8.t,0,0.0
RACTIH_Scan1.qptiff,distal.stroma,other,3062,91.5396
RACTIH_Scan1.qptiff,distal.stroma,panck,283,8.4604
RACTIH_Scan1.qptiff,distal.stroma,treg,0,0.0
RACTIH_Scan1.qptiff,inner.tumor.inv.margin,cd8.t,3,0.0133
RACTIH_Scan1.qptiff,inner.tumor.inv.margin,other,11754,52.0342


# Example for pdl1 expression

In [125]:
# Median pdl1.expression per sample and tissue compartment of the subset,
# reshaped to one row per sample. Sample/compartment pairs with no cells are
# filled with 0 before reshaping.
sub %>%
    group_by(Sample.Name, tissue.compartment) %>%
    summarize(median_pdl1_expression = median(pdl1.expression),
              .groups = "drop") %>%
    complete(Sample.Name = unique(Sample.Name),
             tissue.compartment = unique(tissue.compartment),
             fill = list(median_pdl1_expression = 0)) %>%
    pivot_wider(names_from = "tissue.compartment",
                values_from = "median_pdl1_expression") %>%
    # Drop every compartment column whose name matches one of these patterns.
    select(-matches("other|distal|central|panck")) %>%
    tibble::column_to_rownames(var = "Sample.Name") %>%
    rename_with(~paste0(., '_pdl1.expression'), everything())

[1m[22m`summarise()` has grouped output by 'Sample.Name'. You can override using the
`.groups` argument.


Unnamed: 0_level_0,inner.tumor.inv.margin_pdl1.expression,outer.tumor.inv.margin_pdl1.expression
Unnamed: 0_level_1,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.557,0.6635
RACTKE_Scan1.qptiff,4.744,4.088


In [64]:
# Percent of each cell type within the pooled tumor and stroma regions of the
# subset, one row per sample and one column per region/cell-type pair.
sub %>%
    count(Sample.Name, tissue.compartment, cell.type) %>%
    complete(Sample.Name = unique(Sample.Name),
             tissue.compartment = unique(tissue.compartment),
             cell.type = unique(cell.type),
             fill = list(n = 0)) %>%
    # Pool compartments into regions; anything unlisted becomes "other".
    mutate(tissue.all.tumor.stroma = case_when(
        tissue.compartment %in% c("central.tumor", 'inner.tumor.inv.margin') ~ "all.tumor",
        tissue.compartment %in% c("distal.stroma", 'outer.tumor.inv.margin') ~ "all.stroma",
        TRUE ~ "other")) %>%
    group_by(Sample.Name, tissue.all.tumor.stroma, cell.type) %>%
    summarize(n = sum(n), .groups = "drop") %>%
    # The denominator is every cell of the sample in that region.
    group_by(Sample.Name, tissue.all.tumor.stroma) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    ungroup() %>%
    select(-n) %>%
    pivot_wider(names_from = c("tissue.all.tumor.stroma", "cell.type"),
                values_from = "percent_density") %>%
    select(-matches("other"))
    

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.


Sample.Name,all.stroma_cd8.t,all.stroma_panck,all.stroma_treg,all.tumor_cd8.t,all.tumor_panck,all.tumor_treg
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.0092,9.8999,0.0046,0.0206,44.0977,0.0
RACTKE_Scan1.qptiff,1.5515,24.6287,3.04,0.0413,80.8149,0.0724


In [220]:
###################### Shared helpers for the per-sample summary tables
# Every table below is built the same way: pool tissue compartments into
# regions, aggregate per sample / region / cell class, reshape to one row per
# sample, and drop the columns that are not reported. The helpers keep that
# recipe in one place instead of repeating it for every table.

# Pool compartments into "all.tumor" / "all.stroma"; anything else is "other".
tumor_stroma_region <- function(compartment) {
    case_when(compartment %in% c("central.tumor", 'inner.tumor.inv.margin') ~ "all.tumor",
              compartment %in% c("distal.stroma", 'outer.tumor.inv.margin') ~ "all.stroma",
              TRUE ~ "other")
}

# Pool the inner and outer invasive margin; anything else is "other".
invasive_margin_region <- function(compartment) {
    case_when(compartment %in% c('outer.tumor.inv.margin',
                                 'inner.tumor.inv.margin') ~ "all.tumor.invasive.margin",
              TRUE ~ "other")
}

# Put every compartment into the single region "all.tissue".
whole_tissue_region <- function(compartment) {
    rep("all.tissue", length(compartment))
}

# Percent of cells of each class within each sample / region.
#   cells     : per-cell table with Sample.Name and tissue.compartment columns
#   class_col : name of the column holding the cell class labels
#   region_of : function mapping tissue.compartment values to region labels
#               (identity keeps the individual compartments)
#   drop      : regex patterns; result columns matching any of them are removed
# Returns a data.frame with Sample.Name as row names and one
# "<region>_<class>" column per remaining region/class pair. The denominator
# of each percentage is every cell of the sample in that region, including
# the classes that are dropped afterwards.
percent_density_wide <- function(cells, class_col, region_of = identity,
                                 drop = c("other", "distal", "central")) {
    cells %>%
        count(Sample.Name, tissue.compartment, cell.class = .data[[class_col]]) %>%
        # Combinations that never occur count as 0 cells.
        complete(Sample.Name = unique(Sample.Name),
                 tissue.compartment = unique(tissue.compartment),
                 cell.class = unique(cell.class),
                 fill = list(n = 0)) %>%
        mutate(region = region_of(tissue.compartment)) %>%
        group_by(Sample.Name, region, cell.class) %>%
        summarize(n = sum(n), .groups = "drop") %>%
        group_by(Sample.Name, region) %>%
        mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
        ungroup() %>%
        select(-n) %>%
        pivot_wider(names_from = c("region", "cell.class"),
                    values_from = "percent_density") %>%
        select(-matches(paste(drop, collapse = "|"))) %>%
        tibble::column_to_rownames(var = "Sample.Name")
}

# Median pdl1.expression within each sample / region / cell class.
#   cells     : per-cell table with Sample.Name, tissue.compartment and
#               pdl1.expression columns
#   region_of : function mapping tissue.compartment values to region labels
#   class_col : name of the cell class column, or NULL to pool all cells of a
#               region under the class label "all"
#   drop      : regex patterns; result columns matching any of them are removed
# Returns a data.frame with Sample.Name as row names and one
# "<region>_<class>" column per remaining pair. Pairs with no cells are
# filled with 0.
median_pdl1_wide <- function(cells, region_of = identity, class_col = NULL,
                             drop = c("other", "distal", "central")) {
    cells %>%
        mutate(region = region_of(tissue.compartment),
               cell.class = if (is.null(class_col)) "all" else .data[[class_col]]) %>%
        group_by(Sample.Name, region, cell.class) %>%
        summarize(median_pdl1_expression = median(pdl1.expression),
                  .groups = "drop") %>%
        complete(Sample.Name = unique(Sample.Name),
                 region = unique(region),
                 cell.class = unique(cell.class),
                 fill = list(median_pdl1_expression = 0)) %>%
        pivot_wider(names_from = c("region", "cell.class"),
                    values_from = "median_pdl1_expression") %>%
        select(-matches(paste(drop, collapse = "|"))) %>%
        tibble::column_to_rownames(var = "Sample.Name")
}

# Column-bind per-sample tables after checking that they list the same samples
# in the same order; bind_cols() matches rows by position only.
bind_sample_tables <- function(...) {
    tables <- list(...)
    sample.ids <- row.names(tables[[1]])
    same.rows <- vapply(tables,
                        function(tbl) identical(row.names(tbl), sample.ids),
                        logical(1))
    stopifnot(all(same.rows))
    bind_cols(tables)
}

# The cell.type tables report cd8.t and treg only, so panck is dropped as well.
drop.with.panck <- c("other", "distal", "central", "panck")

###################### Calculate values for all tumor / stroma
#TREG CD8T
t.dense.all.ts <- percent_density_wide(df, "cell.type", tumor_stroma_region,
                                       drop = drop.with.panck)
#PANCK+/- PDL1
panck.pdl1.dense.all.ts <- percent_density_wide(df, "panck.pdl1.cell", tumor_stroma_region)
# ALL PDL1
all.pdl1.dense.all.ts <- percent_density_wide(df, "pdl1.cell", tumor_stroma_region)

###################### Calculate density in inner / outer tumor invasive margin
#TREG CD8T
t.dense.in.out <- percent_density_wide(df, "cell.type", drop = drop.with.panck)
#ALL PDL1+
all.pdl1.dense.in.out <- percent_density_wide(df, "pdl1.cell")
#PANCK+/- PDL1+
panck.pdl1.dense.in.out <- percent_density_wide(df, "panck.pdl1.cell")

#################### Calculate density for all tumor invasive margin
#TREG CD8T
t.dense.all.tiv <- percent_density_wide(df, "cell.type", invasive_margin_region,
                                        drop = drop.with.panck)
# ALL PDL1
all.pdl1.dense.all.tiv <- percent_density_wide(df, "pdl1.cell", invasive_margin_region)
#PANCK+/- PDL1+
panck.pdl1.dense.all.tiv <- percent_density_wide(df, "panck.pdl1.cell", invasive_margin_region)

#################### Calculate density for all tissue, all PDL1
#Treg CD8T
t.dense.all.tissue <- percent_density_wide(df, "cell.type", whole_tissue_region,
                                           drop = drop.with.panck)
#ALL PDL1+
all.pdl1.dense.all.tissue <- percent_density_wide(df, "pdl1.cell", whole_tissue_region)
#PANCK+/- PDL1+
panck.pdl1.dense.all.tissue <- percent_density_wide(df, "panck.pdl1.cell", whole_tissue_region)

all.dense <- bind_sample_tables(t.dense.all.tissue, all.pdl1.dense.all.tissue, panck.pdl1.dense.all.tissue,
                                t.dense.all.ts, all.pdl1.dense.all.ts, panck.pdl1.dense.all.ts,
                                t.dense.in.out, all.pdl1.dense.in.out, panck.pdl1.dense.in.out,
                                t.dense.all.tiv, all.pdl1.dense.all.tiv, panck.pdl1.dense.all.tiv) %>%
        rename_with(~paste0(., '.cell_percent.density'), everything())
# head(all.dense)


#################################################################
#####################################################################
###################### PDL1 expression in tumor compartments

# "all.*" tables pool every cell of a region (columns end in "_all");
# "panck.*" tables split each region by panck.pdl1.cell.
all.pdl1.exp.in.out <- median_pdl1_wide(df)
panck.pdl1.exp.in.out <- median_pdl1_wide(df, class_col = "panck.pdl1.cell")

all.pdl1.exp.all.tiv <- median_pdl1_wide(df, invasive_margin_region)
panck.pdl1.exp.all.tiv <- median_pdl1_wide(df, invasive_margin_region,
                                           class_col = "panck.pdl1.cell")

all.pdl1.exp.all.ts <- median_pdl1_wide(df, tumor_stroma_region)
panck.pdl1.exp.all.ts <- median_pdl1_wide(df, tumor_stroma_region,
                                          class_col = "panck.pdl1.cell")

all.pdl1.exp.all.tissue <- median_pdl1_wide(df, whole_tissue_region)
panck.pdl1.exp.all.tissue <- median_pdl1_wide(df, whole_tissue_region,
                                              class_col = "panck.pdl1.cell")

pdl1.expression <- bind_sample_tables(all.pdl1.exp.all.tissue, panck.pdl1.exp.all.tissue,
                                      all.pdl1.exp.all.ts, panck.pdl1.exp.all.ts,
                                      all.pdl1.exp.all.tiv, panck.pdl1.exp.all.tiv,
                                      all.pdl1.exp.in.out, panck.pdl1.exp.in.out) %>%
                    rename_with(~paste0(., '.cell_pdl1.expression'), everything())

all.measures <- bind_cols(all.dense, pdl1.expression)
t<-c()
for (nm in row.names(all.measures)){
    t <- c(t,str_split_fixed(nm,'_',n=2)[1])
}
row.names(all.measures) <- t
all.measures <- all.measures[,sort(colnames(all.measures))]
head(all.measures)

# Write the per-sample summary table to the results directory. The file name
# embeds the table's row and column counts.
fn <- file.path(results, sprintf('NRG-HN003.sample.values.%d.%d.csv',
                                 nrow(all.measures),
                                 ncol(all.measures)))
print(fn)
# Fix: the original wrote `df` (the cell-level measurements) to this path,
# although the name and dimensions describe `all.measures`.
write.csv(all.measures, fn)
print('Finished')
                   

[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.all.tumor.stroma'.
You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. You
can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Sample.Name', 'tissue.compartment'. 

Unnamed: 0_level_0,all.stroma_all.cell_pdl1.expression,all.stroma_all.pdl1.cell_percent.density,all.stroma_cd8.t.cell_percent.density,all.stroma_panck.pdl1.cell_pdl1.expression,all.stroma_panck.pdl1.cell_percent.density,all.stroma_panckneg.cd3neg.pdl1.cell_pdl1.expression,all.stroma_panckneg.cd3neg.pdl1.cell_percent.density,all.stroma_treg.cell_percent.density,all.tissue_all.cell_pdl1.expression,all.tissue_all.pdl1.cell_percent.density,⋯,inner.tumor.inv.margin_panckneg.cd3neg.pdl1.cell_percent.density,inner.tumor.inv.margin_treg.cell_percent.density,outer.tumor.inv.margin_all.cell_pdl1.expression,outer.tumor.inv.margin_all.pdl1.cell_percent.density,outer.tumor.inv.margin_cd8.t.cell_percent.density,outer.tumor.inv.margin_panck.pdl1.cell_pdl1.expression,outer.tumor.inv.margin_panck.pdl1.cell_percent.density,outer.tumor.inv.margin_panckneg.cd3neg.pdl1.cell_pdl1.expression,outer.tumor.inv.margin_panckneg.cd3neg.pdl1.cell_percent.density,outer.tumor.inv.margin_treg.cell_percent.density
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH,0.592,2.4819,0.0092,3.046,0.3137,4.328,2.1497,0.0046,0.535,1.9324,⋯,0.6375,0.0,0.6635,2.6947,0.0109,3.166,0.3655,4.208,2.3074,0.0055
RACTKE,2.937,36.9659,1.5515,7.473,21.3651,7.575,13.4975,3.04,4.143,52.2413,⋯,9.1335,0.1059,4.088,49.1546,0.5866,7.295,33.145,7.429,14.5832,1.4068
RACTLH,0.513,3.9697,0.2723,5.859,1.5995,5.718,2.2862,0.1355,0.629,6.1618,⋯,1.6733,0.0049,0.486,4.4657,0.0377,5.932,2.4926,5.606,1.9537,0.0491
RACTNY,0.397,0.9533,1.2966,1.5375,0.1327,4.19,0.75,0.3303,0.519,2.101,⋯,0.8985,0.1764,0.452,0.9665,0.6771,1.479,0.2093,2.7,0.7339,0.4161
RACTRF,0.5,1.5253,0.9067,4.727,0.3559,5.8645,1.0508,2.0507,0.954,8.7347,⋯,3.2173,0.7269,0.528,2.0191,0.5579,4.63,0.5446,5.693,1.3284,1.78
RACTRU,1.11,8.9148,1.7533,5.4175,0.7182,6.2145,7.1438,1.9068,1.295,12.292,⋯,11.4745,0.2106,1.476,14.1146,2.0287,5.46,1.5131,6.0545,11.0922,1.9874


[1] "/mnt/results/NRG-HN003.sample.values.34.48.csv"
[1] "Finished"


In [219]:
# Put the columns of all.measures in alphabetical order and show the first rows.
all.measures <- all.measures[, sort(names(all.measures))]
head(all.measures, n = 6L)

Unnamed: 0_level_0,all.stroma_all.cell_pdl1.expression,all.stroma_all.pdl1.cell_percent.density,all.stroma_cd8.t.cell_percent.density,all.stroma_panck.pdl1.cell_pdl1.expression,all.stroma_panck.pdl1.cell_percent.density,all.stroma_panckneg.cd3neg.pdl1.cell_pdl1.expression,all.stroma_panckneg.cd3neg.pdl1.cell_percent.density,all.stroma_treg.cell_percent.density,all.tissue_all.cell_pdl1.expression,all.tissue_all.pdl1.cell_percent.density,⋯,inner.tumor.inv.margin_panckneg.cd3neg.pdl1.cell_percent.density,inner.tumor.inv.margin_treg.cell_percent.density,outer.tumor.inv.margin_all.cell_pdl1.expression,outer.tumor.inv.margin_all.pdl1.cell_percent.density,outer.tumor.inv.margin_cd8.t.cell_percent.density,outer.tumor.inv.margin_panck.pdl1.cell_pdl1.expression,outer.tumor.inv.margin_panck.pdl1.cell_percent.density,outer.tumor.inv.margin_panckneg.cd3neg.pdl1.cell_pdl1.expression,outer.tumor.inv.margin_panckneg.cd3neg.pdl1.cell_percent.density,outer.tumor.inv.margin_treg.cell_percent.density
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RACTIH,0.592,2.4819,0.0092,3.046,0.3137,4.328,2.1497,0.0046,0.535,1.9324,⋯,0.6375,0.0,0.6635,2.6947,0.0109,3.166,0.3655,4.208,2.3074,0.0055
RACTKE,2.937,36.9659,1.5515,7.473,21.3651,7.575,13.4975,3.04,4.143,52.2413,⋯,9.1335,0.1059,4.088,49.1546,0.5866,7.295,33.145,7.429,14.5832,1.4068
RACTLH,0.513,3.9697,0.2723,5.859,1.5995,5.718,2.2862,0.1355,0.629,6.1618,⋯,1.6733,0.0049,0.486,4.4657,0.0377,5.932,2.4926,5.606,1.9537,0.0491
RACTNY,0.397,0.9533,1.2966,1.5375,0.1327,4.19,0.75,0.3303,0.519,2.101,⋯,0.8985,0.1764,0.452,0.9665,0.6771,1.479,0.2093,2.7,0.7339,0.4161
RACTRF,0.5,1.5253,0.9067,4.727,0.3559,5.8645,1.0508,2.0507,0.954,8.7347,⋯,3.2173,0.7269,0.528,2.0191,0.5579,4.63,0.5446,5.693,1.3284,1.78
RACTRU,1.11,8.9148,1.7533,5.4175,0.7182,6.2145,7.1438,1.9068,1.295,12.292,⋯,11.4745,0.2106,1.476,14.1146,2.0287,5.46,1.5131,6.0545,11.0922,1.9874


# Check that the column names cover every comparison: 6 tissue regions × 3 cell groups = 18 PD-L1 expression columns, and 6 tissue regions × 5 cell groups = 30 cell density columns

In [214]:
# Keep only the columns whose name matches 'pdl1.expression', then report the
# dimensions and the column names.
temp <- select(all.measures, matches('pdl1.expression'))
dim(temp)
print(names(temp))

 [1] "all.tissue_all.cell_pdl1.expression"                                
 [2] "all.tissue_panck.pdl1.cell_pdl1.expression"                         
 [3] "all.tissue_panckneg.cd3neg.pdl1.cell_pdl1.expression"               
 [4] "all.stroma_all.cell_pdl1.expression"                                
 [5] "all.tumor_all.cell_pdl1.expression"                                 
 [6] "all.stroma_panck.pdl1.cell_pdl1.expression"                         
 [7] "all.stroma_panckneg.cd3neg.pdl1.cell_pdl1.expression"               
 [8] "all.tumor_panck.pdl1.cell_pdl1.expression"                          
 [9] "all.tumor_panckneg.cd3neg.pdl1.cell_pdl1.expression"                
[10] "all.tumor.invasive.margin_all.cell_pdl1.expression"                 
[11] "all.tumor.invasive.margin_panck.pdl1.cell_pdl1.expression"          
[12] "all.tumor.invasive.margin_panckneg.cd3neg.pdl1.cell_pdl1.expression"
[13] "inner.tumor.inv.margin_all.cell_pdl1.expression"                    
[14] "outer.tumor.inv.mar

In [215]:
# Keep only the density columns, then report the dimensions and the sorted
# column names. matches() takes a regular expression, so the leading '.'
# matches any single character.
temp <- select(all.measures, matches('.density'))
dim(temp)
print(sort(names(temp)))

 [1] "all.stroma_all.pdl1.cell_percent.density"                           
 [2] "all.stroma_cd8.t.cell_percent.density"                              
 [3] "all.stroma_panck.pdl1.cell_percent.density"                         
 [4] "all.stroma_panckneg.cd3neg.pdl1.cell_percent.density"               
 [5] "all.stroma_treg.cell_percent.density"                               
 [6] "all.tissue_all.pdl1.cell_percent.density"                           
 [7] "all.tissue_cd8.t.cell_percent.density"                              
 [8] "all.tissue_panck.pdl1.cell_percent.density"                         
 [9] "all.tissue_panckneg.cd3neg.pdl1.cell_percent.density"               
[10] "all.tissue_treg.cell_percent.density"                               
[11] "all.tumor.invasive.margin_all.pdl1.cell_percent.density"            
[12] "all.tumor.invasive.margin_cd8.t.cell_percent.density"               
[13] "all.tumor.invasive.margin_panck.pdl1.cell_percent.density"          
[14] "all.tumor.invasive.

In [217]:
# Density columns restricted to the PD-L1 cell classes: first keep names
# matching '.density', then of those keep names matching 'pdl1.cell'.
temp <- select(
    select(all.measures, matches('.density')),
    matches('pdl1.cell')
)
dim(temp)
print(sort(names(temp)))

 [1] "all.stroma_all.pdl1.cell_percent.density"                           
 [2] "all.stroma_panck.pdl1.cell_percent.density"                         
 [3] "all.stroma_panckneg.cd3neg.pdl1.cell_percent.density"               
 [4] "all.tissue_all.pdl1.cell_percent.density"                           
 [5] "all.tissue_panck.pdl1.cell_percent.density"                         
 [6] "all.tissue_panckneg.cd3neg.pdl1.cell_percent.density"               
 [7] "all.tumor.invasive.margin_all.pdl1.cell_percent.density"            
 [8] "all.tumor.invasive.margin_panck.pdl1.cell_percent.density"          
 [9] "all.tumor.invasive.margin_panckneg.cd3neg.pdl1.cell_percent.density"
[10] "all.tumor_all.pdl1.cell_percent.density"                            
[11] "all.tumor_panck.pdl1.cell_percent.density"                          
[12] "all.tumor_panckneg.cd3neg.pdl1.cell_percent.density"                
[13] "inner.tumor.inv.margin_all.pdl1.cell_percent.density"               
[14] "inner.tumor.inv.mar

# Take median of each column

In [224]:
# Column-wise median of all.measures (one value per measure, across samples).
# apply() works on the matrix form of the table, made explicit here.
mv <- apply(as.matrix(all.measures), MARGIN = 2, FUN = median)
mv

# PDL1 density

In [142]:
# List the columns available in the `sub` table.
names(sub)

In [208]:
# Percent density of each panck.pdl1.cell class per sample in `sub`:
# count cells per (sample, class), zero-fill absent combinations, express
# each count as a percentage of the sample's total cell count, then spread
# the classes into columns with the sample name as row name.
sub %>%
    group_by(Sample.Name, panck.pdl1.cell) %>%
    summarize(n = n()) %>%
    ungroup() %>%
    complete(
        Sample.Name = unique(Sample.Name),
        panck.pdl1.cell = unique(panck.pdl1.cell),
        fill = list(n = 0)
    ) %>%
    # the percentage denominator is the per-sample total, including 'other'
    group_by(Sample.Name) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    select(-n) %>%
    pivot_wider(
        names_from = "panck.pdl1.cell",
        values_from = "percent_density"
    ) %>%
    # drop columns whose name matches 'other', 'distal' or 'central'
    select(-matches("other"), -matches("distal"), -matches("central")) %>%
    tibble::column_to_rownames(var = "Sample.Name")

[1m[22m`summarise()` has grouped output by 'Sample.Name'. You can override using the
`.groups` argument.


Unnamed: 0_level_0,panck.pdl1.cell,panckneg.cd3neg.pdl1.cell
Unnamed: 0_level_1,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.7107,1.2127
RACTKE_Scan1.qptiff,38.1146,12.9813


In [206]:
# Percent density of each panck.pdl1.cell class per sample in `sub`:
# count cells per (sample, class), zero-fill absent combinations, express
# each count as a percentage of the sample's total cell count, then spread
# the classes into columns with the sample name as row name.
sub %>%
    group_by(Sample.Name, panck.pdl1.cell) %>%
    summarize(n = n()) %>%
    ungroup() %>%
    complete(
        Sample.Name = unique(Sample.Name),
        panck.pdl1.cell = unique(panck.pdl1.cell),
        fill = list(n = 0)
    ) %>%
    # the percentage denominator is the per-sample total, including 'other'
    group_by(Sample.Name) %>%
    mutate(percent_density = round(n / sum(n) * 100, digits = 4)) %>%
    select(-n) %>%
    pivot_wider(
        names_from = "panck.pdl1.cell",
        values_from = "percent_density"
    ) %>%
    # drop columns whose name matches 'other', 'distal' or 'central'
    select(-matches("other"), -matches("distal"), -matches("central")) %>%
    tibble::column_to_rownames(var = "Sample.Name")

[1m[22m`summarise()` has grouped output by 'Sample.Name'. You can override using the
`.groups` argument.


Unnamed: 0_level_0,panck.pdl1.cell,panckneg.cd3neg.pdl1.cell
Unnamed: 0_level_1,<dbl>,<dbl>
RACTIH_Scan1.qptiff,0.7107,1.2127
RACTKE_Scan1.qptiff,38.1146,12.9813


In [172]:
# Median PD-L1 expression per sample and panck.pdl1.cell class in `sub`.
# Absent (sample, class) combinations are filled with 0, classes are spread
# into columns, the 'other' class is dropped, and every remaining column gets
# the suffix '_pdl1.expression'.
sub %>%
    group_by(Sample.Name, panck.pdl1.cell) %>%
    summarize(median_pdl1_expression = median(pdl1.expression)) %>%
    ungroup() %>%
    complete(
        Sample.Name = unique(Sample.Name),
        panck.pdl1.cell = unique(panck.pdl1.cell),
        fill = list(median_pdl1_expression = 0)
    ) %>%
    pivot_wider(
        names_from = "panck.pdl1.cell",
        values_from = "median_pdl1_expression"
    ) %>%
    tibble::column_to_rownames(var = "Sample.Name") %>%
    select(-matches("other")) %>%
    rename_with(function(nm) paste0(nm, '_pdl1.expression'), everything())

[1m[22m`summarise()` has grouped output by 'Sample.Name'. You can override using the
`.groups` argument.


Unnamed: 0_level_0,panck.pdl1.cell_pdl1.expression,panckneg.cd3neg.pdl1.cell_pdl1.expression
Unnamed: 0_level_1,<dbl>,<dbl>
RACTIH_Scan1.qptiff,3.278,4.041
RACTKE_Scan1.qptiff,6.648,6.917


In [148]:
# Median PD-L1 expression per sample and panck.pdl1.cell class within the
# combined invasive margin. Inner and outer margin cells are pooled under
# "all.tumor.invasive.margin"; every other compartment is labelled "other"
# and its columns are dropped after pivoting.
sub %>%
    mutate(tissue.all.invasive.margin = if_else(
        tissue.compartment %in% c('outer.tumor.inv.margin',
                                  'inner.tumor.inv.margin'),
        "all.tumor.invasive.margin",
        "other"
    )) %>%
    group_by(Sample.Name, tissue.all.invasive.margin, panck.pdl1.cell) %>%
    summarize(median_pdl1_expression = median(pdl1.expression)) %>%
    ungroup() %>%
    # make every sample x region x class combination explicit (0 if absent)
    complete(
        Sample.Name = unique(Sample.Name),
        tissue.all.invasive.margin = unique(tissue.all.invasive.margin),
        panck.pdl1.cell = unique(panck.pdl1.cell),
        fill = list(median_pdl1_expression = 0)
    ) %>%
    pivot_wider(
        names_from = c("tissue.all.invasive.margin", "panck.pdl1.cell"),
        values_from = "median_pdl1_expression"
    ) %>%
    # removes both the "other" region and the 'other' cell class columns
    select(-matches("other"), -matches("distal"), -matches("central")) %>%
    tibble::column_to_rownames(var = "Sample.Name") %>%
    rename_with(function(nm) paste0(nm, '_pdl1.expression'), everything())

[1m[22m`summarise()` has grouped output by 'Sample.Name',
'tissue.all.invasive.margin'. You can override using the `.groups` argument.


Unnamed: 0_level_0,all.tumor.invasive.margin_panck.pdl1.cell_pdl1.expression,all.tumor.invasive.margin_panckneg.cd3neg.pdl1.cell_pdl1.expression
Unnamed: 0_level_1,<dbl>,<dbl>
RACTIH_Scan1.qptiff,3.186,3.999
RACTKE_Scan1.qptiff,6.535,6.811


In [146]:
# Toy example of case_when(): a five-row data frame with x = 1..5
df.test <- data.frame(x = seq_len(5))

# Add a label column y: 1, 2 and 3 are spelled out, anything else is "other"
mutate(
    df.test,
    y = case_when(
        x == 1 ~ "one",
        x == 2 ~ "two",
        x == 3 ~ "three",
        TRUE ~ "other"
    )
)

x,y
<int>,<chr>
1,one
2,two
3,three
4,other
5,other
