# Part 11: Proportionality analysis of populations in T1D and Healthy

In this document, we analyze the frequencies of the populations and subpopulations in T1D patients and healthy donors. In the manuscript, we performed a frequentist analysis, which is presented here, and a Bayesian analysis, which is described in a separate GitHub [repository by @martinmodrak](https://github.com/martinmodrak/diabetes_populace). 

During the revision process, we added a part focusing on the proportion of subpopulations in different age groups. 

In [None]:
# Loading of packages and scripts
source("diabetes_analysis_v07.R")

## CD4 populations

In this part of the analysis, we will quantify the frequencies of CD4 populations and subpopulations from parent and total counts. The resulting tables will be used for frequentist and Bayesian analysis. 

We will load the CD4 dataset and add metadata to ensure that we are not losing patients with zero counts of cells in any subpopulations.

In [None]:
cd4_l1_full_filt  <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")

In [None]:
# Unique combinations of the sample-level columns found in the cell-level
# metadata of the CD4 object.
cd4_patient_meta <- unique(
  dplyr::select(
    cd4_l1_full_filt@meta.data,
    Sample_ID, Condition, Condition2, Disease, Sex, Age, Age_group,
    Patient_ID, Time, Experiment_ID
  )
)

We will calculate the frequencies of the Level3 subpopulations from total. 

In [None]:
# Cell counts per sample and Level 3 annotation, one column per annotation
# (annotations absent from a sample are filled with 0).
df3 <- cd4_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l3) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = "annotations_l3", values_from = "n", values_fill = 0)

# Start from every Sample_ID listed in `@misc$all_md` so that samples without
# any cell in the object are kept; their missing counts are set to 0.
df_l3 <- cd4_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df_l3 <- replace(df_l3, is.na(df_l3), 0)

# Back to long format, then re-attach the non-grouping metadata columns that
# were lost while counting.
md_to_join <- unique(cd4_l1_full_filt@misc$all_md)
df_l3 <- df_l3 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations") %>%
  left_join(md_to_join) %>%
  mutate(Level = "L3")

Check the resulting table:

In [None]:
df_l3

We will calculate the frequencies of the Level2 populations from total. 

In [None]:
# Cell counts per sample and Level 2 annotation, one column per annotation
# (annotations absent from a sample are filled with 0).
df3 <- cd4_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l2) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = "annotations_l2", values_from = "n", values_fill = 0)

# Start from every Sample_ID listed in `@misc$all_md` so that samples without
# any cell in the object are kept; their missing counts are set to 0.
df_l2 <- cd4_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df_l2 <- replace(df_l2, is.na(df_l2), 0)

# Back to long format, then re-attach the non-grouping metadata columns that
# were lost while counting.
md_to_join <- unique(cd4_l1_full_filt@misc$all_md)
df_l2 <- df_l2 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations") %>%
  left_join(md_to_join) %>%
  mutate(Level = "L2")

Check the resulting table:

In [None]:
df_l2

Finally, we will calculate the frequencies of the Level1 populations from total. 

In [None]:
# Cell counts per sample and Level 1 annotation, one column per annotation
# (annotations absent from a sample are filled with 0).
df3 <- cd4_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l1) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = "annotations_l1", values_from = "n", values_fill = 0)

# Start from every Sample_ID listed in `@misc$all_md` so that samples without
# any cell in the object are kept; their missing counts are set to 0.
df_l1 <- cd4_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df_l1 <- replace(df_l1, is.na(df_l1), 0)

# Back to long format, then re-attach the non-grouping metadata columns that
# were lost while counting.
md_to_join <- unique(cd4_l1_full_filt@misc$all_md)
df_l1 <- df_l1 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations") %>%
  left_join(md_to_join) %>%
  mutate(Level = "L1")

Check the resulting table:

In [None]:
df_l1

Now let's bind the tables with the three levels of populations into one table.

In [None]:
df_all_levels  <- rbind(df_l1, df_l2, df_l3)

In [None]:
all_counts  <- df_all_levels

We will also add the frequency of total for each population and level. 

In [None]:
# Fraction of each population within its sample and annotation level.
# The result stays grouped by Sample_ID and Level.
df3 <- mutate(
  group_by(all_counts, Sample_ID, Level),
  freq_from_total = n / sum(n)
)

In [None]:
df3

To calculate the frequency of parent, we need to set the parent population for each subpopulation.

In [None]:
# Split the "---"-separated annotation into its first two components (any third
# component is discarded) and derive the parent population of every row:
#   L1 -> "CD4", L2 -> first component, L3 -> first and second component.
df3 <- df3 %>%
  separate(
    annotations,
    into = c("annot_l1", "annot_l2", NA),
    sep = "---",
    remove = FALSE
  ) %>%
  mutate(
    Parent_annotation = case_when(
      Level == "L1" ~ "CD4",
      Level == "L2" ~ annot_l1,
      Level == "L3" ~ paste(annot_l1, annot_l2, sep = "---")
    )
  )

In [None]:
df3

Next, we will calculate the total count per parent population for each level and each patient.

In [None]:
# Distinct annotation labels present at Level 1 and at Level 2; these are the
# parent populations of the Level 2 and Level 3 rows, respectively.
levels_l1 <- unique(all_counts$annotations[all_counts$Level == "L1"])
levels_l2 <- unique(all_counts$annotations[all_counts$Level == "L2"])

We will iterate through the level 1 and level 2 annotations to obtain the total counts of each parent population per patient.

In [None]:
# Total cell count of every parent population per sample.
#
# Level "L2" rows are summed under their Level 1 parent (`levels_l1`) and
# Level "L3" rows under their Level 2 parent (`levels_l2`). The result has one
# row per sample and parent with the columns Sample_ID, total_per_patient,
# Level (the level of the children) and Parent_annotation.
#
# A row counts as a child of a parent when its annotation is the parent label
# itself or starts with the parent label followed by the "---" separator.
# The comparison is literal: a regular-expression match would misread labels
# containing characters such as "+" or "(" and would also pick up labels that
# merely contain the parent label.
df_sum_of_parent <- local({
  sum_children <- function(parent_label, child_level) {
    df3 %>%
      ungroup() %>%
      filter(
        Level == child_level,
        annotations == parent_label |
          startsWith(annotations, paste0(parent_label, "---"))
      ) %>%
      group_by(Sample_ID) %>%
      summarise(total_per_patient = sum(n)) %>%
      mutate(Level = child_level, Parent_annotation = parent_label)
  }

  # Build all pieces first and bind once; empty label vectors simply
  # contribute no rows.
  bind_rows(
    lapply(levels_l1, sum_children, child_level = "L2"),
    lapply(levels_l2, sum_children, child_level = "L3")
  )
})

Check the resulting table:

In [None]:
df_sum_of_parent

Now, let's join the total counts to our previous table. 

In [None]:
df4  <- left_join(df3, df_sum_of_parent)

As we now have the counts of both the population itself and the parent population per each sample, we can calculate the frequency from parent population.  

In [None]:
df4$freq_from_parent  <- df4$n/df4$total_per_patient

Check the resulting table:

In [None]:
df4

We will convert frequencies to percentage and add zeroes to subpopulations where parent population has zero count.

In [None]:
df4$pct_from_total  <- df4$freq_from_total*100
df4$pct_from_parent  <- df4$freq_from_parent*100


In [None]:
# Below Level 1, a missing frequency from parent is replaced by 0 in both the
# fraction and the percentage column; Level 1 rows are left as they are.
df4 <- df4 %>%
  mutate(
    across(
      c(freq_from_parent, pct_from_parent),
      ~ ifelse(is.na(.x) & Level != "L1", 0, .x)
    )
  )

Check the resulting table:

In [None]:
df4

We have the table with frequencies of CD4 subpopulations prepared, so we will save it and perform the same analysis for CD8 T cells. The CD4 subpopulations are stored in `freq_cd4`. 

In [None]:
freq_cd4  <- df4

## CD8 populations

The process for CD8 T cells is the same as for CD4 T cells. For comments, see above. 

In [None]:
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Unique combinations of the sample-level columns found in the cell-level
# metadata of the CD8 object.
cd8_patient_meta <- unique(
  dplyr::select(
    cd8_l1_full_filt@meta.data,
    Sample_ID, Condition, Condition2, Disease, Sex, Age, Age_group,
    Patient_ID, Time, Experiment_ID
  )
)

In [None]:
# Cell counts per sample and Level 3 annotation, one column per annotation
# (annotations absent from a sample are filled with 0).
df3 <- cd8_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l3) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = "annotations_l3", values_from = "n", values_fill = 0)

# Start from every Sample_ID listed in `@misc$all_md` so that samples without
# any cell in the object are kept; their missing counts are set to 0.
df4 <- cd8_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df4 <- replace(df4, is.na(df4), 0)

# Back to long format, then re-attach the non-grouping metadata columns that
# were lost while counting.
md_to_join <- unique(cd8_l1_full_filt@misc$all_md)
df4 <- df4 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations") %>%
  left_join(md_to_join)

In [None]:
df4$Level  <- "L3"

In [None]:
df_l3  <- df4

In [None]:
# Cell counts per sample and Level 2 annotation, one column per annotation
# (annotations absent from a sample are filled with 0).
df3 <- cd8_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l2) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = "annotations_l2", values_from = "n", values_fill = 0)

# Start from every Sample_ID listed in `@misc$all_md` so that samples without
# any cell in the object are kept; their missing counts are set to 0.
df4 <- cd8_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df4 <- replace(df4, is.na(df4), 0)

# Back to long format, then re-attach the non-grouping metadata columns that
# were lost while counting.
md_to_join <- unique(cd8_l1_full_filt@misc$all_md)
df4 <- df4 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations") %>%
  left_join(md_to_join) %>%
  mutate(Level = "L2")

In [None]:
df_l2  <- df4

In [None]:
df4

In [None]:
# Cell counts per sample and Level 1 annotation, one column per annotation
# (annotations absent from a sample are filled with 0).
df3 <- cd8_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l1) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = "annotations_l1", values_from = "n", values_fill = 0)

# Start from every Sample_ID listed in `@misc$all_md` so that samples without
# any cell in the object are kept; their missing counts are set to 0.
df4 <- cd8_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df4 <- replace(df4, is.na(df4), 0)

# Back to long format, then re-attach the non-grouping metadata columns that
# were lost while counting.
md_to_join <- unique(cd8_l1_full_filt@misc$all_md)
df4 <- df4 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations") %>%
  left_join(md_to_join) %>%
  mutate(Level = "L1")

df_l1 <- df4

In [None]:
df_l3

In [None]:
df_all_levels  <- rbind(df_l1, df_l2, df_l3)

In [None]:
df_all_levels

In [None]:
all_counts  <- df_all_levels

In [None]:
# Fraction of each population within its sample and annotation level.
# The result stays grouped by Sample_ID and Level.
df3 <- mutate(
  group_by(all_counts, Sample_ID, Level),
  freq_from_total = n / sum(n)
)

In [None]:
df3

In [None]:
# Split the "---"-separated annotation into its first two components (any third
# component is discarded) and derive the parent population of every row:
#   L1 -> "CD8", L2 -> first component, L3 -> first and second component.
df3 <- df3 %>%
  separate(
    annotations,
    into = c("annot_l1", "annot_l2", NA),
    sep = "---",
    remove = FALSE
  ) %>%
  mutate(
    Parent_annotation = case_when(
      Level == "L1" ~ "CD8",
      Level == "L2" ~ annot_l1,
      Level == "L3" ~ paste(annot_l1, annot_l2, sep = "---")
    )
  )

In [None]:
df3

In [None]:
# Distinct annotation labels present at Level 1 and at Level 2; these are the
# parent populations of the Level 2 and Level 3 rows, respectively.
levels_l1 <- unique(all_counts$annotations[all_counts$Level == "L1"])
levels_l2 <- unique(all_counts$annotations[all_counts$Level == "L2"])

In [None]:

# Total cell count of every parent population per sample.
#
# Level "L2" rows are summed under their Level 1 parent (`levels_l1`) and
# Level "L3" rows under their Level 2 parent (`levels_l2`). The result has one
# row per sample and parent with the columns Sample_ID, total_per_patient,
# Level (the level of the children) and Parent_annotation.
#
# A row counts as a child of a parent when its annotation is the parent label
# itself or starts with the parent label followed by the "---" separator.
# The comparison is literal: a regular-expression match would misread labels
# containing characters such as "+" or "(" and would also pick up labels that
# merely contain the parent label.
df_sum_of_parent <- local({
  sum_children <- function(parent_label, child_level) {
    df3 %>%
      ungroup() %>%
      filter(
        Level == child_level,
        annotations == parent_label |
          startsWith(annotations, paste0(parent_label, "---"))
      ) %>%
      group_by(Sample_ID) %>%
      summarise(total_per_patient = sum(n)) %>%
      mutate(Level = child_level, Parent_annotation = parent_label)
  }

  # Build all pieces first and bind once; empty label vectors simply
  # contribute no rows.
  bind_rows(
    lapply(levels_l1, sum_children, child_level = "L2"),
    lapply(levels_l2, sum_children, child_level = "L3")
  )
})

In [None]:
df_sum_of_parent

In [None]:
df4  <- left_join(df3, df_sum_of_parent)

In [None]:
df4$freq_from_parent  <- df4$n/df4$total_per_patient

In [None]:
df4$pct_from_total  <- df4$freq_from_total*100
df4$pct_from_parent  <- df4$freq_from_parent*100


In [None]:
# Below Level 1, a missing frequency from parent is replaced by 0 in both the
# fraction and the percentage column; Level 1 rows are left as they are.
df4 <- df4 %>%
  mutate(
    across(
      c(freq_from_parent, pct_from_parent),
      ~ ifelse(is.na(.x) & Level != "L1", 0, .x)
    )
  )

In [None]:
freq_cd8  <- df4

We have both tables ready, so we will save the CD8 table in `freq_cd8` and we can move on to merging the tables for CD4 and CD8 T cells. 

## Merge CD4 and CD8 T cell tables

In [None]:
freq_cd4$Main  <- "CD4"
freq_cd8$Main  <- "CD8"


In [None]:
populations_2  <- rbind(freq_cd4, freq_cd8)

In [None]:
populations_2$Experiment_ID  %>% table

In [None]:
write.csv(populations_2, "../tables/populations_2.csv")

In [None]:
populations_2

# Population statistics

## Correlation of populations in preliminary and final

### Freq from total

In [None]:
populations_2  <- read_csv("../tables/populations_2.csv")
populations_2$`...1`  <- NULL

In [None]:
# Label every row by experiment batch: Exp08, Exp10 and Exp11 are "Prelim",
# everything else is "Final".
populations_2 <- dplyr::mutate(
  populations_2,
  prelim_final = ifelse(
    Experiment_ID %in% c("Exp08", "Exp10", "Exp11"),
    "Prelim",
    "Final"
  )
)

In [None]:
IDs  <- populations_2  %>% dplyr::filter(prelim_final == "Prelim")  %>% pull(Patient_ID)  %>% unique

In [None]:
IDs

In [None]:
# T0 frequencies of the patients listed in `IDs`, with the "Prelim" and
# "Final" values of each population side by side in separate columns.
popul_final_freq_from_total <- populations_2 %>%
  ungroup() %>%
  mutate(
    prelim_final = ifelse(
      Experiment_ID %in% c("Exp08", "Exp10", "Exp11"),
      "Prelim",
      "Final"
    )
  ) %>%
  dplyr::filter(Patient_ID %in% IDs, Time == "T0") %>%
  dplyr::select(
    Patient_ID, Time, annotations, prelim_final, freq_from_total, Level
  ) %>%
  pivot_wider(names_from = "prelim_final", values_from = freq_from_total)

In [None]:
popul_final_freq_from_total

In [None]:
options(repr.plot.width = 20, repr.plot.height = 15)
# Level 2 populations: Final (x) against Prelim (y) frequency, coloured by
# population, with a linear fit and the correlation statistic.
ggplot(
  dplyr::filter(popul_final_freq_from_total, Level == "L2"),
  aes(x = Final, y = Prelim)
) +
  geom_point(
    aes(color = as.factor(annotations)),
    shape = 16, alpha = 1, size = 2
  ) +
  geom_smooth(method = lm) +
  theme(legend.position = "bottom") +
  ggpubr::stat_cor()

In [None]:
# Mean Final and Prelim frequency of every Level 2 population (rows with a
# missing value are dropped by the formula interface of aggregate()).
l2_only <- dplyr::filter(popul_final_freq_from_total, Level == "L2")
centroids <- aggregate(
  cbind(Final, Prelim) ~ annotations,
  data = l2_only,
  FUN = mean
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)
# Level 2 populations: Final (x) against Prelim (y) frequency per patient
# (transparent points) with the per-population centroids on top.
#
# The plot is printed inside suppressWarnings() because ggplot2 raises its
# warnings while the plot is drawn, not while it is built; wrapping only the
# construction would not silence them.
#
# NOTE(review): theme_classic() is a complete theme, so the theme() tweaks
# added before it (legend position, x-axis text angle) are presumably reset
# unless ggtheme() restores them - confirm the intended order.
suppressWarnings(print(
  ggplot(
    dplyr::filter(popul_final_freq_from_total, Level == "L2"),
    aes(x = Final, y = Prelim)
  ) +
    geom_smooth(method = lm, alpha = 0.2) +
    theme(legend.position = "bottom") +
    geom_point(
      aes(
        color = as.factor(annotations),
        fill = as.factor(annotations),
        shape = as.factor(annotations)
      ),
      alpha = 0.3, size = 3
    ) +
    ggpubr::stat_cor() +
    geom_point(
      data = centroids,
      aes(fill = as.factor(annotations), shape = as.factor(annotations)),
      size = 5, color = "black"
    ) +
    # One shape scale only: adding the same scale a second time just replaces
    # the first one and emits a message.
    scale_shape_manual(values = rep(21:25, 4)) +
    theme(axis.text.x = element_text(angle = 90)) +
    theme_classic() +
    ggtheme()
))

In [None]:
ls()

In [None]:
# Create the output folder (and any missing parents); stay silent when it
# already exists so the notebook can be re-run.
dir.create("../figures/QC/", showWarnings = FALSE, recursive = TRUE)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)
# Level 2 populations: Final (x) against Prelim (y) frequency per patient
# (transparent points) with the per-population centroids on top.
#
# NOTE(review): theme_classic() is a complete theme, so the theme() tweaks
# added before it (legend position, x-axis text angle) are presumably reset
# unless ggtheme() restores them - confirm the intended order.
ggplot(
  dplyr::filter(popul_final_freq_from_total, Level == "L2"),
  aes(x = Final, y = Prelim)
) +
  geom_smooth(method = lm, alpha = 0.2) +
  theme(legend.position = "bottom") +
  geom_point(
    aes(
      color = as.factor(annotations),
      fill = as.factor(annotations),
      shape = as.factor(annotations)
    ),
    alpha = 0.3, size = 3
  ) +
  ggpubr::stat_cor(size = 5) +
  geom_point(
    data = centroids,
    aes(fill = as.factor(annotations), shape = as.factor(annotations)),
    size = 5, color = "black"
  ) +
  # One shape scale only: adding the same scale a second time just replaces
  # the first one and emits a message.
  scale_shape_manual(values = rep(21:25, 4)) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_classic() +
  ggtheme()

# ggsave() without a plot argument writes the last plot.
ggsave("../figures/QC/final_vs_preliminary.svg", width = 44, height = 25, units = "cm")
ggsave("../figures/QC/final_vs_preliminary.png", width = 44, height = 25, units = "cm")

In [None]:
options(warn = -1) 

In [None]:
options(repr.plot.width = 15, repr.plot.height = 15)
# One panel per Level 2 population (free axes): Final against Prelim
# frequency with a linear fit and the correlation statistic; legend removed.
ggplot(
  dplyr::filter(popul_final_freq_from_total, Level == "L2"),
  aes(x = Final, y = Prelim)
) +
  facet_wrap(~annotations, ncol = 4, scales = "free") +
  geom_smooth(method = lm, alpha = 0.2) +
  theme(legend.position = "bottom") +
  ggpubr::stat_cor(size = 5) +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_point(
    aes(fill = as.factor(annotations), shape = as.factor(annotations)),
    size = 3
  ) +
  scale_shape_manual(values = rep(21:25, 4)) +
  theme_classic() +
  ggtheme() +
  NoLegend()

## Correlation of populations in T0 vs T1

### Freq from total

In [None]:
# Patients with disease label "Dia" that have a T0 sample in the final experiments.
IDs_1 <- unique(
  pull(
    dplyr::filter(
      populations_2,
      prelim_final == "Final", Time == "T0", Disease == "Dia"
    ),
    Patient_ID
  )
)

In [None]:
# Patients with disease label "Dia" that have a T1 sample in the final experiments.
IDs_2 <- unique(
  pull(
    dplyr::filter(
      populations_2,
      prelim_final == "Final", Time == "T1", Disease == "Dia"
    ),
    Patient_ID
  )
)

In [None]:
IDs  <- intersect(IDs_1, IDs_2)

In [None]:
IDs

In [None]:
# Frequencies from the final experiments for the patients listed in `IDs`,
# with one column per time point.
popul_final_freq_from_total <- populations_2 %>%
  ungroup() %>%
  mutate(
    prelim_final = ifelse(
      Experiment_ID %in% c("Exp08", "Exp10", "Exp11"),
      "Prelim",
      "Final"
    )
  ) %>%
  dplyr::filter(Patient_ID %in% IDs, prelim_final == "Final") %>%
  dplyr::select(Patient_ID, Time, annotations, freq_from_total, Level) %>%
  pivot_wider(names_from = "Time", values_from = freq_from_total)

In [None]:
popul_final_freq_from_total  %>% dplyr::filter(!is.na(T1))

In [None]:
options(repr.plot.width = 20, repr.plot.height = 15)
# Level 2 populations: T0 (x) against T1 (y) frequency, coloured by
# population, with a linear fit and the correlation statistic.
ggplot(
  dplyr::filter(popul_final_freq_from_total, Level == "L2"),
  aes(x = T0, y = T1)
) +
  geom_point(
    aes(color = as.factor(annotations)),
    shape = 16, alpha = 1, size = 2
  ) +
  geom_smooth(method = lm) +
  theme(legend.position = "bottom") +
  ggpubr::stat_cor()

In [None]:
# Mean T1 and T0 frequency of every Level 2 population (rows with a missing
# value are dropped by the formula interface of aggregate()).
l2_only <- dplyr::filter(popul_final_freq_from_total, Level == "L2")
centroids <- aggregate(
  cbind(T1, T0) ~ annotations,
  data = l2_only,
  FUN = mean
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)
# Level 2 populations: T0 (x) against T1 (y) frequency per patient
# (transparent points) with the per-population centroids on top.
#
# NOTE(review): theme_classic() is a complete theme, so the theme() tweaks
# added before it (legend position, x-axis text angle) are presumably reset
# unless ggtheme() restores them - confirm the intended order.
ggplot(
  dplyr::filter(popul_final_freq_from_total, Level == "L2"),
  aes(x = T0, y = T1)
) +
  geom_smooth(method = lm, alpha = 0.2) +
  theme(legend.position = "bottom") +
  geom_point(
    aes(
      color = as.factor(annotations),
      fill = as.factor(annotations),
      shape = as.factor(annotations)
    ),
    alpha = 0.3, size = 3
  ) +
  ggpubr::stat_cor(size = 5) +
  geom_point(
    data = centroids,
    aes(fill = as.factor(annotations), shape = as.factor(annotations)),
    size = 5, color = "black"
  ) +
  # One shape scale only: adding the same scale a second time just replaces
  # the first one and emits a message.
  scale_shape_manual(values = rep(21:25, 4)) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_classic() +
  ggtheme()

# ggsave() without a plot argument writes the last plot.
ggsave("../figures/QC/t1_vs_t0.svg", width = 44, height = 25, units = "cm")
ggsave("../figures/QC/t1_vs_t0.png", width = 44, height = 25, units = "cm")

In [None]:
options(repr.plot.width = 15, repr.plot.height = 15)
# One panel per Level 2 population (free axes): T0 against T1 frequency with
# a linear fit and the correlation statistic; legend removed.
ggplot(
  dplyr::filter(popul_final_freq_from_total, Level == "L2"),
  aes(x = T0, y = T1)
) +
  facet_wrap(~annotations, ncol = 4, scales = "free") +
  geom_smooth(method = lm, alpha = 0.2) +
  theme(legend.position = "bottom") +
  ggpubr::stat_cor(size = 5) +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_point(
    aes(fill = as.factor(annotations), shape = as.factor(annotations)),
    size = 3
  ) +
  scale_shape_manual(values = rep(21:25, 4)) +
  theme_classic() +
  ggtheme() +
  NoLegend()

# Correlation with metadata 

In this part, we will see which level 2 populations correlate with variables included in metadata, such as age and sex of participants. 

In [None]:
colnames(populations_2)

In [None]:
# Preview: Level 2 frequencies from Exp16/18/19/20 in wide format, one column
# per population next to the Sex, Disease, Time, Age and Patient_ID columns.
populations_2 %>%
  dplyr::filter(
    Level == "L2",
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  dplyr::select(
    annotations, Sex, Disease, Time, Age, freq_from_total, Patient_ID
  ) %>%
  pivot_wider(
    names_from = annotations,
    values_from = freq_from_total,
    values_fill = 0
  )


In [None]:
# Level 2 frequencies from Exp16/18/19/20 in wide format (one column per
# population) together with Sex, Disease, Time and Age; Patient_ID is only
# needed to identify the rows while widening and is dropped afterwards.
df_md_cor <- populations_2 %>%
  dplyr::filter(
    Level == "L2",
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  dplyr::select(
    annotations, Sex, Disease, Time, Age, freq_from_total, Patient_ID
  ) %>%
  pivot_wider(
    names_from = annotations,
    values_from = freq_from_total,
    values_fill = 0
  ) %>%
  dplyr::select(!Patient_ID)


In [None]:
library(ggcorrplot)

In [None]:
# P-values of the pairwise correlations between all columns of the design
# matrix built from `df_md_cor` (no intercept).
p.mat <- cor_pmat(
  model.matrix(~ 0 + ., data = df_md_cor),
  use = "pairwise.complete.obs"
)

In [None]:
options(repr.plot.width = 12, repr.plot.height = 12)

# Correlation matrix of the design-matrix columns drawn as circles, ordered by
# hierarchical clustering; correlations flagged as insignificant by `p.mat`
# are left blank.
ggcorrplot(
  cor(
    model.matrix(~ 0 + ., data = df_md_cor),
    use = "pairwise.complete.obs"
  ),
  show.diag = FALSE,
  method = "circle",
  lab = FALSE,
  lab_size = 2,
  hc.order = TRUE,
  p.mat = p.mat,
  type = "full",
  insig = "blank"
)

In [None]:
ggsave("../figures/md_correlation/md_correlation_heatmap.svg", width = 14, height = 14, create.dir = TRUE)

# Correlation with clinical data

## Freq from total

In [None]:
populations_2  %>% colnames

In [None]:
populations_2  %>% ncol

In [None]:
# Wide table of frequencies from total for the final experiments: population
# columns are named "<Level> <annotation>".
# NOTE(review): the seven columns are picked by position after dropping
# `Main`; later steps rely on Condition, Patient_ID, annotations,
# freq_from_total and prelim_final being among them - confirm the positions
# against colnames(populations_2).
md3 <- populations_2 %>%
  ungroup() %>%
  dplyr::select(!Main) %>%
  mutate(annotations = paste(Level, annotations)) %>%
  dplyr::select(c(6, 7, 10, 12, 2, 22, 16)) %>%
  dplyr::filter(prelim_final == "Final") %>%
  unique() %>%
  pivot_wider(names_from = "annotations", values_from = "freq_from_total")

In [None]:
md3

In [None]:
md_cpept_orig  <- read_xlsx("../data/metadata_v06.xlsx") 

In [None]:
colnames(md_cpept_orig)

In [None]:
# C-peptide measurements per patient and time point; `Patient_Time` is the
# key "<patient> <time_taken>" used to join with the frequency table.
md_cpept <- read_xlsx("../data/metadata_v06.xlsx") %>%
  mutate(Patient_Time = paste(patient, time_taken)) %>%
  dplyr::select(
    Patient_Time, fasting_cpept_T1, fasting_cpept_1, c_peptide_chang
  ) %>%
  unique()

In [None]:
# Keep only the rows whose key starts with "1"; `group` holds that first
# character.
md_cpept <- dplyr::filter(
  mutate(md_cpept, group = substr(Patient_Time, 1, 1)),
  group == 1
)

In [None]:
md_cpept

In [None]:
md3  <- md3  %>% separate(Condition, into = c("Disease", "Time"), remove = F, sep = " ")

In [None]:
md3$Patient_Time  <- paste(md3$Patient_ID, md3$Time)

In [None]:
md4  <- md_cpept  %>% left_join(md3)  %>% dplyr::filter(!is.na(fasting_cpept_T1))

In [None]:
md4 

In [None]:
# Positions of the columns of `md4` whose name starts with "L".
population_colnames <- which(startsWith(colnames(md4), "L"))

In [None]:
population_colnames  %>% length

In [None]:
which(colnames(md4) == "fasting_cpept_1")

In [None]:
md4

In [None]:
#' Correlate one population column of `md4` with the three C-peptide columns.
#'
#' Reads the global data frame `md4`. Its columns 2, 3 and 4 are used as the
#' outcomes and are reported under the labels "fasting_cpept_T1",
#' "fasting_cpept" and "c_peptide_change".
#'
#' @param i Position of the population column in `md4`.
#' @param n_tests Multiplier of the Bonferroni-style adjustment
#'   (`padj = min(1, pval * n_tests)`). Defaults to 102, the value that was
#'   previously hard-coded.
#' @return A data frame with three rows (one per outcome) and the columns
#'   `population`, `cor`, `pval`, `padj` and `test`.
calc_correlation <- function(i, n_tests = 102) {
  correlate_with <- function(outcome_col, label) {
    pair <- data.frame(value = md4[[i]], outcome = md4[[outcome_col]])
    # Keep complete pairs only. cor.test() would drop incomplete pairs as
    # well; doing it here makes the intended filtering explicit.
    pair <- pair[stats::complete.cases(pair), , drop = FALSE]
    ct <- stats::cor.test(pair$value, pair$outcome)
    data.frame(
      population = colnames(md4)[i],
      cor = ct$estimate,
      pval = ct$p.value,
      padj = min(1, ct$p.value * n_tests),
      test = label
    )
  }

  rbind(
    correlate_with(2, "fasting_cpept_T1"),
    correlate_with(3, "fasting_cpept"),
    correlate_with(4, "c_peptide_change")
  )
}

In [None]:
populations_corr  <- future_map(population_colnames, calc_correlation)

In [None]:
populations_corr  <- bind_rows(populations_corr)

In [None]:
populations_corr  %>% arrange(pval)

## L2 CD4

In [None]:
# Positions of the columns of `md4` whose name starts with "L2" and contains "CD4".
population_colnames_l2 <- which(
  startsWith(colnames(md4), "L2") & grepl("CD4", colnames(md4))
)

In [None]:
population_colnames_l2

In [None]:
md4

In [None]:
#' Correlate one population column of `md4` with the three C-peptide columns.
#'
#' Reads the global data frame `md4`. Its columns 2, 3 and 4 are used as the
#' outcomes and are reported under the labels "fasting_cpept_T1",
#' "fasting_cpept" and "c_peptide_change".
#'
#' @param i Position of the population column in `md4`.
#' @param n_tests Multiplier of the Bonferroni-style adjustment
#'   (`padj = min(1, pval * n_tests)`). Defaults to 10, the value that was
#'   previously hard-coded.
#' @return A data frame with three rows (one per outcome) and the columns
#'   `population`, `cor`, `pval`, `padj` and `test`.
calc_correlation <- function(i, n_tests = 10) {
  correlate_with <- function(outcome_col, label) {
    pair <- data.frame(value = md4[[i]], outcome = md4[[outcome_col]])
    # Keep complete pairs only. cor.test() would drop incomplete pairs as
    # well; doing it here makes the intended filtering explicit.
    pair <- pair[stats::complete.cases(pair), , drop = FALSE]
    ct <- stats::cor.test(pair$value, pair$outcome)
    data.frame(
      population = colnames(md4)[i],
      cor = ct$estimate,
      pval = ct$p.value,
      padj = min(1, ct$p.value * n_tests),
      test = label
    )
  }

  rbind(
    correlate_with(2, "fasting_cpept_T1"),
    correlate_with(3, "fasting_cpept"),
    correlate_with(4, "c_peptide_change")
  )
}

In [None]:
populations_corr_cd4  <- future_map(population_colnames_l2, calc_correlation)

In [None]:
populations_corr  <- bind_rows(populations_corr_cd4)

populations_corr  %>% arrange(pval)

In [None]:
# Strip the "L2 <Level 1 label>---" prefix so only the Level 2 population
# name remains as the label.
populations_corr <- populations_corr %>%
  mutate(
    population = gsub("L2 CD4 T cells---", "", population, fixed = TRUE),
    population = gsub(
      "L2 CD4 Unconventional T cells---", "", population,
      fixed = TRUE
    )
  )

In [None]:
options(repr.plot.width = 36, repr.plot.height = 7)
# One dot-plot panel per C-peptide outcome, combined side by side with `+`.
# Populations are ordered by their correlation; dot size is -log(p-value)
# and dot colour is the correlation coefficient.
Reduce(
  `+`,
  lapply(
    c("c_peptide_change", "fasting_cpept_T1", "fasting_cpept"),
    function(test_name) {
      populations_corr %>%
        dplyr::filter(test == test_name) %>%
        arrange(cor) %>%
        ggplot(aes(x = test, y = reorder(population, cor))) +
        geom_point(aes(size = -log(pval), color = cor)) +
        facet_wrap(~test) +
        ylab("") +
        xlab("") +
        scale_color_gradient2(low = "blue", mid = "white", high = "red") +
        theme_classic() +
        ggtheme() +
        theme(axis.text.x = element_text(angle = 90))
    }
  )
)

In [None]:
ggsave("../figures/populations_correlations/population_correlations_L2_cd4.png", width = 72, height = 15, units = "cm", create.dir = TRUE)
ggsave("../figures/populations_correlations/population_correlations_L2_cd4.svg", width = 72, height = 15, units = "cm")

## L2 CD8

In [None]:
# Positions of the columns of `md4` whose name starts with "L2" and contains "CD8".
population_colnames_l2 <- which(
  startsWith(colnames(md4), "L2") & grepl("CD8", colnames(md4))
)

In [None]:
population_colnames_l2

In [None]:
md4

In [None]:
#' Correlate one population column of `md4` with the three C-peptide columns.
#'
#' Reads the global data frame `md4`. Its columns 2, 3 and 4 are used as the
#' outcomes and are reported under the labels "fasting_cpept_T1",
#' "fasting_cpept" and "c_peptide_change".
#'
#' @param i Position of the population column in `md4`.
#' @param n_tests Multiplier of the Bonferroni-style adjustment
#'   (`padj = min(1, pval * n_tests)`). Defaults to 10, the value that was
#'   previously hard-coded.
#' @return A data frame with three rows (one per outcome) and the columns
#'   `population`, `cor`, `pval`, `padj` and `test`.
calc_correlation <- function(i, n_tests = 10) {
  correlate_with <- function(outcome_col, label) {
    pair <- data.frame(value = md4[[i]], outcome = md4[[outcome_col]])
    # Keep complete pairs only. cor.test() would drop incomplete pairs as
    # well; doing it here makes the intended filtering explicit.
    pair <- pair[stats::complete.cases(pair), , drop = FALSE]
    ct <- stats::cor.test(pair$value, pair$outcome)
    data.frame(
      population = colnames(md4)[i],
      cor = ct$estimate,
      pval = ct$p.value,
      padj = min(1, ct$p.value * n_tests),
      test = label
    )
  }

  rbind(
    correlate_with(2, "fasting_cpept_T1"),
    correlate_with(3, "fasting_cpept"),
    correlate_with(4, "c_peptide_change")
  )
}

In [None]:
populations_corr_cd8  <- future_map(population_colnames_l2, calc_correlation)

In [None]:
populations_corr  <- bind_rows(populations_corr_cd8)

populations_corr  %>% arrange(pval)

In [None]:
# Strip the "L2 <Level 1 label>---" prefix so only the Level 2 population
# name remains as the label.
populations_corr <- populations_corr %>%
  mutate(
    population = gsub("L2 CD8 T cells---", "", population, fixed = TRUE),
    population = gsub(
      "L2 CD8 Unconventional T cells---", "", population,
      fixed = TRUE
    )
  )

In [None]:
options(repr.plot.width = 36, repr.plot.height = 7)
# One dot-plot panel per C-peptide outcome, combined side by side with `+`.
# Populations are ordered by their correlation; dot size is -log(p-value)
# and dot colour is the correlation coefficient.
Reduce(
  `+`,
  lapply(
    c("c_peptide_change", "fasting_cpept_T1", "fasting_cpept"),
    function(test_name) {
      populations_corr %>%
        dplyr::filter(test == test_name) %>%
        arrange(cor) %>%
        ggplot(aes(x = test, y = reorder(population, cor))) +
        geom_point(aes(size = -log(pval), color = cor)) +
        facet_wrap(~test) +
        ylab("") +
        xlab("") +
        scale_color_gradient2(low = "blue", mid = "white", high = "red") +
        theme_classic() +
        ggtheme() +
        theme(axis.text.x = element_text(angle = 90))
    }
  )
)

In [None]:
ggsave("../figures/populations_correlations/population_correlations_L2_cd8.png", width = 72, height = 15, units = "cm", create.dir = TRUE)
ggsave("../figures/populations_correlations/population_correlations_L2_cd8.svg", width = 72, height = 15, units = "cm")

# Treg vs cpept

In [None]:
# Positions of the columns of `md4` whose name starts with "L3".
population_colnames_l3 <- which(startsWith(colnames(md4), "L3"))

In [None]:
population_colnames_l3

In [None]:
#' Correlate one population column of `md4` with the three C-peptide columns.
#'
#' Reads the global data frame `md4`. Its columns 2, 3 and 4 are used as the
#' outcomes and are reported under the labels "fasting_cpept_T1",
#' "fasting_cpept" and "c_peptide_change".
#'
#' @param i Position of the population column in `md4`.
#' @param n_tests Multiplier of the Bonferroni-style adjustment
#'   (`padj = min(1, pval * n_tests)`). Defaults to 10, the value that was
#'   previously hard-coded.
#'   NOTE(review): this definition is applied to all Level 3 columns; confirm
#'   that 10 is the intended number of tests there.
#' @return A data frame with three rows (one per outcome) and the columns
#'   `population`, `cor`, `pval`, `padj` and `test`.
calc_correlation <- function(i, n_tests = 10) {
  correlate_with <- function(outcome_col, label) {
    pair <- data.frame(value = md4[[i]], outcome = md4[[outcome_col]])
    # Keep complete pairs only. cor.test() would drop incomplete pairs as
    # well; doing it here makes the intended filtering explicit.
    pair <- pair[stats::complete.cases(pair), , drop = FALSE]
    ct <- stats::cor.test(pair$value, pair$outcome)
    data.frame(
      population = colnames(md4)[i],
      cor = ct$estimate,
      pval = ct$p.value,
      padj = min(1, ct$p.value * n_tests),
      test = label
    )
  }

  rbind(
    correlate_with(2, "fasting_cpept_T1"),
    correlate_with(3, "fasting_cpept"),
    correlate_with(4, "c_peptide_change")
  )
}

In [None]:
# Apply calc_correlation to every selected column position; the result is a
# list with one data.frame per population.
populations_corr_l3 <- future_map(
  .x = population_colnames_l3,
  .f = calc_correlation
)

In [None]:
# Stack the per-population results into a single table
populations_corr <- populations_corr_l3 %>%
  bind_rows()

In [None]:
# Show only the rows whose population name contains "Treg"
populations_corr %>%
  filter(grepl("Treg", population))

In [None]:
# Display md4 for inspection
md4

In [None]:
# List the objects currently defined in the workspace (debugging aid)
ls()

In [None]:
# Scatter plots of Treg population columns of `md4` against the three
# c-peptide columns (positions 2, 3 and 4 of `md4`). For every population a
# three-panel figure is printed and saved as SVG.
options(repr.plot.width = 12, repr.plot.height = 5)

# Attach svglite and create the output directory once, before the loop,
# instead of repeating both (and triggering "already exists" warnings) on
# every iteration.
library(svglite)
out_dir <- "../figures/correlation_populations/"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

# Distinct Treg populations found in the first 12 matching rows of the
# correlation table. head() does not pad with NA when fewer than 12 rows
# match, which `[1:12]` would do.
treg_populations <- populations_corr %>%
  dplyr::filter(grepl(population, pattern = "Treg")) %>%
  pull(population) %>%
  head(12) %>%
  unique()

# Layers shared by all three panels of a figure.
panel_layers <- list(
  geom_point(shape = 16, size = 2),
  geom_smooth(method = lm, alpha = 0.2),
  stat_cor(size = 7),
  theme_classic(),
  ggtheme()
)

# Iteration counter; it is not read inside this cell and is kept only in case
# a later cell relies on it.
j <- 1
for (i in treg_populations) {
  # Population column (selected by name) plus the three c-peptide columns
  # (selected by position).
  df2 <- md4 %>%
    dplyr::select(
      value = all_of(i),
      fasting_cpept_T1 = 2,
      fasting_cpept_1 = 3,
      c_peptide_change = 4
    )

  # Only the first panel carries the population name as its title.
  p1 <- ggplot(df2, aes(x = value, y = fasting_cpept_T1)) +
    panel_layers +
    ggtitle(gsub(i, pattern = "L3 CD4 T cells---Treg---", replacement = " "))
  p2 <- ggplot(df2, aes(x = value, y = fasting_cpept_1)) +
    panel_layers +
    ggtitle(" ")
  p3 <- ggplot(df2, aes(x = value, y = c_peptide_change)) +
    panel_layers +
    ggtitle(" ")

  combined <- p1 + p2 + p3
  print(combined)
  j <- j + 1

  # Pass the figure explicitly rather than relying on last_plot().
  ggsave(
    filename = paste0(out_dir, i, ".svg"),
    plot = combined,
    width = 12, height = 4
  )
}

### Both patient samples T1 and T0

In [None]:
# Join md3 onto md_cpept and keep only the rows with a non-missing
# fasting_cpept_1 value.
md5 <- left_join(md_cpept, md3) %>%
  dplyr::filter(!is.na(fasting_cpept_1))

In [None]:
# Positions of the md5 columns whose names begin with "L3"
population_colnames_l3 <- which(startsWith(colnames(md5), "L3"))

In [None]:
# Display the selected column positions (columns of md5 named "L3...")
population_colnames_l3

In [None]:
# Correlate one population column of `md5` with three c-peptide columns.
#
# Arguments:
#   i        Integer position of the population column in `md5`.
#   n_tests  Multiplier applied to every p-value to obtain `padj`
#            (the product is capped at 1). The default of 10 keeps the
#            factor that used to be hard-coded in this function.
#
# Returns:
#   A three-row data.frame with the columns `population`, `cor`, `pval`,
#   `padj` and `test`; one row per c-peptide column, labelled
#   "fasting_cpept_T1", "fasting_cpept" and "c_peptide_change".
#
# Notes:
#   * `md5` is read from the enclosing (global) environment. The population
#     name is looked up in `md5` as well, so that name and values always come
#     from the same table.
#   * The c-peptide variables are addressed by position (columns 2, 3, 4 of
#     `md5`). NOTE(review): this assumes the column order of `md5` is stable;
#     confirm against the cell that builds it.
calc_correlation <- function(i, n_tests = 10) {
  population <- colnames(md5)[i]

  # One correlation test between the population column and a single
  # c-peptide column, returned as a one-row data.frame.
  correlate_with <- function(cpept_col, label) {
    value <- md5[[i]]
    cpept <- md5[[cpept_col]]
    # Keep only complete pairs; cor.test() would discard the rest anyway,
    # but doing it here makes the handling of missing values explicit.
    complete <- !is.na(value) & !is.na(cpept)
    ct <- cor.test(value[complete], cpept[complete])
    data.frame(
      population = population,
      cor = ct$estimate,
      pval = ct$p.value,
      padj = pmin(1, ct$p.value * n_tests),
      test = label
    )
  }

  rbind(
    correlate_with(2, "fasting_cpept_T1"),
    correlate_with(3, "fasting_cpept"),
    correlate_with(4, "c_peptide_change")
  )
}

In [None]:
# Apply calc_correlation to every selected column position; the result is a
# list with one data.frame per population.
populations_corr_l3 <- future_map(
  .x = population_colnames_l3,
  .f = calc_correlation
)

In [None]:
# Stack the per-population results into a single table
populations_corr <- populations_corr_l3 %>%
  bind_rows()

In [None]:
# Show only the rows whose population name contains "Treg"
populations_corr %>%
  filter(grepl("Treg", population))

In [None]:
# Scatter plots of Treg population columns of `md5` against the three
# c-peptide columns (positions 2, 3 and 4 of `md5`). For every population a
# three-panel figure is printed; saving is currently disabled.
options(repr.plot.width = 12, repr.plot.height = 5)

# Distinct Treg populations among the 12 rows with the smallest p-values.
# A population has one row per test, so it can occur several times in that
# window; unique() keeps it from being plotted repeatedly, and head() does not
# pad with NA when fewer than 12 rows match.
treg_populations <- populations_corr %>%
  dplyr::filter(grepl(population, pattern = "Treg")) %>%
  arrange(pval) %>%
  pull(population) %>%
  head(12) %>%
  unique()

# Layers shared by all three panels of a figure.
panel_layers <- list(
  geom_point(shape = 16, size = 2),
  geom_smooth(method = lm, alpha = 0.2),
  stat_cor(size = 7),
  theme_classic(),
  ggtheme()
)

# Iteration counter; it is not read inside this cell and is kept only in case
# a later cell relies on it.
j <- 1
for (i in treg_populations) {
  # Population column (selected by name) plus the three c-peptide columns
  # (selected by position).
  df2 <- md5 %>%
    dplyr::select(
      value = all_of(i),
      fasting_cpept_T1 = 2,
      fasting_cpept_1 = 3,
      c_peptide_change = 4
    )

  # Only the first panel carries the population name as its title.
  p1 <- ggplot(df2, aes(x = value, y = fasting_cpept_T1)) +
    panel_layers +
    ggtitle(gsub(i, pattern = "L3 CD4 T cells---Treg---", replacement = " "))
  p2 <- ggplot(df2, aes(x = value, y = fasting_cpept_1)) +
    panel_layers +
    ggtitle(" ")
  p3 <- ggplot(df2, aes(x = value, y = c_peptide_change)) +
    panel_layers +
    ggtitle(" ")

  print(p1 + p2 + p3)
  j <- j + 1
#library(svglite)
#dir.create("../figures/correlation_populations/")
#ggsave(filename = paste0("../figures/correlation_populations/",i,".svg"), width = 5, height = 4.5)
}