# Cell deconvolution comparison and quality control
**Performed by Louise Huuki**

In [None]:
library(tidyverse)
library(ggpubr)

## Functions

In [None]:
# Read the merged phenotype table and keep only the rows whose diagnosis (Dx)
# is "SZ" or "CTL" and whose Age is greater than 17.
# Takes no arguments; returns the filtered table.
get_pheno <- function() {
    pheno_file <- "/ceph/projects/v4_phase3_paper/inputs/phenotypes/_m/merged_phenotypes.csv"
    pheno <- data.table::fread(pheno_file)
    filter(pheno, Dx %in% c("SZ", "CTL"), Age > 17)
}

# Memoised wrapper around get_pheno(): repeated calls reuse the cached result
# instead of reading the file again.
memPHENO <- memoise::memoise(get_pheno)

# Save one plot to disk in three formats (SVG, PDF and PNG).
#   image: plot object handed to ggsave() as `plot`
#   fn:    output path WITHOUT extension; ".svg", ".pdf" and ".png" are appended
#   w, h:  width and height forwarded to ggsave()
# Called for its side effect (writing files).
save_img <- function(image, fn, w, h){
    for(ext in c(".svg", ".pdf", ".png")){
        # Use the full argument name `filename`; the abbreviated `file=` only
        # worked through R's partial argument matching.
        ggsave(filename=paste0(fn, ext), plot=image, width=w, height=h)
    }
}

## Prepare data

In [None]:
# Restore the R objects stored in this .Rdata file into the current environment.
# NOTE(review): presumably this is what defines `est_prop_bisque`, which the
# following cells read -- confirm against the script that wrote the file.
load("../_h/est_prop_Bisque.Rdata")

In [None]:
# Caudate proportion estimates (the `Est.prop.long` table) joined to the
# filtered phenotypes: `sample` in the estimates is matched to `RNum` in the
# phenotype table, so only samples present in both are kept.
df <- inner_join(est_prop_bisque$caudate$Est.prop.long, memPHENO(),
                 by=c("sample"="RNum"))
# Character columns become factors; `prop` is renamed to `Proportion`
df <- mutate_if(df, is.character, as.factor)
df <- rename(df, "Proportion"="prop")
# Preview: first two rows, first ten columns
df[1:2, 1:10]

## Cell type proportion plots

In [None]:
# Box plot of the estimated proportion of every cell type, coloured by
# diagnosis, with the individual samples overlaid as jittered points.
bxp <- ggboxplot(
    df, x="cell_type", y="Proportion", color="Dx",
    palette="npg", add="jitter", add.params=list(alpha=0.5),
    outlier.shape=NA, ylim=c(0, 1),
    xlab="Cell Types", ylab="Cell Type Proportion", legend="bottom",
    panel.labs.font=list(face="bold", size=14)
)
bxp <- bxp +
    font("xy.text", size=12) +
    font("xy.title", size=16, face="bold") +
    rotate_x_text(45)
print(bxp)
# Written as boxplot_celltypes_dx.{svg,pdf,png}
save_img(bxp, "boxplot_celltypes_dx", w=7, h=6)

In [None]:
# Box plot of proportion by diagnosis, one facet per cell type, with a
# Wilcoxon test comparing CTL and SZ annotated in every facet.
# The y-axis runs to 1.2 rather than 1 (presumably to leave room for the
# significance bracket above the boxes).
bxp <- df %>%
    ggboxplot(x="Dx", y="Proportion", color="Dx", facet.by="cell_type",
              panel.labs.font=list(face="bold", size=14), palette="npg",
              outlier.shape=NA, ylab="Cell Type Proportion", add="jitter",
              add.params=list(alpha=0.5), ylim=c(0, 1.2), ncol=3,
              # "none" (lower case) is the documented value for hiding the legend
              xlab="Diagnosis", legend="none") +
    # label="p.signif" is the documented way to show significance symbols;
    # it replaces the dot-dot mapping aes(label=..p.signif..).
    stat_compare_means(comparisons=list(c("CTL", "SZ")), label="p.signif",
                       method="wilcox.test") +
    font("xy.text", size=12) + font("xy.title", size=16, face="bold")
print(bxp)
# Written as boxplot_dxBYcelltype.{svg,pdf,png}
save_img(bxp, "boxplot_dxBYcelltype", w=7, h=6)

## Quality control

In [None]:
# Distinct cell types found in the caudate estimates; the quality-control
# loops below produce one set of plots per entry.
celltypes <- unique(est_prop_bisque$caudate$Est.prop.long$cell_type)

### Normalized data

In [None]:
# NOTE(review): `v` is presumably restored by this load() -- confirm.
load("/ceph/projects/v4_phase3_paper/analysis/differential_expression/_m/genes/voomSVA.RData")
# Transpose v$E so that its columns become the rows (observations) given to prcomp
norm_df <- t(v$E)
pca_df1 <- prcomp(norm_df, center=TRUE)$x

# Keep PC1-PC5 and reshape to one row per (sample, PC) pair
norm_dt <- as.data.frame(pca_df1)
norm_dt <- rownames_to_column(norm_dt, "sample")
norm_dt <- select(norm_dt, c(sample, PC1, PC2, PC3, PC4, PC5))
norm_dt <- pivot_longer(norm_dt, -sample, names_to="PC", values_to="PC_values")
norm_dt <- mutate_if(norm_dt, is.character, as.factor)
head(norm_dt, 2)

### Residualized data

In [None]:
# Read the residualized expression table, move its `V1` column into the row
# names, and transpose so the original columns become the rows given to prcomp.
res_df <- data.table::fread(paste0("/ceph/projects/v4_phase3_paper/analysis/",
                                   "differential_expression/_m/genes/residualized_expression.tsv"))
res_df <- t(column_to_rownames(res_df, "V1"))
pca_df2 <- prcomp(res_df, center=TRUE)$x

# Keep PC1-PC5 and reshape to one row per (sample, PC) pair
res_dt <- as.data.frame(pca_df2)
res_dt <- rownames_to_column(res_dt, "sample")
res_dt <- select(res_dt, c(sample, PC1, PC2, PC3, PC4, PC5))
res_dt <- pivot_longer(res_dt, -sample, names_to="PC", values_to="PC_values")
res_dt <- mutate_if(res_dt, is.character, as.factor)
head(res_dt, 2)

### Scatter plots of expression PCs against cell type proportions

In [None]:
# Output directory for the quality-control scatter plots. Only create it when
# it is missing, so re-running the notebook does not warn about an existing
# directory (a genuine creation failure still warns).
if(!dir.exists("quality_control")){
    dir.create("quality_control")
}

In [None]:
options(repr.plot.width=18, repr.plot.height=6)
# Scatter plots of PC1-PC5 against cell-type proportion, points coloured by
# diagnosis, for the normalized and the residualized expression.
# The join of PC scores to proportions does not depend on the cell type, so it
# is done once here instead of on every loop iteration.
pc_sources_dx <- list(
    list(data=inner_join(norm_dt, df, by="sample"),
         ylab="Normalized Expression",
         prefix="quality_control/scatter_log2cpm_dx_5pcs_"),
    list(data=inner_join(res_dt, df, by="sample"),
         ylab="Residualized Expression",
         prefix="quality_control/scatter_resdf_dx_5pcs_")
)
for(ct in celltypes){
    flush.console()
    print(ct)
    ## Normalized first, then residualized
    for(src in pc_sources_dx){
        sca <- src[["data"]] %>% filter(cell_type == ct) %>%
            ggscatter(y="PC_values", x="Proportion", color="Dx", facet.by=c('PC'), ncol=5,
                      add='reg.line', conf.int=TRUE, cor.coef=TRUE, palette="npg",
                      xlab=paste(ct, "Proportion"), ylab=src[["ylab"]],
                      panel.labs.font=list(face='bold', size = 14),
                      add.params=list(color="blue", fill="lightgray")) +
            font("xy.text", size=12) + font("xy.title", size=16, face="bold")
        # One file set per cell type: <prefix><cell type>.{svg,pdf,png}
        save_img(sca, paste0(src[["prefix"]], ct), w=18, h=6)
        print(sca)
    }
}

In [None]:
options(repr.plot.width=18, repr.plot.height=6)
# Scatter plots of PC1-PC5 against cell-type proportion without colouring by
# diagnosis, for the normalized and the residualized expression.
# The join of PC scores to proportions does not depend on the cell type, so it
# is done once here instead of on every loop iteration.
pc_sources_all <- list(
    list(data=inner_join(norm_dt, df, by="sample"),
         ylab="Normalized Expression",
         prefix="quality_control/scatter_log2cpm_5pcs_"),
    list(data=inner_join(res_dt, df, by="sample"),
         ylab="Residualized Expression",
         prefix="quality_control/scatter_resdf_5pcs_")
)
for(ct in celltypes){
    flush.console()
    print(ct)
    ## Normalized first, then residualized
    for(src in pc_sources_all){
        sca <- src[["data"]] %>% filter(cell_type == ct) %>%
            ggscatter(y="PC_values", x="Proportion", facet.by=c('PC'), ncol=5,
                      add='reg.line', conf.int=TRUE, cor.coef=TRUE, palette="npg",
                      xlab=paste(ct, "Proportion"), ylab=src[["ylab"]],
                      panel.labs.font=list(face='bold', size = 14),
                      add.params=list(color="blue", fill="lightgray")) +
            font("xy.text", size=12) + font("xy.title", size=16, face="bold")
        # One file set per cell type: <prefix><cell type>.{svg,pdf,png}
        save_img(sca, paste0(src[["prefix"]], ct), w=18, h=6)
        print(sca)
    }
}

## Reproducibility Information

In [None]:
# Record when the notebook was run and how much time the R process has used
Sys.time()
proc.time()
# Widen console output before printing the session report
options(width = 120)
# R version, platform and loaded package versions
sessioninfo::session_info()