# Histograms for permutation analysis (male downsampling to female level)

In [None]:
library(tidyverse)
library(ggpubr)

## Function and configuration

In [None]:
# Per-tissue location of the permutation results table; keys are the tissue
# labels used throughout the notebook.
config <- list(
    Caudate = "../../../caudate/subsampling_male/deg_summary/_m/permutations.csv",
    DLPFC = "../../../dlpfc/subsampling_male/deg_summary/_m/permutations.csv",
    Hippocampus = "../../../hippocampus/subsampling_male/deg_summary/_m/permutations.csv"
)

In [None]:
# Write one plot to disk in three formats (SVG, PNG and PDF).
#
# p:  plot object handed to ggsave().
# fn: output path without extension; the extension is appended per format.
# w:  width passed to ggsave() (default 6).
# h:  height passed to ggsave() (default 6).
save_ggplots <- function(p, fn, w=6, h=6){
    extensions <- c('.svg', '.png', '.pdf')
    for (ext in extensions) {
        out_file <- paste0(fn, ext)
        ggsave(filename = out_file, plot = p, width = w, height = h)
    }
}

# Read the permutation table of every tissue listed in `config`, tag each row
# with its tissue, and stack the tables into one long data frame.
#
# Returns a table restricted to the columns Symbol, gencodeID, logFC, t,
# adj.P.Val, Permutation and Tissue.
merge_data <- function(){
    tissues <- c("Caudate", "DLPFC", "Hippocampus")
    per_tissue <- lapply(tissues, function(tissue) {
        tbl <- data.table::fread(config[[tissue]])
        tbl$Tissue <- tissue  # remember which tissue each row came from
        tbl
    })
    combined <- bind_rows(per_tissue)
    select(combined, Symbol, gencodeID, logFC, t, adj.P.Val, Permutation, Tissue)
}

# Count the number of rows (DEGs) per permutation for a single tissue.
#
# df:     long table with at least the columns `Permutation` and `Tissue`,
#         one row per DEG (e.g. the output of merge_data()).
# tissue: tissue label to keep; compared against the `Tissue` column.
#
# Returns a tibble with columns Permutation, Tissue and Size, restricted to
# `tissue`.
#
# The counts are completed over every observed Permutation x Tissue pair so
# that a permutation with no rows for this tissue is reported with Size = 0
# instead of being dropped. The previous replace_na() call named tissue
# columns that do not exist in this long-format table, so it was a no-op and
# zero-count permutations silently disappeared from the mean/sd.
# NOTE(review): a permutation with no rows in ANY tissue is still absent from
# `df` and therefore cannot be recovered here -- confirm whether such
# permutations exist upstream.
get_perm_data <- function(df, tissue){
    df %>%
        group_by(Permutation, Tissue) %>%
        summarize(Size = n()) %>%
        ungroup() %>%
        complete(Permutation, Tissue, fill = list(Size = 0L)) %>%
        filter(Tissue == tissue)
}

# Load the female-only differential expression results for one tissue and
# keep the rows with adjusted p-value below 0.05.
#
# tissue: tissue label; lower-cased to build the results path.
#
# Returns the filtered table read from
# ../../../<tissue>/female_analysis/_m/genes/diffExpr_szVctl_full.txt.
# NOTE(review): assumes that file has an `adj.P.Val` column, as the
# permutation tables do -- confirm.
get_female_degs <- function(tissue){
    fn <- paste0("../../../", tolower(tissue),
                 "/female_analysis/_m/genes/diffExpr_szVctl_full.txt")
    # Reference the column itself: the quoted form "adj.P.Val" < 0.05 compares
    # a string constant with a number and never looks at the data.
    data.table::fread(fn) %>% filter(adj.P.Val < 0.05)
}

# Z-score of the female DEG count against the per-permutation DEG counts.
#
# df:     long permutation table passed on to get_perm_data().
# tissue: tissue label passed to get_female_degs() and get_perm_data().
#
# Returns (x - mu) / sigma, where x is the number of rows returned by
# get_female_degs(tissue) and mu / sigma are the mean and standard deviation
# of the `Size` column returned by get_perm_data(df, tissue).
cal_zscore <- function(df, tissue){
    n_female <- nrow(get_female_degs(tissue))
    # Build the permutation counts once and reuse them for both moments.
    perm_sizes <- get_perm_data(df, tissue)$Size
    (n_female - mean(perm_sizes)) / sd(perm_sizes)
}

## Merge dataframes

In [None]:
# Stack the per-tissue permutation tables and preview the first rows.
big_df <- merge_data()
head(big_df)

## Calculate significance

In [None]:
# Z-score and two-sided p-value per tissue.
tissues <- c("Caudate", "DLPFC", "Hippocampus")

## Z-score
z_scores <- vapply(tissues, function(tissue) cal_zscore(big_df, tissue),
                   numeric(1), USE.NAMES = FALSE)

## Convert to p-value
# Two-sided p-value under the standard normal: 2 * P(Z >= |z|).
# Doubling the lower tail of the signed z (the previous formula) gives values
# above 1 whenever z is positive.
two_tail <- 2 * pnorm(abs(z_scores), mean = 0, sd = 1, lower.tail = FALSE)

dt = data.frame("Tissue"=tissues, "Z_score"=z_scores, "P_Value"=two_tail)
dt

In [None]:
# Save the per-tissue z-scores and p-values as a tab-separated file.
data.table::fwrite(dt, file = "permutation_pvalues.tsv", sep = '\t')

## Plot figures

In [None]:
# Number of DEGs per permutation and tissue, in long format.
# The counts are spread to one column per tissue so that a tissue missing
# from a permutation can be filled with 0, then gathered back.
df <- big_df %>%
    group_by(Permutation, Tissue) %>%
    summarize(Size = n()) %>%
    as.data.frame() %>%
    pivot_wider(names_from = Tissue, values_from = Size) %>%
    replace_na(list(DLPFC = 0, Hippocampus = 0, Caudate = 0)) %>%
    pivot_longer(-Permutation, names_to = "Tissue", values_to = "DEGs") %>%
    mutate_if(is.character, as.factor)
head(df)

In [None]:
# Histogram of per-permutation DEG counts, one facet row per tissue.
hist <- gghistogram(
    df, x = "DEGs", facet.by = "Tissue", ncol = 1,
    bins = 30, fill = "lightgray", rug = TRUE,
    xlab = "Number of SZ DEGs\n(Subsampled Male Only)",
    ylab = "Count in Permutation",
    panel.labs.font = list(face = 'bold', size = 18),
    ggtheme = theme_pubr(base_size = 15, border = TRUE)
)
# Bold, larger axis titles.
hist <- hist + font("xy.title", face = "bold", size = 18)
hist

In [None]:
# Export the histogram as SVG, PNG and PDF (width 6, height 7).
save_ggplots(hist, "permutation_histogram", w = 6, h = 7)

## Permutation DEGs Summary

In [None]:
# Mean, median and standard deviation of the per-permutation DEG counts for
# each tissue, using the same zero-filled counts as the histogram.
big_df %>%
    group_by(Permutation, Tissue) %>%
    summarize(Size = n()) %>%
    as.data.frame() %>%
    pivot_wider(names_from = Tissue, values_from = Size) %>%
    replace_na(list(DLPFC = 0, Hippocampus = 0, Caudate = 0)) %>%
    pivot_longer(-Permutation, names_to = "Tissue", values_to = "DEGs") %>%
    mutate_if(is.character, as.factor) %>%
    group_by(Tissue) %>%
    summarize(Mean = mean(DEGs), Median = median(DEGs), Std = sd(DEGs))

## Reproducibility Information

In [None]:
# Record the current time and the CPU/elapsed time used by this R session.
Sys.time()
proc.time()
# Widen the output width before printing the session report.
options(width = 120)
# Print the session details reported by the sessioninfo package.
sessioninfo::session_info()