# Sample Summary

In [1]:
suppressMessages({library(dplyr)
                  library(SummarizedExperiment)})

## Functions

In [2]:
# Read the genotype MDS table from its hard-coded path and tidy it up.
#
# Columns whose names begin with an upper-case "C" get that leading "C"
# replaced by "snpPC" (e.g. C1 -> snpPC1), and every character column is
# converted to a factor.
#
# Returns: the table read by data.table::fread() after the renaming and the
# factor conversion.
get_mds <- function(){
    mds_file <- "/ceph/projects/v4_phase3_paper/inputs/genotypes/mds/_m/LIBD_Brain_TopMed.mds"
    mds <- data.table::fread(mds_file) %>%
        # starts_with() ignores case unless told otherwise, and an unanchored
        # sub("C", ...) rewrites the first "C" anywhere in the name. Select
        # case-sensitively and anchor the pattern so that only a leading
        # upper-case "C" is ever rewritten.
        rename_with(~ sub("^C", "snpPC", .x),
                    starts_with("C", ignore.case = FALSE)) %>%
        mutate(across(where(is.character), as.factor))
    return(mds)
}

# Memoised wrapper: repeated calls reuse the cached result of get_mds().
memMDS <- memoise::memoise(get_mds)

# Assemble the per-sample phenotype table joined with the genotype MDS data.
#
# Steps: load the gene-level RSE object, keep samples with Age > 13 and Race
# in AA/CAUC, merge the RSE metrics, reduce RIN to its first entry, shorten
# the row names, then inner-join with memMDS() on BrNum == FID and keep one
# row per RNum.
#
# Returns: a data frame with the sample annotation and the MDS columns.
get_pheno <- function(){
    rda_path <- "/ceph/projects/v4_phase3_paper/inputs/counts/_m/caudate_brainseq_phase3_hg38_rseGene_merged_n464.rda"
    # load() restores the saved objects into this function's environment;
    # `rse_gene` is presumably supplied by this file -- TODO confirm.
    load(rda_path)
    wanted <- rse_gene$Age > 13 & rse_gene$Race %in% c("AA", "CAUC")
    rse <- rse_gene[, which(wanted)]
    rse$Sex <- factor(rse$Sex)
    rse <- jaffelab::merge_rse_metrics(rse)
    # Keep only the first RIN entry of each sample.
    colData(rse)$RIN <- sapply(colData(rse)$RIN, "[", 1)
    # Row names are cut down to the text before the first underscore.
    name_parts <- strsplit(rownames(colData(rse)), "_")
    rownames(colData(rse)) <- sapply(name_parts, "[", 1)
    sample_df <- as.data.frame(colData(rse))
    merged <- inner_join(sample_df, memMDS(), by = c("BrNum" = "FID"))
    distinct(merged, RNum, .keep_all = TRUE)
}

# Memoised wrapper: repeated calls reuse the cached result of get_pheno().
memPHENO <- memoise::memoise(get_pheno)

## Sample breakdown

### eQTL analysis

In [3]:
# Columns carried into the sample summaries.
cols <- c("BrNum", "RNum", "Region", "RIN", "Age", "Sex", "Race", "Dx",
          "ERCCsumLogErr", "overallMapRate", "totalAssignedGene",
          "mitoRate", "rRNA_rate", "snpPC1", "snpPC2", "snpPC3")

# Phenotype table restricted to those columns; show its dimensions.
df <- select(memPHENO(), all_of(cols))
dim(df)

In [4]:
# Report how many distinct BrNum values occur in df.
print(paste("There are", length(unique(df$BrNum)), "unique BrNum."))

[1] "There are 443 unique BrNum."


In [5]:
# Count the rows of df in each Dx group.
table(df$Dx)


Bipolar Control  Schizo 
     44     245     154 

In [6]:
# Cross-tabulate the rows of df by Dx (rows) and Sex (columns).
table(df$Dx, df$Sex)

         
            F   M
  Bipolar  16  28
  Control  76 169
  Schizo   50 104

In [7]:
# Cross-tabulate the rows of df by Dx (rows) and Race (columns).
table(df$Dx, df$Race)

         
           AA CAUC
  Bipolar   4   40
  Control 123  122
  Schizo   83   71

In [8]:
# Count the rows of df for each Sex level.
table(df$Sex)


  F   M 
142 301 

In [9]:
# Count the rows of df for each Race value.
table(df$Race)


  AA CAUC 
 210  233 

#### Mean

In [10]:
# Mean RIN and Age within each Dx group of df, skipping missing values.
df %>%
    group_by(Dx) %>%
    summarise(across(all_of(c("RIN", "Age")),
                     function(v) mean(v, na.rm = TRUE)))

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,7.822727,43.21182
Control,7.857143,47.46478
Schizo,7.875974,51.74896


#### Median

In [11]:
# Median RIN and Age within each Dx group of df, skipping missing values.
df %>%
    group_by(Dx) %>%
    summarise(across(all_of(c("RIN", "Age")),
                     function(v) median(v, na.rm = TRUE)))

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,7.65,44.785
Control,7.8,48.69
Schizo,7.9,51.97


#### Standard deviation

In [12]:
# Standard deviation of RIN and Age within each Dx group of df,
# skipping missing values.
df %>%
    group_by(Dx) %>%
    summarise(across(all_of(c("RIN", "Age")),
                     function(v) sd(v, na.rm = TRUE)))

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,0.806619,12.51277
Control,0.7914502,16.54702
Schizo,0.96927,14.96908


### Expression analysis

In [13]:
# Subset of df used for the expression analysis: rows with Age above 17.
df2 <- filter(df, Age > 17)
dim(df2)

In [14]:
# Report how many distinct BrNum values occur in df2.
print(paste("There are", length(unique(df2$BrNum)), "unique BrNum."))

[1] "There are 437 unique BrNum."


In [15]:
# Count the rows of df2 in each Dx group.
table(df2$Dx)


Bipolar Control  Schizo 
     44     239     154 

In [16]:
# Cross-tabulate the rows of df2 by Dx (rows) and Sex (columns).
table(df2$Dx, df2$Sex)

         
            F   M
  Bipolar  16  28
  Control  71 168
  Schizo   50 104

In [17]:
# Cross-tabulate the rows of df2 by Sex (rows) and Race (columns).
table(df2$Sex, df2$Race)

   
     AA CAUC
  F  78   59
  M 131  169

In [18]:
# Count the rows of df2 for each Race value.
table(df2$Race)


  AA CAUC 
 209  228 

In [19]:
# Count the rows of df2 for each Sex level.
table(df2$Sex)


  F   M 
137 300 

#### Mean

In [20]:
# Mean RIN and Age within each Dx group of df2, skipping missing values.
df2 %>%
    group_by(Dx) %>%
    summarise(across(all_of(c("RIN", "Age")),
                     function(v) mean(v, na.rm = TRUE)))

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,7.822727,43.21182
Control,7.855649,48.29879
Schizo,7.875974,51.74896


#### Median

In [21]:
# Median RIN and Age within each Dx group of df2, skipping missing values.
df2 %>%
    group_by(Dx) %>%
    summarise(across(all_of(c("RIN", "Age")),
                     function(v) median(v, na.rm = TRUE)))

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,7.65,44.785
Control,7.8,48.77
Schizo,7.9,51.97


#### Standard deviation

In [22]:
# Standard deviation of RIN and Age within each Dx group of df2,
# skipping missing values.
df2 %>%
    group_by(Dx) %>%
    summarise(across(all_of(c("RIN", "Age")),
                     function(v) sd(v, na.rm = TRUE)))

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,0.806619,12.51277
Control,0.7925331,15.87895
Schizo,0.96927,14.96908


## Reproducibility Information

In [23]:
# Record the current time and the CPU/elapsed time used by this R process.
Sys.time()
proc.time()
# Widen the console so the session summary below is printed on wide lines.
options(width = 120)
# Print the R version, platform settings and package versions in use.
sessioninfo::session_info()

[1] "2021-08-05 12:10:10 EDT"

   user  system elapsed 
 12.346   0.617  13.288 

─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.0.3 (2020-10-10)
 os       Arch Linux                  
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2021-08-05                  

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package              * version  date       lib source                                   
 assertthat             0.2.1    2019-03-21 [1] CRAN (R 4.0.2)                           
 base64enc              0.1-3    2015-07-28 [1] CRAN (R 4.0.2)                           
 Biobase              * 2.50.0   2020-10-27 [1] Bioconductor                             
 BiocGeneri