# Sample Summary

In [1]:
suppressMessages({library(dplyr)
                  library(SummarizedExperiment)})

## Functions

In [2]:
#' Load the genotype MDS table and rename its component columns.
#'
#' Reads the MDS file with `data.table::fread()`, renames the component
#' columns `C1`, `C2`, ... to `snpPC1`, `snpPC2`, ..., and converts every
#' character column to a factor.
#' NOTE(review): the file is presumably PLINK MDS output (FID, IID, SOL,
#' C1..Cn) -- confirm the column layout.
#'
#' @return The table read by `fread()` with `snpPC*` component columns and
#'   character columns converted to factors.
get_mds <- function(){
    mds_file <- "/ceph/projects/v4_phase3_paper/inputs/genotypes/mds/_m/LIBD_Brain_TopMed.mds"
    data.table::fread(mds_file) %>%
        # Rename only "C<digits>" columns. starts_with("C") ignores case and
        # would also pick up any other column that begins with c/C, and an
        # unanchored sub("C", ...) does not pin the replacement to the prefix.
        rename_with(function(x) sub("^C", "snpPC", x),
                    matches("^C[0-9]+$", ignore.case = FALSE)) %>%
        mutate(across(where(is.character), as.factor))
}

# Memoised wrapper so the MDS file is read at most once per session.
memMDS <- memoise::memoise(get_mds)

#' Build the sample phenotype table joined with the genotype MDS components.
#'
#' Loads the gene-level RangedSummarizedExperiment, keeps samples with
#' Age > 13 and Race in AA/CAUC, merges the sequencing metrics, and
#' inner-joins the sample annotation to the MDS table on BrNum == FID.
#'
#' @return A data.frame with one row per RNum.
get_pheno <- function(){
    rda_path <- "/ceph/projects/v4_phase3_paper/inputs/counts/_m/caudate_brainseq_phase3_hg38_rseGene_merged_n464.rda"
    # The .rda file is expected to define `rse_gene` in this environment.
    load(rda_path)
    is_kept <- rse_gene$Age > 13 & rse_gene$Race %in% c("AA", "CAUC")
    rse <- rse_gene[, which(is_kept)]
    rse$Sex <- factor(rse$Sex)
    rse <- jaffelab::merge_rse_metrics(rse)
    # Keep only the first RIN entry for each sample.
    colData(rse)$RIN <- sapply(colData(rse)$RIN, "[", 1)
    # Sample row names become the text before the first underscore.
    name_parts <- strsplit(rownames(colData(rse)), "_")
    rownames(colData(rse)) <- sapply(name_parts, "[", 1)
    sample_info <- as.data.frame(colData(rse))
    joined <- inner_join(sample_info, memMDS(), by = c("BrNum" = "FID"))
    # Drop repeated RNum rows, keeping the first occurrence.
    pheno <- distinct(joined, RNum, .keep_all = TRUE)
    return(pheno)
}

# Memoised wrapper so the phenotype table is built at most once per session.
memPHENO <- memoise::memoise(get_pheno)

## Sample breakdown

### eQTL analysis

In [3]:
# Covariates kept for the sample summaries below.
cols <- c("BrNum", "RNum", "Region", "RIN", "Age", "Sex", "Race", "Dx",
          "ERCCsumLogErr", "overallMapRate", "totalAssignedGene",
          "mitoRate", "rRNA_rate", "snpPC1", "snpPC2", "snpPC3")

df <- select(memPHENO(), all_of(cols))
dim(df)

In [4]:
# Number of distinct donors (BrNum) in the eQTL sample set.
print(paste("There are", length(unique(df$BrNum)), "unique BrNum."))

[1] "There are 443 unique BrNum."


In [5]:
# Sample counts per diagnosis.
table(df[["Dx"]])


Bipolar Control  Schizo 
     44     245     154 

In [7]:
# Diagnosis by sex.
table(df[["Dx"]], df[["Sex"]])

         
            F   M
  Bipolar  16  28
  Control  76 169
  Schizo   50 104

In [8]:
# Diagnosis by race.
table(df[["Dx"]], df[["Race"]])

         
           AA CAUC
  Bipolar   4   40
  Control 123  122
  Schizo   83   71

In [6]:
# Sample counts per sex.
table(df[["Sex"]])


  F   M 
142 301 

In [6]:
# Sample counts per race.
table(df[["Race"]])


 AA CAUC 
210  233 

#### Mean

In [11]:
# Mean RIN and age within each diagnosis group (missing values ignored).
summarise(
    group_by(df, Dx),
    across(all_of(c("RIN", "Age")), function(x) mean(x, na.rm = TRUE))
)

Dx,RIN,Age
<chr>,<dbl>,<dbl>
Bipolar,7.822727,43.21182
Control,7.857143,47.46478
Schizo,7.875974,51.74896


#### Median

In [11]:
# Median RIN and age within each diagnosis group (missing values ignored).
summarise(
    group_by(df, Dx),
    across(all_of(c("RIN", "Age")), function(x) median(x, na.rm = TRUE))
)

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.8,49.65,4.523605e-05,0.03439737
DLPFC,7.8,48.1,4.0606e-05,0.02063706
HIPPO,7.8,47.515,1.991029e-05,0.23442298


#### Standard deviation

In [12]:
# Standard deviation of RIN and age within each diagnosis group
# (missing values ignored).
summarise(
    group_by(df, Dx),
    across(all_of(c("RIN", "Age")), function(x) sd(x, na.rm = TRUE))
)

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,0.8632861,16.07355,3.630981e-05,0.01664994
DLPFC,0.9352001,16.51391,3.180953e-05,0.00773969
HIPPO,1.0323633,16.43348,2.769952e-05,0.09132075


### Expression analysis

In [13]:
# Expression analysis uses only samples older than 17.
df2 <- filter(df, Age > 17)
dim(df2)

In [14]:
# Number of distinct donors (BrNum) in the expression-analysis subset.
print(paste("There are", length(unique(df2$BrNum)), "unique BrNum."))

[1] "There are 480 unique BrNum."


In [15]:
# Sample counts per diagnosis in the expression-analysis subset.
table(df2[["Dx"]])


Caudate   DLPFC   HIPPO 
    393     359     375 

In [17]:
# Diagnosis by sex in the expression-analysis subset.
table(df2[["Dx"]], df2[["Sex"]])

         
          Control Schizo
  Caudate     239    154
  DLPFC       211    148
  HIPPO       242    133

In [18]:
# Sex by race in the expression-analysis subset.
table(df2[["Sex"]], df2[["Race"]])

         
            F   M
  Caudate 121 272
  DLPFC   114 245
  HIPPO   121 254

In [19]:
# Sample counts per race in the expression-analysis subset.
table(df2[["Race"]])

         
           AA CAUC
  Caudate 205  188
  DLPFC   200  159
  HIPPO   207  168

In [19]:
# Sample counts per sex in the expression-analysis subset.
table(df2[["Sex"]])

         
           AA CAUC
  Caudate 205  188
  DLPFC   200  159
  HIPPO   207  168

#### Mean

In [20]:
# Mean RIN and age within each diagnosis group of the expression subset
# (missing values ignored).
summarise(
    group_by(df2, Dx),
    across(all_of(c("RIN", "Age")), function(x) mean(x, na.rm = TRUE))
)

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.863613,49.65076,5.65874e-05,0.03639159
DLPFC,7.666295,47.35662,5.149245e-05,0.02148476
HIPPO,7.598667,47.02501,3.02671e-05,0.22567167


#### Median

In [21]:
# Median RIN and age within each diagnosis group of the expression subset
# (missing values ignored).
summarise(
    group_by(df2, Dx),
    across(all_of(c("RIN", "Age")), function(x) median(x, na.rm = TRUE))
)

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.8,50.0,4.447903e-05,0.03439737
DLPFC,7.8,48.66,4.175559e-05,0.0204437
HIPPO,7.8,48.34,1.997499e-05,0.23428324


#### Standard deviation

In [22]:
# Standard deviation of RIN and age within each diagnosis group of the
# expression subset (missing values ignored).
summarise(
    group_by(df2, Dx),
    across(all_of(c("RIN", "Age")), function(x) sd(x, na.rm = TRUE))
)

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,0.8649491,15.60086,3.634418e-05,0.016609313
DLPFC,0.9221092,15.38859,3.224609e-05,0.007513765
HIPPO,1.0321689,15.29984,2.823267e-05,0.092901235


## Reproducibility Information

In [None]:
# Record the current time and the R process timings for this run.
Sys.time()
proc.time()
# Set the output width to 120 characters before printing the session report.
options(width = 120)
# Report the R version, platform and loaded package versions.
sessioninfo::session_info()