# Sample Summary

In [1]:
suppressMessages({library(dplyr)
                  library(SummarizedExperiment)})

## Functions

In [2]:
get_mds <- function(){
    # Read the genotype MDS table and rename its component columns.
    #
    # Returns: the table read by data.table::fread() in which every column
    #   whose name starts with an upper-case "C" has that leading "C"
    #   replaced by "snpPC" (e.g. "C1" -> "snpPC1"), and every character
    #   column has been converted to a factor.
    mds_file <- "/ceph/projects/v4_phase3_paper/inputs/genotypes/mds/_m/LIBD_Brain_TopMed.mds"
    mds <- data.table::fread(mds_file) %>%
        # starts_with() ignores case by default and an unanchored sub() would
        # rewrite the first "C" found anywhere in a name; select
        # case-sensitively and anchor the pattern so only a leading "C" is
        # ever touched.
        rename_with(.fn = function(x) sub("^C", "snpPC", x),
                    .cols = starts_with("C", ignore.case = FALSE)) %>%
        # across() replaces the superseded mutate_if().
        mutate(across(where(is.character), as.factor))
    return(mds)
}

# Memoised wrapper around get_mds(): repeated calls reuse the cached result
# instead of re-reading the MDS file.
memMDS <- memoise::memoise(get_mds)

get_pheno <- function(tissue){
    # Build the sample-level phenotype table for one brain region.
    #
    # Args:
    #   tissue: one of "caudate", "dlpfc" or "hippocampus".
    # Returns: a data.frame made from the colData of the region's `rse_gene`
    #   object, restricted to samples with Dx in Control/Schizo, Age > 13 and
    #   Race in AA/CAUC, inner-joined to the MDS table (BrNum == FID) and
    #   de-duplicated on RNum.
    # Raises: an error when `tissue` is not a known region or when the
    #   .rda file does not contain an object named `rse_gene`.
    counts_lt <- list("caudate"="/ceph/projects/v4_phase3_paper/inputs/counts/_m/caudate_brainseq_phase3_hg38_rseGene_merged_n464.rda",
                      "dlpfc"="/ceph/projects/v4_phase3_paper/inputs/counts/_m/dlpfc_ribozero_brainseq_phase2_hg38_rseGene_merged_n453.rda",
                      "hippocampus"="/ceph/projects/v4_phase3_paper/inputs/counts/_m/hippo_brainseq_phase2_hg38_rseGene_merged_n447.rda")
    # An unknown name would make `counts_lt[[tissue]]` NULL and surface as an
    # opaque load() error; fail early with a readable message instead.
    if (!is.character(tissue) || length(tissue) != 1L ||
        !(tissue %in% names(counts_lt))) {
        stop("Unknown tissue; expected one of: ",
             paste(names(counts_lt), collapse = ", "), call. = FALSE)
    }
    counts_file <- counts_lt[[tissue]]
    # Load into a private environment so that a file lacking `rse_gene`
    # cannot silently fall back to a same-named object in the global env.
    rda_env <- new.env()
    load(counts_file, envir = rda_env)
    if (!exists("rse_gene", envir = rda_env, inherits = FALSE)) {
        stop("Object 'rse_gene' not found in ", counts_file, call. = FALSE)
    }
    rse_df <- get("rse_gene", envir = rda_env, inherits = FALSE)
    keepIndex <- which((rse_df$Dx %in% c("Control", "Schizo")) &
                       rse_df$Age > 13 & rse_df$Race %in% c("AA", "CAUC"))
    rse_df <- rse_df[, keepIndex]
    rse_df$Dx <- factor(rse_df$Dx, levels = c("Control", "Schizo"))
    rse_df$Sex <- factor(rse_df$Sex)
    # NOTE(review): presumably collapses per-sample, multi-valued QC metrics
    # into single values -- confirm against the jaffelab documentation.
    rse_df <- jaffelab::merge_rse_metrics(rse_df)
    # Keep only the first RIN entry per sample (element type is not visible
    # here, so sapply() is left as is).
    colData(rse_df)$RIN <- sapply(colData(rse_df)$RIN, "[", 1)
    # Row names keep only the text before the first "_"; vapply() guarantees
    # a character vector even when no sample passed the filter.
    rownames(colData(rse_df)) <- vapply(strsplit(rownames(colData(rse_df)), "_"),
                                        function(parts) parts[1],
                                        character(1))
    pheno <- colData(rse_df) %>% as.data.frame %>%
        inner_join(memMDS(), by=c("BrNum"="FID")) %>%
        distinct(RNum, .keep_all = TRUE)
    return(pheno)
}

# Memoised wrapper around get_pheno(): each tissue's table is built once and
# then served from the cache on later calls with the same argument.
memPHENO <- memoise::memoise(get_pheno)

## Sample breakdown

### eQTL analysis

In [3]:
# Columns kept from each region's phenotype table.
cols <- c('BrNum', 'RNum', 'Region', 'RIN', 'Age', 'Sex', 'Race', 'Dx',
          'ERCCsumLogErr', 'overallMapRate', 'totalAssignedGene',
          'mitoRate', 'rRNA_rate', 'snpPC1', 'snpPC2', 'snpPC3')

# One table per region, then stacked into a single data frame.
cc <- select(memPHENO("caudate"), all_of(cols))
dd <- select(memPHENO("dlpfc"), all_of(cols))
hh <- select(memPHENO("hippocampus"), all_of(cols))
df <- bind_rows(cc, dd, hh)
dim(df)

In [4]:
# Number of distinct donors (BrNum) across all three regions.
n_brnum <- length(unique(df$BrNum))
print(paste("There are", n_brnum, "unique BrNum."))

[1] "There are 504 unique BrNum."


In [5]:
# Samples per region.
table(df[["Region"]])


Caudate   DLPFC   HIPPO 
    399     377     394 

In [6]:
# Samples per region, split by diagnosis.
table(df[["Region"]], df[["Dx"]])

         
          Control Schizo
  Caudate     245    154
  DLPFC       229    148
  HIPPO       261    133

In [7]:
# Samples per region, split by sex.
table(df[["Region"]], df[["Sex"]])

         
            F   M
  Caudate 126 273
  DLPFC   121 256
  HIPPO   126 268

In [8]:
# Samples per region, split by race.
table(df[["Region"]], df[["Race"]])

         
           AA CAUC
  Caudate 206  193
  DLPFC   204  173
  HIPPO   213  181

In [9]:
# Sample counts for every Region x Sex x Dx combination.
summarise(group_by(df, Region, Sex, Dx), N = n())

`summarise()` has grouped output by 'Region', 'Sex'. You can override using the `.groups` argument.



Region,Sex,Dx,N
<chr>,<fct>,<fct>,<int>
Caudate,F,Control,76
Caudate,F,Schizo,50
Caudate,M,Control,169
Caudate,M,Schizo,104
DLPFC,F,Control,73
DLPFC,F,Schizo,48
DLPFC,M,Control,156
DLPFC,M,Schizo,100
HIPPO,F,Control,79
HIPPO,F,Schizo,47


#### Mean

In [10]:
# Per-region mean of RIN, Age, rRNA_rate and mitoRate (NA values ignored).
summarise(group_by(df, Region),
          across(c("RIN", "Age", "rRNA_rate", "mitoRate"),
                 function(v) mean(v, na.rm = TRUE)))

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.864411,49.11832,5.687892e-05,0.03651875
DLPFC,7.698674,45.82111,5.056699e-05,0.02173725
HIPPO,7.617513,45.48041,2.978609e-05,0.2268814


#### Median

In [11]:
# Per-region median of RIN, Age, rRNA_rate and mitoRate (NA values ignored).
summarise(group_by(df, Region),
          across(c("RIN", "Age", "rRNA_rate", "mitoRate"),
                 function(v) median(v, na.rm = TRUE)))

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.8,49.65,4.523605e-05,0.03439737
DLPFC,7.8,48.1,4.0606e-05,0.02063706
HIPPO,7.8,47.515,1.991029e-05,0.23442298


#### Standard deviation

In [12]:
# Per-region standard deviation of RIN, Age, rRNA_rate and mitoRate
# (NA values ignored).
summarise(group_by(df, Region),
          across(c("RIN", "Age", "rRNA_rate", "mitoRate"),
                 function(v) sd(v, na.rm = TRUE)))

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,0.8632861,16.07355,3.630981e-05,0.01664994
DLPFC,0.9352001,16.51391,3.180953e-05,0.00773969
HIPPO,1.0323633,16.43348,2.769952e-05,0.09132075


### Expression analysis

In [13]:
# Expression analysis keeps only samples with Age above 17.
df2 <- filter(df, Age > 17)
dim(df2)

In [14]:
# Number of distinct donors (BrNum) in the Age > 17 subset.
n_brnum2 <- length(unique(df2$BrNum))
print(paste("There are", n_brnum2, "unique BrNum."))

[1] "There are 480 unique BrNum."


In [15]:
# Samples per region in the Age > 17 subset.
table(df2[["Region"]])


Caudate   DLPFC   HIPPO 
    393     359     375 

In [17]:
# Samples per region in the Age > 17 subset, split by diagnosis.
table(df2[["Region"]], df2[["Dx"]])

         
          Control Schizo
  Caudate     239    154
  DLPFC       211    148
  HIPPO       242    133

In [18]:
# Samples per region in the Age > 17 subset, split by sex.
table(df2[["Region"]], df2[["Sex"]])

         
            F   M
  Caudate 121 272
  DLPFC   114 245
  HIPPO   121 254

In [19]:
# Samples per region in the Age > 17 subset, split by race.
table(df2[["Region"]], df2[["Race"]])

         
           AA CAUC
  Caudate 205  188
  DLPFC   200  159
  HIPPO   207  168

#### Mean

In [20]:
# Per-region mean of RIN, Age, rRNA_rate and mitoRate in the Age > 17
# subset (NA values ignored).
summarise(group_by(df2, Region),
          across(c("RIN", "Age", "rRNA_rate", "mitoRate"),
                 function(v) mean(v, na.rm = TRUE)))

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.863613,49.65076,5.65874e-05,0.03639159
DLPFC,7.666295,47.35662,5.149245e-05,0.02148476
HIPPO,7.598667,47.02501,3.02671e-05,0.22567167


#### Median

In [21]:
# Per-region median of RIN, Age, rRNA_rate and mitoRate in the Age > 17
# subset (NA values ignored).
summarise(group_by(df2, Region),
          across(c("RIN", "Age", "rRNA_rate", "mitoRate"),
                 function(v) median(v, na.rm = TRUE)))

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,7.8,50.0,4.447903e-05,0.03439737
DLPFC,7.8,48.66,4.175559e-05,0.0204437
HIPPO,7.8,48.34,1.997499e-05,0.23428324


#### Standard deviation

In [22]:
# Per-region standard deviation of RIN, Age, rRNA_rate and mitoRate in the
# Age > 17 subset (NA values ignored).
summarise(group_by(df2, Region),
          across(c("RIN", "Age", "rRNA_rate", "mitoRate"),
                 function(v) sd(v, na.rm = TRUE)))

Region,RIN,Age,rRNA_rate,mitoRate
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Caudate,0.8649491,15.60086,3.634418e-05,0.016609313
DLPFC,0.9221092,15.38859,3.224609e-05,0.007513765
HIPPO,1.0321689,15.29984,2.823267e-05,0.092901235


In [23]:
# Sample counts for every Region x Sex x Dx combination in the Age > 17 subset.
summarise(group_by(df2, Region, Sex, Dx), N = n())

`summarise()` has grouped output by 'Region', 'Sex'. You can override using the `.groups` argument.



Region,Sex,Dx,N
<chr>,<fct>,<fct>,<int>
Caudate,F,Control,71
Caudate,F,Schizo,50
Caudate,M,Control,168
Caudate,M,Schizo,104
DLPFC,F,Control,66
DLPFC,F,Schizo,48
DLPFC,M,Control,145
DLPFC,M,Schizo,100
HIPPO,F,Control,74
HIPPO,F,Schizo,47


## Reproducibility Information

In [None]:
# Timestamp of this run and the CPU/elapsed time used by the R process so far.
Sys.time()
proc.time()
# Widen the console output before printing the session report.
options(width = 120)
# Report the R version, platform and package versions for this session.
sessioninfo::session_info()