# Run a Monte Carlo simulation to test whether the overlap of DEGs between multiple brain regions is significant

In [None]:
library(tidyverse)

## Load overlap annotation

In [None]:
## Load the cross-tissue DEG comparison table.
annot <- data.table::fread(
    "../../_m/brainseq_deg_across_tissues_comparison.csv"
)
## Preview the first two rows.
head(annot, 2)

In [None]:
## Size of the sampling universe: number of rows in the annotation table.
total <- nrow(annot) ## Unique elements in the list
## Number of rows flagged (== 1) in each region's indicator column.
caudate <- annot %>% filter(Caudate == 1) %>% nrow()
dlpfc <- annot %>% filter(DLPFC == 1) %>% nrow()
hippocampus <- annot %>% filter(Hippocampus == 1) %>% nrow()
gyrus <- annot %>% filter(`Dentate Gyrus` == 1) %>% nrow()

## Monte Carlo simulation

### Four brain regions

In [None]:
## Null distribution of the four-way overlap. Each of the 10,000 iterations
## draws, without replacement, as many indices from 1..total as each region has
## flagged rows, and records how many indices are common to all four draws.
##
## Seed the RNG so the simulated distributions are reproducible when the
## notebook is run from top to bottom.
set.seed(20210315)
sim <- vapply(seq_len(10000), function(i) {
    ## sample.int(total, k) makes the same draws as sample(1:total, k) for
    ## total >= 1, but does not fall back to sampling from c(1, 0) when
    ## total is 0.
    C <- sample.int(total, caudate)
    D <- sample.int(total, dlpfc)
    H <- sample.int(total, hippocampus)
    G <- sample.int(total, gyrus)
    length(Reduce(intersect, list(C, D, H, G)))
}, integer(1))

In [None]:
## Histogram of the simulated four-region overlap counts.
hist(sim)

In [None]:
## Observed overlap: rows flagged in all four regions.
shared <- nrow(filter(annot, Caudate == 1, DLPFC == 1,
                      Hippocampus == 1, `Dentate Gyrus` == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

### Three brain regions

#### Caudate vs DLPFC vs Hippocampus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in at least one of
## Caudate, DLPFC or Hippocampus.
total <- length(Reduce(union, list(
    filter(annot, Caudate == 1)$gene_id,
    filter(annot, DLPFC == 1)$gene_id,
    filter(annot, Hippocampus == 1)$gene_id
)))

## Null distribution of the three-way overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    C <- sample.int(total, caudate)
    D <- sample.int(total, dlpfc)
    H <- sample.int(total, hippocampus)
    length(Reduce(intersect, list(C, D, H)))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in Caudate, DLPFC and Hippocampus.
shared <- nrow(filter(annot, Caudate == 1, DLPFC == 1, Hippocampus == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### Caudate vs DLPFC vs Dentate Gyrus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in at least one of
## Caudate, DLPFC or Dentate Gyrus.
total <- length(Reduce(union, list(
    filter(annot, Caudate == 1)$gene_id,
    filter(annot, DLPFC == 1)$gene_id,
    filter(annot, `Dentate Gyrus` == 1)$gene_id
)))

## Null distribution of the three-way overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    C <- sample.int(total, caudate)
    D <- sample.int(total, dlpfc)
    G <- sample.int(total, gyrus)
    length(Reduce(intersect, list(C, D, G)))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in Caudate, DLPFC and Dentate Gyrus.
shared <- nrow(filter(annot, Caudate == 1, DLPFC == 1, `Dentate Gyrus` == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### Caudate vs Hippocampus vs Dentate Gyrus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in at least one of
## Caudate, Hippocampus or Dentate Gyrus.
total <- length(Reduce(union, list(
    filter(annot, Caudate == 1)$gene_id,
    filter(annot, Hippocampus == 1)$gene_id,
    filter(annot, `Dentate Gyrus` == 1)$gene_id
)))

## Null distribution of the three-way overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    C <- sample.int(total, caudate)
    H <- sample.int(total, hippocampus)
    G <- sample.int(total, gyrus)
    length(Reduce(intersect, list(C, H, G)))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in Caudate, Dentate Gyrus and Hippocampus.
shared <- nrow(filter(annot, Caudate == 1, `Dentate Gyrus` == 1,
                      Hippocampus == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### DLPFC vs Hippocampus vs Dentate Gyrus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in at least one of
## Hippocampus, DLPFC or Dentate Gyrus.
total <- length(Reduce(union, list(
    filter(annot, Hippocampus == 1)$gene_id,
    filter(annot, DLPFC == 1)$gene_id,
    filter(annot, `Dentate Gyrus` == 1)$gene_id
)))

## Null distribution of the three-way overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    D <- sample.int(total, dlpfc)
    H <- sample.int(total, hippocampus)
    G <- sample.int(total, gyrus)
    length(Reduce(intersect, list(D, H, G)))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in Dentate Gyrus, DLPFC and Hippocampus.
shared <- nrow(filter(annot, `Dentate Gyrus` == 1, DLPFC == 1,
                      Hippocampus == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

### Two brain regions

#### Caudate vs DLPFC

In [None]:
## Sampling universe: number of distinct gene_ids flagged in Caudate or DLPFC.
total <- length(union(
    filter(annot, Caudate == 1)$gene_id,
    filter(annot, DLPFC == 1)$gene_id
))

## Null distribution of the pairwise overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    C <- sample.int(total, caudate)
    D <- sample.int(total, dlpfc)
    length(intersect(C, D))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in both Caudate and DLPFC.
shared <- nrow(filter(annot, Caudate == 1, DLPFC == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### Caudate vs Hippocampus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in Caudate or
## Hippocampus.
total <- length(union(
    filter(annot, Caudate == 1)$gene_id,
    filter(annot, Hippocampus == 1)$gene_id
))

## Null distribution of the pairwise overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    C <- sample.int(total, caudate)
    H <- sample.int(total, hippocampus)
    length(intersect(C, H))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in both Caudate and Hippocampus.
shared <- nrow(filter(annot, Caudate == 1, Hippocampus == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### Caudate vs Dentate Gyrus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in Caudate or
## Dentate Gyrus.
total <- length(union(
    filter(annot, Caudate == 1)$gene_id,
    filter(annot, `Dentate Gyrus` == 1)$gene_id
))

## Null distribution of the pairwise overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    C <- sample.int(total, caudate)
    G <- sample.int(total, gyrus)
    length(intersect(C, G))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in both Caudate and Dentate Gyrus.
shared <- nrow(filter(annot, Caudate == 1, `Dentate Gyrus` == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### DLPFC vs Hippocampus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in DLPFC or
## Hippocampus.
total <- length(union(
    filter(annot, DLPFC == 1)$gene_id,
    filter(annot, Hippocampus == 1)$gene_id
))

## Null distribution of the pairwise overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    D <- sample.int(total, dlpfc)
    H <- sample.int(total, hippocampus)
    length(intersect(D, H))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in both DLPFC and Hippocampus.
shared <- nrow(filter(annot, DLPFC == 1, Hippocampus == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### DLPFC vs Dentate Gyrus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in DLPFC or
## Dentate Gyrus.
total <- length(union(
    filter(annot, DLPFC == 1)$gene_id,
    filter(annot, `Dentate Gyrus` == 1)$gene_id
))

## Null distribution of the pairwise overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    D <- sample.int(total, dlpfc)
    G <- sample.int(total, gyrus)
    length(intersect(D, G))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in both DLPFC and Dentate Gyrus.
shared <- nrow(filter(annot, DLPFC == 1, `Dentate Gyrus` == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

#### Hippocampus vs Dentate Gyrus

In [None]:
## Sampling universe: number of distinct gene_ids flagged in Hippocampus or
## Dentate Gyrus.
total <- length(union(
    filter(annot, Hippocampus == 1)$gene_id,
    filter(annot, `Dentate Gyrus` == 1)$gene_id
))

## Null distribution of the pairwise overlap over 10,000 random draws.
## sample.int(total, k) makes the same draws as sample(1:total, k) for
## total >= 1 without the 1:0 pitfall; vapply() guarantees an integer vector.
sim <- vapply(seq_len(10000), function(i) {
    H <- sample.int(total, hippocampus)
    G <- sample.int(total, gyrus)
    length(intersect(H, G))
}, integer(1))

hist(sim)

In [None]:
## Observed overlap: rows flagged in both Hippocampus and Dentate Gyrus.
shared <- nrow(filter(annot, Hippocampus == 1, `Dentate Gyrus` == 1))
print(paste("The max overlap from simulation:", max(sim)))
print(paste("The overlap is:", shared))
## Empirical one-sided p-values. The observed overlap is counted as one extra
## draw (+1 in numerator and denominator), so the estimate is never exactly 0.
## The denominator is taken from length(sim) so it always matches the number
## of simulated draws instead of repeating a hard-coded count.
pval1 <- (sum(sim >= shared) + 1) / (length(sim) + 1)
pval2 <- (sum(sim <= shared) + 1) / (length(sim) + 1)
print(paste("Enrichment p-value <", format(pval1, digits = 2)))
print(paste("Depletion p-value <", format(pval2, digits = 2)))

## Reproducibility Information

In [None]:
## Print run-time details so the results can be traced to a specific session.
print("Reproducibility Information:")
## Current date and time.
Sys.time()
## CPU and elapsed time used by this R process so far.
proc.time()
## Set the output line width to 120 characters before the session report.
options(width=120)
## Session report from the sessioninfo package.
sessioninfo::session_info()