# **AS events vs X chromosomal escape**


It has been reported that differentially expressed sex-biased genes are likely to be 
linked to escape from X chromosome inactivation ([Landscape of X chromosome inactivation across human tissues.
Tukiainen T, et al. Nature 2017;550:244-248](https://pubmed.ncbi.nlm.nih.gov/29022598/)). 

Because of the observed overlap between sex-biased differential expression and AS, we hypothesized that AS events might be more commonly observed in X chromosomal genes that escape inactivation. 

## Retrieve data about X chromosomal escape
These data were extracted from the supplemental material of the Tukiainen et al. paper. Each gene is assigned one of four X-inactivation statuses:

1. Escape
2. Inactive
3. Variable
4. Unknown

We examine here the hypothesis that alternatively spliced genes are more likely to escape X inactivation than one would expect
from the X chromosomal average. We compare the proportions of genes labeled ``Escape`` and ``non-Escape`` (everything else) using a Fisher exact test.

In [None]:
# Turn off warnings for the rest of the session, then attach ggplot2 without
# printing its start-up messages.
options(warn = -1)
suppressMessages(library(ggplot2))

In [None]:
# Load the X-chromosomal gene annotations (tab-separated, no header line) and
# label the four columns by hand.
xchromgenetable <- read.table(
    file = "../assets/TukiainenSuppFig2extract.tsv",
    sep = "\t",
    header = FALSE,
    skipNul = FALSE,
    stringsAsFactors = FALSE
)
names(xchromgenetable) <- c("ENSG.id", "symbol", "escape.status", "par.status")

# Collect set of X chromosomal genes from the Tukiainen paper

In [None]:
# Preview the first rows of the annotation table
head(xchromgenetable, n = 6L)

In [None]:
# Sorted symbols of all genes in the annotation table (sort() drops NA entries
# by default) and how many there are.
XchromGenes <- sort(xchromgenetable[["symbol"]])
n_xchrom_genes <- length(XchromGenes)
message(
    "Number of X chromosomal genes from Tukiainen paper: ",
    n_xchrom_genes
)

In [None]:
# Split the X-chromosomal genes into those whose status is exactly "Escape" and
# those with any other status, then report the size of each group.
xchrom_escaped <- sort(xchromgenetable[xchromgenetable$escape.status == "Escape", ]$symbol)
n_xchrom_escaped <- length(xchrom_escaped)
xchrom_non_escaped <- sort(xchromgenetable[xchromgenetable$escape.status != "Escape", ]$symbol)
n_xchrom_non_escaped <- length(xchrom_non_escaped)

# The two groups together must account for every gene counted in n_xchrom_genes.
if (n_xchrom_genes != (n_xchrom_escaped + n_xchrom_non_escaped)) {
    stop("Problems extracting correct number of X chromosomal genes")
}

message("X chromosomal genes from Tukiainen et al n=", n_xchrom_genes)
message("Escaped X chromomosal genes: ", n_xchrom_escaped, " of a total of ", n_xchrom_genes, " X-chromosomal genes: ", formatC((100 * n_xchrom_escaped / n_xchrom_genes), digits = 3), "%")
# The percentage is taken over all X-chromosomal genes, so the reported total
# must be n_xchrom_genes (not the non-escaped count itself).
message("Non-Escaped X chromomosal genes: ", n_xchrom_non_escaped, " of a total of ", n_xchrom_genes, " X-chromosomal genes: ", formatC((100 * n_xchrom_non_escaped / n_xchrom_genes), digits = 3), "%")

# Check overlap of differentially spliced genes with X-chromosomal escaped genes
First, we retrieve the genes that show significant differential alternative splicing (DAS).

In [None]:
# Load the alternative-splicing gene table; its first line holds the column names.
total_AS_Genes <- read.table(
    file = "../data/Total_AS_by_geneSymbol.tsv",
    sep = "\t",
    header = TRUE,
    skipNul = FALSE,
    stringsAsFactors = FALSE
)

In [None]:
# Preview the first rows of the alternative-splicing table
head(total_AS_Genes, n = 6L)

In [None]:
# Gene symbols from the alternative-splicing table, and the subset of them that
# also appears in the X-chromosomal gene list.
sigAsGenes <- sort(total_AS_Genes$GeneSymbol)
xchromosomalSigAsGenes <- intersect(sigAsGenes, XchromGenes)
message(
    "Total X chromosomal genes showing alternative splicing: ",
    length(xchromosomalSigAsGenes)
)

In [None]:
# Build the 2x2 contingency table of escape status versus DAS status for the
# X-chromosomal genes and test it with Fisher's exact test.

# DAS genes on the X chromosome, split by escape status
das_xchrom_escape <- intersect(xchrom_escaped, xchromosomalSigAsGenes)
das_xchrom_non_escape <- setdiff(xchromosomalSigAsGenes, das_xchrom_escape)
n_das_xchrom_escape <- length(das_xchrom_escape)
n_das_xchrom_non_escape <- length(das_xchrom_non_escape)

# Non-DAS X chromosomal genes, split the same way
nondas_xchrom <- setdiff(XchromGenes, xchromosomalSigAsGenes)
nondas_xchrom_escape <- intersect(nondas_xchrom, xchrom_escaped)
nondas_xchrom_non_escape <- intersect(nondas_xchrom, xchrom_non_escaped)
n_nondas_xchrom_escape <- length(nondas_xchrom_escape)
n_nondas_xchrom_non_escape <- length(nondas_xchrom_non_escape)

# Sanity check: the four cells should add up to the number of X-chromosomal
# genes. intersect()/setdiff() return unique symbols, so a mismatch may simply
# mean that some symbols occur more than once in XchromGenes -- TODO confirm.
# The check only reports the discrepancy; it does not abort the analysis.
mysum <- n_das_xchrom_escape + n_das_xchrom_non_escape + n_nondas_xchrom_escape + n_nondas_xchrom_non_escape
if (n_xchrom_genes != mysum) {
    message("nondas_xchrom: ", length(nondas_xchrom))
    message("n_das_xchrom_escape: ", n_das_xchrom_escape, " n_das_xchrom_non_escape: ", n_das_xchrom_non_escape)
    message("n_nondas_xchrom_escape: ", n_nondas_xchrom_escape, " n_nondas_xchrom_non_escape: ", n_nondas_xchrom_non_escape)
    message("n_xchrom_genes (expected total): ", n_xchrom_genes)
    message("but the four contingency cells sum to ", mysum)
    #stop("Problems extracting correct number of X chromosomal genes")
}

# Rows: escape / non-escape; columns: DAS / non-DAS
x_vs_as <- matrix(c(n_das_xchrom_escape, n_nondas_xchrom_escape,
                    n_das_xchrom_non_escape, n_nondas_xchrom_non_escape),
                  nrow = 2, byrow = TRUE)
x_vs_as
fisher.test(x_vs_as)

# Get set of genes that show significant differential expression in at least one tissue
Note that one of the column headers is blank, so we skip the header line to avoid a parse error. The columns are therefore referred to by their default names (`V2`, `V3`, ...).

In [None]:
# Load the differential-expression table. The first line is skipped rather than
# parsed as a header, and the first column supplies the row names.
dge <- read.table(
    file = "../data/gene_dge.tsv",
    header = FALSE,
    sep = "\t",
    row.names = 1,
    skip = 1
)

In [None]:
# Preview the first rows of the differential-expression table
head(dge, n = 6L)

In [None]:
# Values of column V5 of the DGE table, restricted to X-chromosomal genes.
dge_genes <- sort(dge$V5)
xchromosomalDiffGenes <- sort(intersect(dge_genes, XchromGenes))
n_dge_xchrom <- length(xchromosomalDiffGenes)

# DGE genes on the X chromosome, split by escape status
dge_xchrom_escape <- intersect(xchrom_escaped, xchromosomalDiffGenes)
n_dge_xchrom_escape <- length(dge_xchrom_escape)
dge_xchrom_nonescape <- setdiff(xchromosomalDiffGenes, xchrom_escaped)
n_dge_xchrom_nonescape <- length(dge_xchrom_nonescape)

# Remaining (non-DGE) X-chromosomal genes, split the same way
nondge_xchrom <- setdiff(XchromGenes, xchromosomalDiffGenes)
n_nondge_xchrom <- length(nondge_xchrom)
nondge_xchrom_escape <- intersect(nondge_xchrom, xchrom_escaped)
n_nondge_xchrom_escape <- length(nondge_xchrom_escape)
nondge_xchrom_non_escape <- intersect(nondge_xchrom, xchrom_non_escaped)
n_nondge_xchrom_non_escape <- length(nondge_xchrom_non_escape)

# Sum of the four contingency cells
subtotal <- sum(
    n_dge_xchrom_escape,
    n_dge_xchrom_nonescape,
    n_nondge_xchrom_escape,
    n_nondge_xchrom_non_escape
)

# Report the individual counts
message("nondge_xchrom: ", n_nondge_xchrom)
message(
    "n_dge_xchrom_escape: ", n_dge_xchrom_escape,
    " n_dge_xchrom_non_escape: ", n_dge_xchrom_nonescape
)
message(
    "n_nondge_xchrom_escape: ", n_nondge_xchrom_escape,
    " n_nondge_xchrom_non_escape: ", n_nondge_xchrom_non_escape
)
message("n_xchrom_genes: ", n_xchrom_genes)

In [None]:
# 2x2 contingency table -- rows: DGE / non-DGE; columns: escape / non-escape --
# followed by Fisher's exact test on it.
x_vs_dge <- rbind(
    c(n_dge_xchrom_escape, n_dge_xchrom_nonescape),
    c(n_nondge_xchrom_escape, n_nondge_xchrom_non_escape)
)
x_vs_dge
fisher.test(x_vs_dge)

# **Make grouped bar plot**
### First collect the counts of escape/inactive/variable X chromosomal, AS, and DGE genes

In [None]:
# Shared colour palette (hex codes) so that figures use the same colours.
darkblue <- "#3c5488"
blue <- "#4BDDB5"
red <- "#e64b35"
nearlyblack <- "#040C04"
purple <- "#790079"
orange <- "#ff9900"
green <- "#00A087"

### Transform counts into percentages

In [None]:
# Turn the escape / non-escape counts into fractions of each gene set (DAS
# genes, all X-chromosomal genes, DGE genes). The *.per variables hold
# fractions; they are multiplied by 100 only when printed.

# Differentially alternatively spliced genes
n_das_total <- n_das_xchrom_escape + n_das_xchrom_non_escape
as.escape.per <- n_das_xchrom_escape / n_das_total
as.nonescape.per <- n_das_xchrom_non_escape / n_das_total

# All X-chromosomal genes
x.total <- n_xchrom_escaped + n_xchrom_non_escaped
x.escape.per <- n_xchrom_escaped / x.total
x.nonescape.per <- n_xchrom_non_escaped / x.total

# Differentially expressed genes
dge.total <- n_dge_xchrom_escape + n_dge_xchrom_nonescape
dge.escape.per <- n_dge_xchrom_escape / dge.total
dge.nonescape.per <- n_dge_xchrom_nonescape / dge.total

# Each message prints "count (percentage%)" for both groups. The non-escape
# percentage of the AS line is formatted like all the others, and the DGE line
# prints the non-escape count rather than the fraction.
message("AS escape: ", n_das_xchrom_escape, " (", formatC(100 * as.escape.per, digits = 3), "%) AS non-escape: ", n_das_xchrom_non_escape, " (", formatC(100 * as.nonescape.per, digits = 3), "%)")
message("X chr escape: ", n_xchrom_escaped, " (", formatC(100 * x.escape.per, digits = 3), "%) X chr nonescape: ", n_xchrom_non_escaped, " (", formatC(100 * x.nonescape.per, digits = 3), "%)")
message("DGE escape: ", n_dge_xchrom_escape, " (", formatC(100 * dge.escape.per, digits = 3), "%) DGE inactive: ", n_dge_xchrom_nonescape, " (", formatC(100 * dge.nonescape.per, digits = 3), "%)")

In [None]:
library(ggplot2)

# Assemble the plotting data: one row per (gene set, status) pair, with `value`
# holding the fraction of that gene set.
# NOTE(review): the "inactive" label is applied to the *.nonescape.per values.
category <- c(rep("DAS", 2), rep("chr X", 2), rep("DGE", 2))
condition <- rep(c("escape", "inactive"), 3)
value <- c(as.escape.per, as.nonescape.per,
           x.escape.per, x.nonescape.per,
           dge.escape.per, dge.nonescape.per)
data <- data.frame(category, condition, value)

# Grouped (dodged) bar chart with the y axis shown as percentages.
# The stray `color = barcolors` argument to ggplot() was dropped: `barcolors`
# is not defined here, and the fill colours are set by scale_fill_manual().
g <- ggplot(data, aes(fill = condition, y = value, x = category)) +
    geom_bar(position = "dodge", stat = "identity") +
    theme_bw() +
    scale_y_continuous(labels = scales::percent) +
    scale_fill_manual(values = c(red, darkblue)) +
    theme(axis.text = element_text(size = 32),
          axis.title = element_blank(),
          legend.title = element_blank(),
          legend.text = element_text(size = 32),
          legend.position = c(0.82, 0.9)
          )

# Write the figure to disk, then display it in the notebook
ggsave("../pdf/XchromosomalEscape.pdf", g)
message("Saved plot as ../pdf/XchromosomalEscape.pdf")
g