# Visualize results, local splicing

## Load Libraries

In [1]:
library(tidyverse)
library(ggplot2)
library(DT)
library(leafcutter)
library(reshape2)
library(gridExtra)
library(intervals) # needed for pretty strand arrow placement
library(foreach)
library(grid)
library(gtable)
library(ggrepel)

-- [1mAttaching packages[22m ------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --

[32mv[39m [34mggplot2[39m 3.3.5     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.1.2     [32mv[39m [34mdplyr  [39m 1.0.7
[32mv[39m [34mtidyr  [39m 1.1.3     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ---------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: Rcpp


Attaching package: 'reshape2'


The following object is masked from 'package:tidyr':

 

## Summary of results

In [2]:
lname = load('../../_m/leafviz.RData')
lname

In [3]:
levels(meta$group) <- c("Female", "Male")
sample_table

group,count
<chr>,<int>
F,121
M,254


In [4]:
cluster_summary

Results,n
<chr>,<int>
Number of differentially spliced clusters at FDR = 0.05,31
Fully annotated,11
Contain unannotated junctions,20


In [5]:
intron_summary

Results,n
<chr>,<int>
Number of fully annotated junctions,70
Number of junctions with cryptic 5' splice site,15
Number of junctions with cryptic 3' splice site,20
Number of junctions with two cryptic splice sites,20
Number of novel junctions that connect two annotated splice sites,7


In [6]:
clusters['gene'] <- gsub("</i>", "", gsub("<i>", "", clusters$gene))
head(clusters)

Unnamed: 0_level_0,clusterID,N,coord,gene,annotation,FDR
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>
31,clu_739_-,3,chrX:53217966-53220839,KDM5C,annotated,7.29e-35
29,clu_729_-,12,chrX:53176622-53193437,KDM5C,cryptic,2.62e-25
27,clu_57947_+,3,chrX:47199106-47199480,UBA1,cryptic,3.27e-11
19,clu_164860_+,3,chr7:74189918-74194741,EIF4H,annotated,1.59e-05
30,clu_736_-,3,chrX:53210576-53211497,KDM5C,cryptic,2.37e-05
3,clu_5190_-,16,chr11:62520391-62530586,.,annotated,0.000102


In [7]:
write.table(clusters, file="cluster_ds_results_annotated.txt", 
            sep="\t", quote=FALSE, row.names=FALSE)

## Generate plots

### Define functions

In [8]:
filter_intron_table <- function(introns, clu){
    d < - introns %>% filter(clusterID == clu) %>% 
        select(chr, start, end, verdict, deltapsi) %>%
        arrange(desc(abs(deltapsi))) %>%
        rename("ΔPSI" = deltapsi)
    row.names(d) <- letters[1:nrow(d)] # letters is just a:z
    return(d)
}


getGeneLength <- function(gene_name, exons_table){
    exons      <- exons_table[ exons_table$gene_name == gene_name, ]
    geneStart  <- min(exons$start)
    geneEnd    <- max(exons$end)
    geneLength <- geneEnd - geneStart
    if( geneLength >1e6){
        pixels <- 5000
    } else if ( geneLength > 5e5 & geneLength < 1e6){
        pixels <- 3000
    } else if ( geneLength > 1.5e5 & geneLength <= 5e5){
        pixels <- 2000
    } else {
        stopifnot(geneLength <= 1.5e5)
        pixels <- "auto"
    }
    return(pixels)
}


select_data <- function(sel, clusters, exons_table){
    gene <- clusters[ sel, ]$gene
    width <- getGeneLength(gene, exons_table)
    clusterID <- clusters[ sel, ]$clusterID
    coord <- clusters[ sel, ]$coord
    return(list(gene = gene, width = width, cluster = clusterID, coord = coord))
}

### Plot top 6 clusters

In [9]:
plot_cluster <- function(num, clusters, dir='./'){
    mydata = select_data(num, clusters, exons_table)
    if(mydata$gene == '.'){
        mydata$gene = gsub("-", "_", gsub(":", "_", mydata$coord))
    }
    gene_name = mydata$gene
    plotTitle <- paste0(gene_name, '_', mydata$cluster, '_top_',num,'.pdf')
    pdf(file=paste0(dir, plotTitle), width = 10, height = 5)
    print(make_cluster_plot(mydata$cluster,
                            main_title = plotTitle,
                            meta = meta,
                            cluster_ids = cluster_ids,
                            exons_table = exons_table,
                            counts = counts,
                            introns = introns))
    dev.off()

    if (is.numeric(mydata$width)) {
        new_width = mydata$width / 100
    } else {
        new_width = mydata$width
    }

    pdf(file=paste0(dir, gene_name, '_allClusters_top_',num,'.pdf'), 
        width=new_width, height=6)
    try(print(make_gene_plot(mydata$gene,
                         counts = counts,
                         introns = introns,
                         exons_table = exons_table,
                         cluster_list = clusters,
                         clusterID = mydata$cluster,
                         introns_to_plot = introns_to_plot, debug=F)))
    dev.off()
}


## Plot splicing

In [10]:
dir.create("top10")
for(num in 1:10){
    ii = plot_cluster(num, clusters, "top10/")
}

"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"no non-missing arguments to min; returning Inf"
"no non-missing arguments to max; returning -Inf"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"no non-missing arguments to min; returning Inf"
"no non-missing arguments to max; returning -Inf"


Error in make_gene_plot(mydata$gene, counts = counts, introns = introns,  : 
  length(unique(exons$chr)) == 1 is not TRUE


"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"no non-missing arguments to min; returning Inf"
"no non-missing arguments to max; returning -Inf"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"no non-missing arguments to min; returning Inf"
"no non-missing arguments to max; returning -Inf"


Error in make_gene_plot(mydata$gene, counts = counts, introns = introns,  : 
  length(unique(exons$chr)) == 1 is not TRUE


"no non-missing arguments to min; returning Inf"
"no non-missing arguments to max; returning -Inf"
"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"
"no non-missing arguments to min; returning Inf"
"no non-missing arguments to max; returning -Inf"


Error in make_gene_plot(mydata$gene, counts = counts, introns = introns,  : 
  length(unique(exons$chr)) == 1 is not TRUE


"`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead."


TableGrob (2 x 1) "arrange": 2 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (2-2,1-1) arrange gtable[layout]


"'mode(width)' differs between new and previous
	 ==> NOT changing 'width'"


In [11]:
dir.create("x_chromosome")
x_clu = clusters %>% filter(str_detect(coord, "chrX"), FDR < 0.05)
x_clu

clusterID,N,coord,gene,annotation,FDR
<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>
clu_739_-,3,chrX:53217966-53220839,KDM5C,annotated,7.29e-35
clu_729_-,12,chrX:53176622-53193437,KDM5C,cryptic,2.62e-25
clu_57947_+,3,chrX:47199106-47199480,UBA1,cryptic,3.27e-11
clu_736_-,3,chrX:53210576-53211497,KDM5C,cryptic,2.37e-05
clu_57932_+,6,chrX:47084600-47092061,RGN,cryptic,0.00417
clu_1778_-,5,chrX:152989331-152991916,PNMA5,cryptic,0.00433
clu_59263_+,4,chrX:153768428-153769162,PLXNB3,cryptic,0.0476


In [None]:
for(num in 1:dim(x_clu)[1]){
    plot_cluster(num, x_clu, "x_chromosome/")
}

In [12]:
#dir.create("ank3")
ank3 = clusters %>% filter(gene == 'ANK3')
ank3

clusterID,N,coord,gene,annotation,FDR
<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>
