# Prepare data for plotting with MASER

To run the analysis at scale, rMATS 3.2.5 was executed in parallel in a pair-wise, sample-by-sample manner.   The results were aggregated into matrices by deconvolving the output produced by rMATS 3.2.5.   To prepare specific genes and their transcript isoforms for plotting, we reconstruct that data, putting it back into the standard rMATS 3.2.5 output format.

## Alternative Splice Site Types are: (se, a3ss, a5ss, mxe, ri)

  * Skipped Exon events (se),
  * Alternative 3' splice site (a3ss),
  * Alternative 5' splice site (a5ss),
  * Mutually exclusive exon (mxe),
  * and retained intron (ri)

## There are two different kinds of junction counts

For our analysis here, we used just the jc count matrices.
  * jc = junction counts - reads that cross the junction
  * jcec = junction counts plus reads on the target (such as included exon)

## And the count type -- there are 5 types

  * inclusion levels (percent spliced in)
  * included junction counts (ijc)
  * skipped junction counts (sjc)
  * inclusion length (inclen)
  * skipped length (skiplen)

## 1. Loading dependencies

In [None]:
suppressMessages({
library(readr)
library(edgeR)
library(limma)
library(Biobase)
library(tibble)
library(R.utils)
library(snakecase)
library(icesTAF)
})

# 1.0 Data preparation

We need two things:

* list of genes of interest
* rmats results files

The significant results were obtained on a per-sample basis.   
In the reconstructed output, SAMPLE_1 holds the male samples and SAMPLE_2 holds the female samples.

### 1.2 get rMATS GTF annotations

For each splicing type the junctions are defined, so there are 5 annotation files, one per splicing type, each listing the junction IDs for that type:
fromGTF.A3SS.txt <- annotations for the alternative 3' splice site junctions
fromGTF.A5SS.txt <- annotations for the alternative 5' splice site junctions
fromGTF.MXE.txt <- annotations for the mutually exclusive exon junctions
fromGTF.RI.txt <- annotations for the retained introns junctions
fromGTF.SE.txt <- annotations for the skipped exon junctions

In [None]:
# Unpack the released rMATS `fromGTF.*` junction annotation files.
#
# Expects `fromGTF.tar.gz` to already be present in `destDir`. The archive is
# extracted into `destDir` and every `fromGTF.*txt.gz` file found there is
# then gunzipped in place. Both steps shell out (mkdir/tar/gunzip).
#
# destDir: directory that holds the archive and receives the extracted files.
#          Defaults to "../data", the location that used to be hard-coded, so
#          existing calls (with or without an argument) keep working.
# Returns: always 0.
getReleasedGTFAnnotations <- function ( destDir = "../data" ) {

    archive <- file.path(destDir, "fromGTF.tar.gz")

    message("Decompressing fromGTF.tar.gz into ", destDir)
    # paths are shell-quoted so a directory name with spaces cannot break the command
    system(paste("mkdir -p", shQuote(destDir),
                 "&& tar xvfz", shQuote(archive), "-C", shQuote(destDir)),
           intern = TRUE)
    message("Done!\n")
    message("Gunzipping files into ", destDir)
    # the glob stays outside the quotes so the shell still expands it
    system(paste0("gunzip ", shQuote(destDir), "/fromGTF.*txt.gz"), intern = TRUE)
    message("Done!\n")
   return (0)
}

### 1.2.6 get reduced Tissue Data

Stored in the assets subdirectory, reduced by inspection and selection focusing on those tissues with sufficient samples.

In [None]:
# Load the tissue-reduction table and give its columns canonical names.
#
# filename: path to a tab-separated file with a header row.
# Returns: the table as a data.frame whose columns are renamed, in order, to
#          SMTSD, female, male, include and display_name.
getTissueReduction <- function ( filename ) {

    column_names <- c("SMTSD","female","male","include","display_name")

    reduction_table <- read.table(file = filename, sep = "\t", header = TRUE,
                                  stringsAsFactors = FALSE, skipNul = FALSE)
    names(reduction_table) <- column_names

    reduction_table
}

### 1.2.8 get GTEx phenotype data for the SRR accessions
Transitive closure permits the association of the sequence reads, SRR Accessions, through the SraRunTable.txt (obtained from selecting annotation from the dbGaP login) with the SAMPID used with the GTEx.  This SAMPID is the means by which we can get this phenotype data and associate it with the counts data.

In [None]:
# Load the phenotype table for the SRR accessions.
#
# destDir: directory containing `srr_pdata.csv`. Defaults to "../data", the
#          location that used to be hard-coded, so existing calls keep working.
# Returns: the table exactly as read by readr::read_csv().
getGTExPhenoDataForSRR <- function (destDir = "../data") {

    pdata_file <- file.path(destDir, "srr_pdata.csv")

    message("Loading srr_pdata\n")
    srr_pdata <- readr::read_csv(pdata_file)
    message("done!\n")

    return(srr_pdata)
}

### 1.2.9 makeCountsMatrix 

Given the counts filename, make a data matrix.

In [None]:
# Read a counts file and return it as a numeric matrix.
#
# The first column of the file is dropped from the data; the values of its
# `ID` column become the row names of the returned matrix. The remaining
# columns are kept, in file order, as the matrix columns.
#
# filename_gz: path of the counts file, passed straight to data.table::fread().
# Returns: matrix produced by data.matrix(), with row names taken from `ID`.
makeCountsMatrix <- function (filename_gz) {
    message("\nloading ", paste(filename_gz, collapse=" "))
    # data.table = FALSE returns a plain data.frame. Row names assigned to a
    # data.table are not kept through `[`, so the IDs used to be lost.
    counts <- data.table::fread(filename_gz, data.table = FALSE)
    message("done!")
    ids    <- counts[["ID"]]
    # drop = FALSE keeps a one-sample file as a one-column matrix
    counts <- data.matrix(counts[, -1, drop = FALSE])
    # attach the IDs to the final matrix, after every subsetting step
    rownames(counts) <- ids
    return(counts)
}

In [None]:
# Build an ExpressionSet from a counts matrix and the SRR phenotype table.
#
# Only the count columns whose name appears in srr_pdata$SRR are kept, and the
# phenotype rows are reordered so that row i describes count column i.
#
# srr_pdata: data.frame (or tibble) with an `SRR` column holding run accessions.
# counts:    matrix whose column names are run accessions.
# Returns: an ExpressionSet whose exprs() is the reduced counts matrix and
#          whose pData() is the matched, reordered phenotype table.
makeSplicingExpressionSetObject <- function (srr_pdata, counts) {
    message("making splicing expressionSet object")
    #
    # match srr counts with srr_pdata - there were some srr without phenodata
    #
    srr_ids     <- as.character(srr_pdata[["SRR"]])
    pdata_match <- as.character(colnames(counts)) %in% srr_ids

    # drop = FALSE keeps a matrix (and its column names) when only one run matches
    counts <- counts[, pdata_match, drop = FALSE]

    #
    # reorder the srr_pdata to match the colnames of the counts
    #
    reorder_idx <- match(as.character(colnames(counts)), srr_ids)
    srr_pdata   <- as.data.frame(srr_pdata[reorder_idx, , drop = FALSE])
    # the phenotype row names must equal the sample (column) names of the
    # counts for the ExpressionSet to accept the phenoData
    rownames(srr_pdata) <- colnames(counts)

    #
    # make the srr_pdata an AnnotatedDataFrame
    #
    # NOTE(review): Biobase's standard varMetadata column is `labelDescription`
    # (singular); the plural name is kept as-is here - confirm it is intended.
    metadata <- data.frame(labelDescriptions=as.character(colnames(srr_pdata)))
    phenoData <- new("AnnotatedDataFrame", data = srr_pdata, varMetadata=metadata)

    #
    # make the counts an expressionSet
    # and provide the phenoData (the annotatedDataFrame construct from above)
    #
    es <- ExpressionSet(as.matrix(counts))
    phenoData(es) <- phenoData

    # collapse belongs to paste(); passed to message() the dims print run together
    message("made new expressionSet object\n",
           paste(dim(es), collapse = " "))
    message("dim pData(es)\n",
           paste(dim(pData(es)), collapse = " "))
    message("dim exprs(es)\n",
           paste(dim(exprs(es)), collapse = " "))
    message("done!\n")

    return(es)
}

## 1.3 Preprocessing 


### 1.3.1  Reduce Sample Set 
Read in all requirements so that the stage is properly set -- tissues.tsv contains the subset of files desired for analysis.
It is found in the `assets` subdirectory

In [None]:
# Restrict an ExpressionSet to the samples whose tissue is flagged for inclusion.
#
# Tissue names (SMTSD) in both the phenotype data and the tissue table are
# converted to snake_case before matching, and the returned object carries the
# snake_cased names as a factor with unused levels dropped.
#
# tissue_reduction: data.frame with `SMTSD` and `include` columns; rows with
#                   include == 1 name the tissues to keep.
# es:               ExpressionSet whose pData() has an `SMTSD` column.
# Returns: `es` reduced (by column) to the samples from the kept tissues.
reduceSampleSet <- function (tissue_reduction, es) {

   # collapse belongs to paste(); passed to message() the numbers print run together
   message("\nsize tissue_reduction\n",
        paste(dim(tissue_reduction), collapse=" "))
   message("\nsize es\n",
        paste(dim(es), collapse=" "))
   message("\nsize pData(es)\n",
        paste(dim(pData(es)), collapse=" "))
   # only include those tissues we wish to continue with
   message("\n number of tissue types to keep\n",
        paste(table(tissue_reduction$include), collapse = " "))
   # which() skips NA flags instead of producing all-NA rows
   tissue_reduction <- tissue_reduction[which(tissue_reduction$include == 1), , drop = FALSE]

   # create a matching tissue name to go with the expressionSet phenotype object
   pData(es)$SMTSD        <- factor(snakecase::to_snake_case(as.character(pData(es)$SMTSD)))
   tissue_reduction$SMTSD <- factor(snakecase::to_snake_case(as.character(tissue_reduction$SMTSD)))

   message("\nlength tissues in phenotype data\n",
        paste(length(levels(pData(es)$SMTSD)), collapse = " "))
   message("\nlength tissues in tissue_reduction data\n",
        paste(length(tissue_reduction$SMTSD), collapse = " "))

   keep <- pData(es)$SMTSD %in% tissue_reduction$SMTSD
   message("\nhow many to keep in phenotype data\n",
        paste(table(keep), collapse = " "))

   es              <- es[, keep]
   # re-factor so tissues that were removed no longer appear as levels
   pData(es)$SMTSD <- factor(pData(es)$SMTSD)
   message("\nsize reduced es\n",
        paste(dim(es), collapse=" "))
   message("\nsize pData(es)\n",
        paste(dim(pData(es)), collapse=" "))
   message("\nsize exprs(es)\n",
        paste(dim(exprs(es)), collapse = " "))

   return (es)
}

### 3.2 MAIN routine

In [None]:
# Unpack the fromGTF annotation files. The argument is passed by name with `=`;
# `destDir <- ...` inside a call is an assignment expression, not a named argument.
getReleasedGTFAnnotations(destDir = "../data/")

In [None]:
# get the metadata: the tissue selection table and the phenotype data for the SRR runs
tissue_reduction <- getTissueReduction(filename = "../assets/tissues.tsv")
srr_pdata        <- getGTExPhenoDataForSRR(destDir = "../data/")

In [None]:
# the five splicing event types
splice_list <- c("se", "a3ss", "a5ss", "mxe", "ri")
# keep only the tissues flagged for inclusion, then derive their snake_case names
tissue_reduction <- tissue_reduction[tissue_reduction[["include"]] == 1, ]
tissue_list <- factor(
    snakecase::to_snake_case(as.character(tissue_reduction[["SMTSD"]]))
)
# # Read in the list of genes of interest

In [None]:
# Load the genes of interest; only the `genes` column is kept, as a character vector.
genes_of_interest <- as.character(
    read.table(file = "../data/genes_of_interest.tsv", sep = "\t", header = TRUE,
               stringsAsFactors = FALSE, skipNul = FALSE)$genes
)

In [None]:
# Table of alternative-splicing events of interest. The loop below reads its
# ASE, Tissue, GeneSymbol, ASE_IDX, AdjPVal and B columns.
gene_as <- read.table(file = "../data/genes_as_of_interest.tsv",
                      sep = "\t", header = TRUE,
                      stringsAsFactors = FALSE, skipNul = FALSE)

In [None]:
# Start (overwrite) the inclusion-level summary file with its tab-separated
# header row; the data rows are appended later, one per junction.
writeLines(
    paste(c("SpliceType", "Tissue", "ID", "GeneID", "geneSymbol", "IncLevelDifference"),
          collapse = "\t"),
    con = "../data/incLevelDifferences.tsv"
)

In [None]:
# root directory for the reconstructed output; one sub-directory per tissue is created below
maser_directory <- "../data/MASER2020Sept16"
mkdir(maser_directory)

 for (tissue_index in (tissue_list)) {

   tissue_dir = paste(maser_directory, tissue_index, sep="/")
   mkdir(tissue_dir)

   for (splice_type in splice_list) {
   
    if (splice_type == "se") {

     message("tissue of interest is ", tissue_index)

     message("splice_type is ", splice_type)

     # for each splice type the ReadsOnTarget output file we are generating is unique
     file <- paste(paste("../data/fromGTF",
              snakecase::to_swap_case(as.character(splice_type)), sep="."),
      	"txt", sep=".")
     fromGTF          <- read.table(file=file, header=TRUE)

     # Reduce the rows by focusing only on the splice_type, tissue type
     splice_type_gene_as <- gene_as[gene_as$ASE == snakecase::to_swap_case(as.character(splice_type)),]
     message("splice_type_gene_as dim is " , dim(splice_type_gene_as))
      
     # Reduce rows by focusing on the tissue of interest
     tissue              <- splice_type_gene_as$Tissue
      
     keep_tissue         <- tissue %in% tissue_index

     if (sum(keep_tissue == TRUE) > 0) {
	
       splice_type_gene_as <- splice_type_gene_as[keep_tissue == TRUE,]

      # Reduce the rows by focusing only on the genes of interest
      gs                  <- splice_type_gene_as$GeneSymbol
      if (length(gs) > length (genes_of_interest)) {
           keep_gs             <- gs %in% genes_of_interest
      } else {
           keep_gs             <- genes_of_interest %in% gs
      }
      if (sum(keep_gs == TRUE) > 0) {
        splice_type_gene_as <- splice_type_gene_as[keep_gs == TRUE,]

In [None]:
         # order this remaining matrix
         idx                 <- order(splice_type_gene_as$ASE_IDX)
         splice_type_gene_as <- splice_type_gene_as[idx,]
  
         # extract the junction to reduce the other matrices by
  	 junction            <- splice_type_gene_as$ASE_IDX
	 
	 if (sum(is.na(junction)) == 0) {
	  
          message("junctions are = ", junction)
	
          # reduce rows by keeping only the junctions we care about
          fromGTF             <- fromGTF[junction,]

          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.ijc.txt.gz", sep=".")
          ijc_counts          <- makeCountsMatrix( filename_gz  <- file)
          ijc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- ijc_counts)
          rm (ijc_counts)
          ijc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- ijc)
          message("PASS ijc reduced matrix to junctions of interest (rows) is done\n")

          # reduce rows by keeping only the junctions we care about
          ijc                 <- ijc[junction,]
          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.sjc.txt.gz", sep=".")
          sjc_counts          <- makeCountsMatrix( filename_gz  <- file)
          sjc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- sjc_counts)
          rm(sjc_counts)
          sjc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- sjc)
          # reduce rows by keeping only the junctions we care about
          sjc                 <- sjc[junction,]
          message("PASS sjc reduced matrix to junctions of interest (rows) is done\n")
 
          # reduce the ijc sjc matrices to just the samples that are of the tissue of interest (columns now)
          tissue_idx          <- c(pData(ijc)$SMTSD == tissue_index)
          ijc                 <- ijc[, tissue_idx == TRUE]
          ijc_srrnames        <- colnames(exprs(ijc))
          sjc                 <- sjc[, tissue_idx == TRUE]
          sjc_srrnames        <- colnames(exprs(sjc))
          message("PASS done reducing matrix to tissue_index (columns) is done\n")
    
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inc.txt.gz", sep=".")
          inc                 <- data.table::fread(file)
          rownames(inc)       <- inc$ID
  	  # remove the first column -- the ID column
          inc                 <- inc[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inclen.txt.gz", sep=".")
          inclen              <- data.table::fread(file)
          rownames(inclen)    <- inclen$ID
     	  # remove the first column -- the ID column
          inclen              <- inclen[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.skiplen.txt.gz", sep=".")
          skiplen             <- data.table::fread(file)
          rownames(skiplen)   <- skiplen$ID
	  # remove the first column -- the ID column
          skiplen             <- skiplen[,-1]
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])	
          message("PASS done inc, inclen and skiplen loaded is done\n")

          # reduce the rows to match our junctions
 	  inc_srrnames        <- colnames(inc)
	  rm(keep)
 	  keep                <- inc_srrnames %in% ijc_srrnames
          inc                 <- matrix(as.numeric(inc    [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          inclen              <- matrix(as.numeric(inclen [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          skiplen             <- matrix(as.numeric(skiplen[,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          message ("inc dim = ", dim(inc))
 	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])
	  if (dim(inc)[1] > 0) {
           message("PASS done inc, inclen and skiplen reduced by columns to match ijc and insuring > 0\n")

           # we need to further divide them into SAMPLE_1 and SAMPLE_2
           # 
           # SAMPLE_1 data will hold all the male samples (GTEx SEX == 1)
           # SAMPLE_2 data will hold all the female samples (GTEx SEX == 2) 
           #
  	   ijc_srrnames     <- as.character(colnames(exprs(ijc)))
           male_srrnames    <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 1])))
           keep             <- ijc_srrnames %in% male_srrnames
 	   table (keep)
           tmp_IJC_SAMPLE_1 <- exprs(ijc[, pData(ijc)$SEX == 1])
           tmp_SJC_SAMPLE_1 <- exprs(sjc[, pData(ijc)$SEX == 1])
           tmp_IncLevel1    <- matrix(as.numeric(inc[,keep==TRUE]),
	                    nrow = length(junction), ncol = length(male_srrnames))
  	   message ("PASS tmp_IncLevel1 made\n")
           message("PASS done separated into male SAMPLE_1\n")

           female_srrnames  <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 2])))
           tmp_IJC_SAMPLE_2 <- exprs(ijc[, pData(ijc)$SEX == 2])
           tmp_SJC_SAMPLE_2 <- exprs(sjc[, pData(ijc)$SEX == 2])
           tmp_IncLevel2    <- matrix(as.numeric(inc[, pData(ijc)$SEX == 2]),
	                    nrow = length(junction), ncol = length(female_srrnames))
           message("PASS done separated into male SAMPLE_2\n")

           inters = intersect(male_srrnames, female_srrnames)
           if (length(inters) != 0) {
             message("ERROR in female to male separation!\n")
           } else {
             message("PASS female and male samples separated\n")
           }

           # InclevelDifference is the difference of the rowMeans of IncLevel1 and IncLevel2
           inclevel1_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel1), nrow = length(junction), ncol = length(male_srrnames)))
           inclevel2_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel2), nrow = length(junction), ncol = length(female_srrnames)))
           IncLevelDifference <- inclevel1_rm - inclevel2_rm
           message("PASS done IncLevelDifference calculated ", IncLevelDifference)
    
           IJC_SAMPLE_1     <- tmp_IJC_SAMPLE_1[,1]
           SJC_SAMPLE_1     <- tmp_SJC_SAMPLE_1[,1]
           IncLevel1        <- tmp_IncLevel1[,1]
    
           for (i in 2:length(male_srrnames)) {
            IJC_SAMPLE_1 <- paste(IJC_SAMPLE_1, tmp_IJC_SAMPLE_1[,i], sep=",")
            SJC_SAMPLE_1 <- paste(SJC_SAMPLE_1, tmp_SJC_SAMPLE_1[,i], sep=",")
            IncLevel1    <- paste(IncLevel1   , tmp_IncLevel1   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_1, SJC_SAMPLE_1 and IncLevel1 done!\n")
    
           IJC_SAMPLE_2     <- tmp_IJC_SAMPLE_2[,1]
           SJC_SAMPLE_2     <- tmp_SJC_SAMPLE_2[,1]
           IncLevel2        <- tmp_IncLevel2[,1]
    
           for (i in 2:length(female_srrnames)) {
            IJC_SAMPLE_2 <- paste(IJC_SAMPLE_2, tmp_IJC_SAMPLE_2[,i], sep=",")
            SJC_SAMPLE_2 <- paste(SJC_SAMPLE_2, tmp_SJC_SAMPLE_2[,i], sep=",")
            IncLevel2    <- paste(IncLevel2   , tmp_IncLevel2   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_2, SJC_SAMPLE_2 and IncLevel2 done!\n")
 
           # Ready to print out all the bits.
           # We will paste the line
           # first part comes from the fromGTF.splicetype.txt file
	   # Each splice_type is unique in their files
	   # Here we use fromGTF.SE.txt
           file <- paste(paste(tissue_dir, snakecase::to_swap_case(as.character(splice_type)), sep="/"),
                             "MATS.ReadsOnTargetAndJunctionCounts.txt", sep= ".")
           write.table("ID", file=file, append=FALSE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
           write.table("GeneID", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
           write.table("geneSymbol", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("chr", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("strand", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("exonStart_0base", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("exonEnd", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("upstreamES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("upstreamEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("downstreamES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("downstreamEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("ID", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SkipFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("PValue", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("FDR", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevelDifference", file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
           message("PASS header written to the file!\n")

           # first part is all the information from the fromGTF
           # The structure we have is of the dimensions of the fromGTF, reduced by row to the junctions of signficance
           for (m in 1:(dim(fromGTF)[1])) {
             for (n in 1:(dim(fromGTF)[2])) {
               write.table(fromGTF[m,n], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	       
    	     }
             write.table(splice_type, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(tissue_index, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,1], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,2], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,3], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS fromGTF file written to the file!\n")
             #now the rest		
             write.table(IncLevelDifference[m], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(junction[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(inclen    [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(skiplen   [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$AdjPVal, file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$B      , file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevelDifference[m], file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS header written to the file!\n")
             message("PASS row ", m)
    	     message("IJC, SJC, IncLevel for SAMPLE_1 and SAMPLE_2 written to the file!\n")
	    
           } # for all junctions of interest
	   
 	  } # and if there is > 0 junctions with inc
#	  rm(keep_gs)
	  
         } # end !is.na(junction)

In [None]:
#       } # and if there is > 0 genes of interest
       rm(keep_tissue)
       
      } # if there are any tissues for splice type "se"
      # end if splice_type is "se"
    
    } else if (splice_type == "a3ss") {

      message("tissue of interest is ", tissue_index)

      message("splice_type is ", splice_type)

      # for each splice type the ReadsOnTarget output file we are generating is unique
      file <- paste(paste("../data/fromGTF",
              snakecase::to_swap_case(as.character(splice_type)), sep="."),
      	"txt", sep=".")
      fromGTF          <- read.table(file=file, header=TRUE)

      # Reduce the rows by focusing only on the splice_type, tissue type
      splice_type_gene_as <- gene_as[gene_as$ASE == snakecase::to_swap_case(as.character(splice_type)),]
      message("splice_type_gene_as dim is " , dim(splice_type_gene_as))
      
      # Reduce rows by focusing on the tissue of interest
      tissue              <- splice_type_gene_as$Tissue
      
      keep_tissue         <- tissue %in% tissue_index

      if (sum(keep_tissue == TRUE) > 0) {
	
       splice_type_gene_as <- splice_type_gene_as[keep_tissue == TRUE,]

      # Reduce the rows by focusing only on the genes of interest
      gs                  <- splice_type_gene_as$GeneSymbol
      if (length(gs) > length (genes_of_interest)) {
           keep_gs             <- gs %in% genes_of_interest
      } else {
           keep_gs             <- genes_of_interest %in% gs
      }
      if (sum(keep_gs == TRUE) > 0) {
       splice_type_gene_as <- splice_type_gene_as[keep_gs == TRUE,]

In [None]:
        # order this remaining matrix
        idx                 <- order(splice_type_gene_as$ASE_IDX)
        splice_type_gene_as <- splice_type_gene_as[idx,]
  
        # extract the junction to reduce the other matrices by
  	junction            <- splice_type_gene_as$ASE_IDX

	 if (sum(is.na(junction)) == 0) {
	 
          message("junctions are = ", junction)
          # reduce rows by keeping only the junctions we care about
          fromGTF             <- fromGTF[junction,]

          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.ijc.txt.gz", sep=".")
          ijc_counts          <- makeCountsMatrix( filename_gz  <- file)
          ijc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- ijc_counts)
          rm (ijc_counts)
          ijc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- ijc)
          message("PASS ijc reduced matrix to junctions of interest (rows) is done\n")

          # reduce rows by keeping only the junctions we care about
          ijc                 <- ijc[junction,]
          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.sjc.txt.gz", sep=".")
          sjc_counts          <- makeCountsMatrix( filename_gz  <- file)
          sjc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- sjc_counts)
          rm(sjc_counts)
          sjc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- sjc)
          # reduce rows by keeping only the junctions we care about
          sjc                 <- sjc[junction,]
          message("PASS sjc reduced matrix to junctions of interest (rows) is done\n")
 
          # reduce the ijc sjc matrices to just the samples that are of the tissue of interest (columns now)
          tissue_idx          <- c(pData(ijc)$SMTSD == tissue_index)
          ijc                 <- ijc[, tissue_idx == TRUE]
          ijc_srrnames        <- colnames(exprs(ijc))
          sjc                 <- sjc[, tissue_idx == TRUE]
          sjc_srrnames        <- colnames(exprs(sjc))
          message("PASS done reducing matrix to tissue_index (columns) is done\n")
    
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inc.txt.gz", sep=".")
          inc                 <- data.table::fread(file)
          rownames(inc)       <- inc$ID
  	  # remove the first column -- the ID column
          inc                 <- inc[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inclen.txt.gz", sep=".")
          inclen              <- data.table::fread(file)
          rownames(inclen)    <- inclen$ID
     	  # remove the first column -- the ID column
          inclen              <- inclen[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.skiplen.txt.gz", sep=".")
          skiplen             <- data.table::fread(file)
          rownames(skiplen)   <- skiplen$ID
	  # remove the first column -- the ID column
          skiplen             <- skiplen[,-1]
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])	
          message("PASS done inc, inclen and skiplen loaded is done\n")

          # reduce the rows to match our junctions
 	  inc_srrnames        <- colnames(inc)
	  rm (keep)
 	  keep                <- inc_srrnames %in% ijc_srrnames
          inc                 <- matrix(as.numeric(inc    [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          inclen              <- matrix(as.numeric(inclen [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          skiplen             <- matrix(as.numeric(skiplen[,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          message ("inc dim = ", dim(inc))
 	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])
	  if (dim(inc)[1] > 0) {
           message("PASS done inc, inclen and skiplen reduced by columns to match ijc and insuring > 0\n")

           # we need to further divide them into SAMPLE_1 and SAMPLE_2
           # 
           # SAMPLE_1 data will hold all the male samples (GTEx SEX == 1)
           # SAMPLE_2 data will hold all the female samples (GTEx SEX == 2) 
           #
  	   ijc_srrnames     <- as.character(colnames(exprs(ijc)))
           male_srrnames    <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 1])))
	   rm(keep)
           keep             <- ijc_srrnames %in% male_srrnames
 	   table (keep)
           tmp_IJC_SAMPLE_1 <- exprs(ijc[, pData(ijc)$SEX == 1])
           tmp_SJC_SAMPLE_1 <- exprs(sjc[, pData(ijc)$SEX == 1])
           tmp_IncLevel1    <- matrix(as.numeric(inc[,keep==TRUE]),
	                    nrow = length(junction), ncol = length(male_srrnames))
  	   message ("PASS tmp_IncLevel1 made\n")
           message("PASS done separated into male SAMPLE_1\n")

           female_srrnames  <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 2])))
           tmp_IJC_SAMPLE_2 <- exprs(ijc[, pData(ijc)$SEX == 2])
           tmp_SJC_SAMPLE_2 <- exprs(sjc[, pData(ijc)$SEX == 2])
           tmp_IncLevel2    <- matrix(as.numeric(inc[, pData(ijc)$SEX == 2]),
	                    nrow = length(junction), ncol = length(female_srrnames))
           message("PASS done separated into male SAMPLE_2\n")

           inters = intersect(male_srrnames, female_srrnames)
           if (length(inters) != 0) {
             message("ERROR in female to male separation!\n")
           } else {
             message("PASS female and male samples separated\n")
           }

           # InclevelDifference is the difference of the rowMeans of IncLevel1 and IncLevel2
           inclevel1_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel1), nrow = length(junction), ncol = length(male_srrnames)))
           inclevel2_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel2), nrow = length(junction), ncol = length(female_srrnames)))
           IncLevelDifference <- inclevel1_rm - inclevel2_rm
           message("PASS done IncLevelDifference calculated ", IncLevelDifference)
    
           IJC_SAMPLE_1     <- tmp_IJC_SAMPLE_1[,1]
           SJC_SAMPLE_1     <- tmp_SJC_SAMPLE_1[,1]
           IncLevel1        <- tmp_IncLevel1[,1]
    
           for (i in 2:length(male_srrnames)) {
            IJC_SAMPLE_1 <- paste(IJC_SAMPLE_1, tmp_IJC_SAMPLE_1[,i], sep=",")
            SJC_SAMPLE_1 <- paste(SJC_SAMPLE_1, tmp_SJC_SAMPLE_1[,i], sep=",")
            IncLevel1    <- paste(IncLevel1   , tmp_IncLevel1   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_1, SJC_SAMPLE_1 and IncLevel1 done!\n")
    
           IJC_SAMPLE_2     <- tmp_IJC_SAMPLE_2[,1]
           SJC_SAMPLE_2     <- tmp_SJC_SAMPLE_2[,1]
           IncLevel2        <- tmp_IncLevel2[,1]
    
           for (i in 2:length(female_srrnames)) {
            IJC_SAMPLE_2 <- paste(IJC_SAMPLE_2, tmp_IJC_SAMPLE_2[,i], sep=",")
            SJC_SAMPLE_2 <- paste(SJC_SAMPLE_2, tmp_SJC_SAMPLE_2[,i], sep=",")
            IncLevel2    <- paste(IncLevel2   , tmp_IncLevel2   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_2, SJC_SAMPLE_2 and IncLevel2 done!\n")
 
           # Ready to print out all the bits.
           # We will paste the line
           # first part comes from the fromGTF.splicetype.txt file
	   # Each splice_type is unique in their files
	   # Here we use fromGTF.A3SS.txt
           file <- paste(paste(tissue_dir, snakecase::to_swap_case(as.character(splice_type)), sep="/"),
                             "MATS.ReadsOnTargetAndJunctionCounts.txt", sep= ".")
           write.table("ID", file=file, append=FALSE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	   
           write.table("GeneID", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
           write.table("geneSymbol", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("chr", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("strand", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("longExonStart_0base", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("longExonEnd", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("shortES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("shortEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("flankingES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("flankingEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("ID.1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SkipFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("PValue", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("FDR", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevelDifference", file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
           message("PASS header written to the file!\n")

           # first part is all the information from the fromGTF
           # The structure we have is of the dimensions of the fromGTF, reduced by row to the junctions of signficance
           for (m in 1:(dim(fromGTF)[1])) {
             for (n in 1:(dim(fromGTF)[2])) {
               write.table(fromGTF[m,n], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	       
    	     }
             write.table(splice_type, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(tissue_index, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,1], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,2], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,3], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS fromGTF file written to the file!\n")
             #now the rest		
             write.table(IncLevelDifference[m], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(junction[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(inclen    [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(skiplen   [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$AdjPVal, file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$B      , file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevelDifference[m], file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS header written to the file!\n")
             message("PASS row ", m)
    	     message("IJC, SJC, IncLevel for SAMPLE_1 and SAMPLE_2 written to the file!\n")
	    
	    
           } # for all junctions of interest
	   
 	  } # and if there is > 0 junctions with inc
#	  rm(keep_gs)
	  
         } # end !is.na(junction)

In [None]:
#       } # and if there is > 0 genes of interest
       rm(keep_tissue)
       
      } # if there are any tissues for splice type "a3ss"
    
      # end if splice_type is "a3ss"
    
    } else if (splice_type == "a5ss") {

      # Report which tissue / splice type this pass is working on.
      message("tissue of interest is ", tissue_index)
      message("splice_type is ", splice_type)

      # The fromGTF annotation table is specific to the splice type:
      # ../data/fromGTF.<swap-cased splice_type>.txt
      ase_label           <- snakecase::to_swap_case(as.character(splice_type))
      file                <- paste("../data/fromGTF", ase_label, "txt", sep = ".")
      fromGTF             <- read.table(file = file, header = TRUE)

      # Rows of gene_as whose ASE column matches this splice type
      splice_type_gene_as <- gene_as[gene_as$ASE == ase_label, ]
      message("splice_type_gene_as dim is " , dim(splice_type_gene_as))

      # Mask of those rows that belong to the tissue of interest
      tissue              <- splice_type_gene_as$Tissue
      keep_tissue         <- is.element(tissue, tissue_index)

      if (sum(keep_tissue == TRUE) > 0) {
	
       # Keep only the rows recorded for the tissue of interest
       # (keep_tissue comes from a membership test, so it holds no NA).
       splice_type_gene_as <- splice_type_gene_as[keep_tissue, ]
#       # Reduce the rows by focusing only on the genes of interest
#       gs                  <- splice_type_gene_as$GeneSymbol
#       if (length(gs) > length (genes_of_interest)) {
#            keep_gs             <- gs %in% genes_of_interest
#       } else {
#            keep_gs             <- genes_of_interest %in% gs
#       }
#       if (sum(keep_gs == TRUE) > 0) {
#        splice_type_gene_as <- splice_type_gene_as[keep_gs == TRUE,]

        # Sort the remaining rows by ascending ASE_IDX
        idx                 <- order(splice_type_gene_as$ASE_IDX)
        splice_type_gene_as <- splice_type_gene_as[idx, ]

        # The sorted ASE_IDX values drive every later row reduction
        junction            <- splice_type_gene_as$ASE_IDX

	 if (sum(is.na(junction)) == 0) {
	 
          # Load the included (ijc) and skipped (sjc) junction-count matrices for this
          # splice type, reduce them to the junctions of interest (rows) and then to
          # the samples of the tissue of interest (columns).
          #
          # makeCountsMatrix, makeSplicingExpressionSetObject and reduceSampleSet are
          # defined outside this section; their behavior is not visible here.
          # NOTE(review): their arguments are written as `name <- value`, so each value
          # is passed positionally and, when the argument is evaluated, `filename_gz`,
          # `counts` and `es` are also assigned in the calling environment -- confirm
          # whether named arguments (`name = value`) were intended.
          message("junctions are = ", junction)
	
          # reduce rows by keeping only the junctions we care about
          fromGTF             <- fromGTF[junction,]

          # included junction counts: ../data/rmats_final.<splice_type>.jc.ijc.txt.gz
          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.ijc.txt.gz", sep=".")
          ijc_counts          <- makeCountsMatrix( filename_gz  <- file)
          ijc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- ijc_counts)
          rm (ijc_counts)
          ijc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- ijc)
          # this message is emitted before the row reduction on the next statement
          message("PASS ijc reduced matrix to junctions of interest (rows) is done\n")

          # reduce rows by keeping only the junctions we care about
          ijc                 <- ijc[junction,]
          # skipped junction counts: ../data/rmats_final.<splice_type>.jc.sjc.txt.gz
          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.sjc.txt.gz", sep=".")
          sjc_counts          <- makeCountsMatrix( filename_gz  <- file)
          sjc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- sjc_counts)
          rm(sjc_counts)
          sjc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- sjc)
          # reduce rows by keeping only the junctions we care about
          sjc                 <- sjc[junction,]
          message("PASS sjc reduced matrix to junctions of interest (rows) is done\n")
 
          # reduce the ijc sjc matrices to just the samples that are of the tissue of interest (columns now)
          # the column mask is computed from ijc's phenotype data and reused for sjc,
          # which presumes both objects carry their samples in the same order -- TODO confirm
          tissue_idx          <- c(pData(ijc)$SMTSD == tissue_index)
          ijc                 <- ijc[, tissue_idx == TRUE]
          ijc_srrnames        <- colnames(exprs(ijc))
          sjc                 <- sjc[, tissue_idx == TRUE]
          sjc_srrnames        <- colnames(exprs(sjc))
          message("PASS done reducing matrix to tissue_index (columns) is done\n")
    
          # Read the inclusion level (inc), inclusion length (inclen) and skipped
          # length (skiplen) tables for this splice type. In each table the ID column
          # is assigned as row names and the first column is then dropped.
          rmats_prefix        <- paste("../data/rmats_final", splice_type, sep = ".")

          file                <- paste(rmats_prefix, "jc.inc.txt.gz", sep = ".")
          inc                 <- data.table::fread(file)
          rownames(inc)       <- inc$ID
          inc                 <- inc[, -1]

          file                <- paste(rmats_prefix, "jc.inclen.txt.gz", sep = ".")
          inclen              <- data.table::fread(file)
          rownames(inclen)    <- inclen$ID
          inclen              <- inclen[, -1]

          file                <- paste(rmats_prefix, "jc.skiplen.txt.gz", sep = ".")
          skiplen             <- data.table::fread(file)
          rownames(skiplen)   <- skiplen$ID
          skiplen             <- skiplen[, -1]

          # Log the dimensions of the three tables as loaded
          message("inc dim [1] = ", nrow(inc))
          message("inc dim [2] = ", ncol(inc))
          message("inclen dim [1] = ", nrow(inclen))
          message("inclen dim [2] = ", ncol(inclen))
          message("skiplen dim [1] = ", nrow(skiplen))
          message("skiplen dim [2] = ", ncol(skiplen))
          message("PASS done inc, inclen and skiplen loaded is done\n")

          # Reduce inc, inclen and skiplen to the junctions of interest (rows) and to
          # the samples kept in ijc (columns), as plain numeric matrices without
          # dimnames.
          #
          # The earlier form, matrix(as.numeric(tbl[, keep == TRUE]),
          # nrow = length(junction), ncol = length(inc_srrnames)), never selected the
          # junction rows and reshaped to the un-reduced column count, so cells no
          # longer lined up with junctions or samples.
          #
          # Columns are taken in ijc_srrnames order so that the positional SEX masks
          # applied later in this branch address the same samples in inc as in ijc.
          # Assumes junction holds row positions, as fromGTF[junction, ] and
          # ijc[junction, ] earlier in this branch do -- TODO confirm.
          inc_srrnames        <- colnames(inc)
          keep                <- inc_srrnames %in% ijc_srrnames
          sample_cols         <- match(ijc_srrnames, inc_srrnames)
          if (anyNA(sample_cols)) {
            # a missing sample keeps its column position but is filled with NA
            message("WARNING ", sum(is.na(sample_cols)),
                    " ijc samples not found in inc; their columns are NA\n")
          }

          # table -> numeric matrix restricted to [junction, sample_cols]
          to_junction_matrix <- function(tbl) {
            m <- as.matrix(tbl)[junction, sample_cols, drop = FALSE]
            storage.mode(m) <- "double"
            dimnames(m) <- NULL
            m
          }
          inc                 <- to_junction_matrix(inc)
          inclen              <- to_junction_matrix(inclen)
          skiplen             <- to_junction_matrix(skiplen)
          rm(to_junction_matrix, sample_cols)

          message("inc dim = ", paste(dim(inc), collapse = " x "))
          message("inc dim [1] = ", dim(inc)[1])
          message("inc dim [2] = ", dim(inc)[2])
          message("inclen dim [1] = ", dim(inclen)[1])
          message("inclen dim [2] = ", dim(inclen)[2])
          message("skiplen dim [1] = ", dim(skiplen)[1])
          message("skiplen dim [2] = ", dim(skiplen)[2])
	  if (dim(inc)[1] > 0) {
           message("PASS done inc, inclen and skiplen reduced by columns to match ijc and insuring > 0\n")

           # Divide the samples into SAMPLE_1 and SAMPLE_2:
           #   SAMPLE_1 holds all the male samples   (GTEx SEX == 1)
           #   SAMPLE_2 holds all the female samples (GTEx SEX == 2)
           #
           # %in% is used instead of == so that a missing SEX value yields FALSE
           # rather than an NA column index. The same mask is applied to ijc, sjc
           # and inc, i.e. inc is addressed by ijc column position.
           sample_sex       <- pData(ijc)$SEX
           is_male          <- sample_sex %in% 1
           is_female        <- sample_sex %in% 2
           ijc_srrnames     <- as.character(colnames(exprs(ijc)))

           male_srrnames    <- ijc_srrnames[is_male]
           keep             <- is_male
           tmp_IJC_SAMPLE_1 <- exprs(ijc[, is_male])
           tmp_SJC_SAMPLE_1 <- exprs(sjc[, is_male])
           tmp_IncLevel1    <- matrix(as.numeric(inc[, is_male]),
                                      nrow = length(junction), ncol = length(male_srrnames))
           message ("PASS tmp_IncLevel1 made\n")
           message("PASS done separated into male SAMPLE_1\n")

           female_srrnames  <- ijc_srrnames[is_female]
           tmp_IJC_SAMPLE_2 <- exprs(ijc[, is_female])
           tmp_SJC_SAMPLE_2 <- exprs(sjc[, is_female])
           tmp_IncLevel2    <- matrix(as.numeric(inc[, is_female]),
                                      nrow = length(junction), ncol = length(female_srrnames))
           # this message previously read "male SAMPLE_2"
           message("PASS done separated into female SAMPLE_2\n")

           # Sanity check: no sample name may appear in both groups
           inters <- intersect(male_srrnames, female_srrnames)
           if (length(inters) != 0) {
             message("ERROR in female to male separation!\n")
           } else {
             message("PASS female and male samples separated\n")
           }

           # IncLevelDifference is the difference of the per-junction row means of
           # IncLevel1 (male) and IncLevel2 (female)
           inclevel1_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel1), nrow = length(junction), ncol = length(male_srrnames)))
           inclevel2_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel2), nrow = length(junction), ncol = length(female_srrnames)))
           IncLevelDifference <- inclevel1_rm - inclevel2_rm
           message("PASS done IncLevelDifference calculated ",
                   paste(IncLevelDifference, collapse = ", "))

           # Collapse a junction-by-sample matrix into one comma-separated string per
           # junction (one field per sample). Pasting whole columns at once also
           # covers a group with a single sample, where the former
           # `for (i in 2:length(...))` loop counted down from 2 to 1 and indexed a
           # column that does not exist.
           collapse_samples <- function(mat) {
             sample_columns <- unname(as.list(as.data.frame(mat)))
             do.call(paste, c(sample_columns, sep = ","))
           }

           IJC_SAMPLE_1     <- collapse_samples(tmp_IJC_SAMPLE_1)
           SJC_SAMPLE_1     <- collapse_samples(tmp_SJC_SAMPLE_1)
           IncLevel1        <- collapse_samples(tmp_IncLevel1)
           message("PASS IJC_SAMPLE_1, SJC_SAMPLE_1 and IncLevel1 done!\n")

           IJC_SAMPLE_2     <- collapse_samples(tmp_IJC_SAMPLE_2)
           SJC_SAMPLE_2     <- collapse_samples(tmp_SJC_SAMPLE_2)
           IncLevel2        <- collapse_samples(tmp_IncLevel2)
           message("PASS IJC_SAMPLE_2, SJC_SAMPLE_2 and IncLevel2 done!\n")

           rm(collapse_samples)
 
           # Start the output file for this tissue and splice type with its header.
           # The leading columns mirror the fromGTF annotation file of the splice
           # type (fromGTF.A5SS.txt here); the remaining ones are filled per junction.
           file <- paste(paste(tissue_dir, snakecase::to_swap_case(as.character(splice_type)), sep="/"),
                             "MATS.ReadsOnTargetAndJunctionCounts.txt", sep= ".")
           header_columns <- c(
             "ID", "GeneID", "geneSymbol", "chr", "strand",
             "longExonStart_0base", "longExonEnd",
             "shortES", "shortEE", "flankingES", "flankingEE",
             "ID.1",
             "IC_SAMPLE_1", "SC_SAMPLE_1", "IC_SAMPLE_2", "SC_SAMPLE_2",
             "IncFormLen", "SkipFormLen",
             "PValue", "FDR",
             "IncLevel1", "IncLevel2", "IncLevelDifference"
           )
           # One tab-separated line ending in a newline; any existing file is replaced
           cat(paste(header_columns, collapse = "\t"), "\n", file = file, sep = "", append = FALSE)
           message("PASS header written to the file!\n")

           # Write one line per junction of interest to two files:
           #   * `file`: the fromGTF columns followed by the junction id, the
           #     collapsed counts, lengths, statistics and inclusion levels;
           #   * ../data/incLevelDifferences.tsv (appended): splice type, tissue,
           #     the first three fromGTF columns and IncLevelDifference.
           # All rows go out in one write.table call per file instead of one call
           # per cell; fields are tab separated and each line ends with a newline.
           n_rows <- nrow(fromGTF)
           if (n_rows > 0) {
             rows <- seq_len(n_rows)

             write.table(data.frame(splice_type, tissue_index,
                                    fromGTF[, 1:3, drop = FALSE],
                                    IncLevelDifference[rows],
                                    check.names = FALSE, stringsAsFactors = FALSE),
                         file = "../data/incLevelDifferences.tsv", append = TRUE,
                         sep = "\t", eol = "\n",
                         row.names = FALSE, col.names = FALSE, quote = FALSE)
             message("PASS fromGTF file written to the file!\n")

             # NOTE(review): the header labels the two statistics columns PValue
             # and FDR, while the values written are AdjPVal and B -- confirm.
             write.table(data.frame(fromGTF,
                                    junction[rows],
                                    IJC_SAMPLE_1[rows], SJC_SAMPLE_1[rows],
                                    IJC_SAMPLE_2[rows], SJC_SAMPLE_2[rows],
                                    inclen[rows, 1], skiplen[rows, 1],
                                    splice_type_gene_as$AdjPVal[rows],
                                    splice_type_gene_as$B[rows],
                                    IncLevel1[rows], IncLevel2[rows],
                                    IncLevelDifference[rows],
                                    check.names = FALSE, stringsAsFactors = FALSE),
                         file = file, append = TRUE,
                         sep = "\t", eol = "\n",
                         row.names = FALSE, col.names = FALSE, quote = FALSE)
             message("PASS rows written: ", n_rows)
             message("IJC, SJC, IncLevel for SAMPLE_1 and SAMPLE_2 written to the file!\n")
           } # for all junctions of interest
	   
 	  } # and if there is > 0 junctions with inc
#	  rm(keep_gs)
	  
         } # end !is.na(junction)

In [None]:
#       } # and if there is > 0 genes of interest
       rm(keep_tissue)
       
      } # if there are any tissues for splice type "a5ss"
    
      # end if splice_type is "a5ss"
      
    } else if (splice_type == "mxe") {

      message("tissue of interest is ", tissue_index)
      message("splice_type is ", splice_type)

      # for each splice type the ReadsOnTarget output file we are generating is unique
      file <- paste(paste("../data/fromGTF",
              snakecase::to_swap_case(as.character(splice_type)), sep="."),
      	"txt", sep=".")
      fromGTF          <- read.table(file=file, header=TRUE)

      # Reduce the rows by focusing only on the splice_type, tissue type
      splice_type_gene_as <- gene_as[gene_as$ASE == snakecase::to_swap_case(as.character(splice_type)),]
      message("splice_type_gene_as dim is " , dim(splice_type_gene_as))
      
      # Reduce rows by focusing on the tissue of interest
      tissue              <- splice_type_gene_as$Tissue
      
      keep_tissue         <- tissue %in% tissue_index

      if (sum(keep_tissue == TRUE) > 0) {
	
       splice_type_gene_as <- splice_type_gene_as[keep_tissue == TRUE,]

      # Reduce the rows by focusing only on the genes of interest.
      # keep_gs is used below as a row mask on splice_type_gene_as, so it must have
      # one entry per row of that table: membership is therefore always tested
      # from the table's gene symbols into genes_of_interest. (The former else
      # branch tested the other way round and produced a mask whose length was
      # length(genes_of_interest).)
      gs                  <- splice_type_gene_as$GeneSymbol
      keep_gs             <- gs %in% genes_of_interest
      if (sum(keep_gs == TRUE) > 0) {
       splice_type_gene_as <- splice_type_gene_as[keep_gs == TRUE,]

In [None]:
        # order this remaining matrix
        idx                 <- order(splice_type_gene_as$ASE_IDX)
        splice_type_gene_as <- splice_type_gene_as[idx,]
  
        # extract the junction to reduce the other matrices by
  	junction            <- splice_type_gene_as$ASE_IDX
	  
	 if (sum(is.na(junction)) == 0) {

          message("junctions are = ", junction)
	
          # reduce rows by keeping only the junctions we care about
          fromGTF             <- fromGTF[junction,]

          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.ijc.txt.gz", sep=".")
          ijc_counts          <- makeCountsMatrix( filename_gz  <- file)
          ijc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- ijc_counts)
          rm (ijc_counts)
          ijc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- ijc)
          message("PASS ijc reduced matrix to junctions of interest (rows) is done\n")

          # reduce rows by keeping only the junctions we care about
          ijc                 <- ijc[junction,]
          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.sjc.txt.gz", sep=".")
          sjc_counts          <- makeCountsMatrix( filename_gz  <- file)
          sjc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- sjc_counts)
          rm(sjc_counts)
          sjc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- sjc)
          # reduce rows by keeping only the junctions we care about
          sjc                 <- sjc[junction,]
          message("PASS sjc reduced matrix to junctions of interest (rows) is done\n")
 
          # reduce the ijc sjc matrices to just the samples that are of the tissue of interest (columns now)
          tissue_idx          <- c(pData(ijc)$SMTSD == tissue_index)
          ijc                 <- ijc[, tissue_idx == TRUE]
          ijc_srrnames        <- colnames(exprs(ijc))
          sjc                 <- sjc[, tissue_idx == TRUE]
          sjc_srrnames        <- colnames(exprs(sjc))
          message("PASS done reducing matrix to tissue_index (columns) is done\n")
    
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inc.txt.gz", sep=".")
          inc                 <- data.table::fread(file)
          rownames(inc)       <- inc$ID
  	  # remove the first column -- the ID column
          inc                 <- inc[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inclen.txt.gz", sep=".")
          inclen              <- data.table::fread(file)
          rownames(inclen)    <- inclen$ID
     	  # remove the first column -- the ID column
          inclen              <- inclen[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.skiplen.txt.gz", sep=".")
          skiplen             <- data.table::fread(file)
          rownames(skiplen)   <- skiplen$ID
	  # remove the first column -- the ID column
          skiplen             <- skiplen[,-1]
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])	
          message("PASS done inc, inclen and skiplen loaded is done\n")

          # reduce the rows to match our junctions
 	  inc_srrnames        <- colnames(inc)
 	  keep                <- inc_srrnames %in% ijc_srrnames
          inc                 <- matrix(as.numeric(inc    [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          inclen              <- matrix(as.numeric(inclen [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          skiplen             <- matrix(as.numeric(skiplen[,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          message ("inc dim = ", dim(inc))
 	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])
	  if (dim(inc)[1] > 0) {
           message("PASS done inc, inclen and skiplen reduced by columns to match ijc and insuring > 0\n")

           # we need to further divide them into SAMPLE_1 and SAMPLE_2
           # 
           # SAMPLE_1 data will hold all the male samples (GTEx SEX == 1)
           # SAMPLE_2 data will hold all the female samples (GTEx SEX == 2) 
           #
  	   ijc_srrnames     <- as.character(colnames(exprs(ijc)))
           male_srrnames    <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 1])))
           keep             <- ijc_srrnames %in% male_srrnames
 	   table (keep)
           tmp_IJC_SAMPLE_1 <- exprs(ijc[, pData(ijc)$SEX == 1])
           tmp_SJC_SAMPLE_1 <- exprs(sjc[, pData(ijc)$SEX == 1])
           tmp_IncLevel1    <- matrix(as.numeric(inc[,keep==TRUE]),
	                    nrow = length(junction), ncol = length(male_srrnames))
  	   message ("PASS tmp_IncLevel1 made\n")
           message("PASS done separated into male SAMPLE_1\n")

           female_srrnames  <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 2])))
           tmp_IJC_SAMPLE_2 <- exprs(ijc[, pData(ijc)$SEX == 2])
           tmp_SJC_SAMPLE_2 <- exprs(sjc[, pData(ijc)$SEX == 2])
           tmp_IncLevel2    <- matrix(as.numeric(inc[, pData(ijc)$SEX == 2]),
	                    nrow = length(junction), ncol = length(female_srrnames))
           message("PASS done separated into male SAMPLE_2\n")

           inters = intersect(male_srrnames, female_srrnames)
           if (length(inters) != 0) {
             message("ERROR in female to male separation!\n")
           } else {
             message("PASS female and male samples separated\n")
           }

           # InclevelDifference is the difference of the rowMeans of IncLevel1 and IncLevel2
           inclevel1_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel1), nrow = length(junction), ncol = length(male_srrnames)))
           inclevel2_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel2), nrow = length(junction), ncol = length(female_srrnames)))
           IncLevelDifference <- inclevel1_rm - inclevel2_rm
           message("PASS done IncLevelDifference calculated ", IncLevelDifference)
    
           IJC_SAMPLE_1     <- tmp_IJC_SAMPLE_1[,1]
           SJC_SAMPLE_1     <- tmp_SJC_SAMPLE_1[,1]
           IncLevel1        <- tmp_IncLevel1[,1]
    
           for (i in 2:length(male_srrnames)) {
            IJC_SAMPLE_1 <- paste(IJC_SAMPLE_1, tmp_IJC_SAMPLE_1[,i], sep=",")
            SJC_SAMPLE_1 <- paste(SJC_SAMPLE_1, tmp_SJC_SAMPLE_1[,i], sep=",")
            IncLevel1    <- paste(IncLevel1   , tmp_IncLevel1   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_1, SJC_SAMPLE_1 and IncLevel1 done!\n")
    
           IJC_SAMPLE_2     <- tmp_IJC_SAMPLE_2[,1]
           SJC_SAMPLE_2     <- tmp_SJC_SAMPLE_2[,1]
           IncLevel2        <- tmp_IncLevel2[,1]
    
           for (i in 2:length(female_srrnames)) {
            IJC_SAMPLE_2 <- paste(IJC_SAMPLE_2, tmp_IJC_SAMPLE_2[,i], sep=",")
            SJC_SAMPLE_2 <- paste(SJC_SAMPLE_2, tmp_SJC_SAMPLE_2[,i], sep=",")
            IncLevel2    <- paste(IncLevel2   , tmp_IncLevel2   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_2, SJC_SAMPLE_2 and IncLevel2 done!\n")
 
           # Ready to print out all the bits.
           # We will paste the line
           # first part comes from the fromGTF.splicetype.txt file
	   # Each splice_type is unique in their files
	   # Here we use fromGTF.MXE.txt
           file <- paste(paste(tissue_dir, snakecase::to_swap_case(as.character(splice_type)), sep="/"),
                             "MATS.ReadsOnTargetAndJunctionCounts.txt", sep= ".")
           write.table("ID", file=file, append=FALSE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	   
           write.table("GeneID", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
           write.table("geneSymbol", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("chr", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("strand", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
	   write.table("X1stExonStart_0base", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("X1stExonEnd", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("X2ndExonStart_0base", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("X2ndExonEnd", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("downstreamES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("downstreamEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("ID.1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SkipFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("PValue", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("FDR", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevelDifference", file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
           message("PASS header written to the file!\n")

           # first part is all the information from the fromGTF
           # The structure we have is of the dimensions of the fromGTF, reduced by row to the junctions of signficance
           for (m in 1:(dim(fromGTF)[1])) {
             for (n in 1:(dim(fromGTF)[2])) {
               write.table(fromGTF[m,n], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	       
    	     }
             write.table(splice_type, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(tissue_index, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,1], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,2], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,3], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS fromGTF file written to the file!\n")
             #now the rest		
             write.table(IncLevelDifference[m], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(junction[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(inclen    [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(skiplen   [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$AdjPVal, file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$B      , file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevelDifference[m], file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS header written to the file!\n")
             message("PASS row ", m)
    	     message("IJC, SJC, IncLevel for SAMPLE_1 and SAMPLE_2 written to the file!\n")
	    
           } # for all junctions of interest
	   
 	  } # and if there is > 0 junctions with inc
#	  rm(keep_gs)
	  
         } # end !is.na(junction)

In [None]:
#       } # and if there is > 0 genes of interest
       rm(keep_tissue)
       
      } # if there are any tissues for splice type "se"
    
      # end if splice_type is "mxe"

    } else if (splice_type == "ri") {

      message("tissue of interest is ", tissue_index)
      message("splice_type is ", splice_type)

      # for each splice type the ReadsOnTarget output file we are generating is unique
      file <- paste(paste("../data/fromGTF",
              snakecase::to_swap_case(as.character(splice_type)), sep="."),
      	"txt", sep=".")
      fromGTF          <- read.table(file=file, header=TRUE)

      # Reduce the rows by focusing only on the splice_type, tissue type
      splice_type_gene_as <- gene_as[gene_as$ASE == snakecase::to_swap_case(as.character(splice_type)),]
      message("splice_type_gene_as dim is " , dim(splice_type_gene_as))
      
      # Reduce rows by focusing on the tissue of interest
      tissue              <- splice_type_gene_as$Tissue
      
      keep_tissue         <- tissue %in% tissue_index

      if (sum(keep_tissue == TRUE) > 0) {
	
       splice_type_gene_as <- splice_type_gene_as[keep_tissue == TRUE,]

      # Reduce the rows by focusing only on the genes of interest
      gs                  <- splice_type_gene_as$GeneSymbol
      if (length(gs) > length (genes_of_interest)) {
           keep_gs             <- gs %in% genes_of_interest
      } else {
           keep_gs             <- genes_of_interest %in% gs
      }      
      if (sum(keep_gs == TRUE) > 0) {
       splice_type_gene_as <- splice_type_gene_as[keep_gs == TRUE,]

In [None]:
        # order this remaining matrix
        idx                 <- order(splice_type_gene_as$ASE_IDX)
        splice_type_gene_as <- splice_type_gene_as[idx,]
  
        # extract the junction to reduce the other matrices by
  	junction            <- splice_type_gene_as$ASE_IDX
	  
	 if (sum(is.na(junction)) == 0) {
	
          message("junctions are = ", junction)
	
          # reduce rows by keeping only the junctions we care about
          fromGTF             <- fromGTF[junction,]

          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.ijc.txt.gz", sep=".")
          ijc_counts          <- makeCountsMatrix( filename_gz  <- file)
          ijc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- ijc_counts)
          rm (ijc_counts)
          ijc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- ijc)
          message("PASS ijc reduced matrix to junctions of interest (rows) is done\n")

          # reduce rows by keeping only the junctions we care about
          ijc                 <- ijc[junction,]
          file                <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.sjc.txt.gz", sep=".")
          sjc_counts          <- makeCountsMatrix( filename_gz  <- file)
          sjc                 <- makeSplicingExpressionSetObject (srr_pdata <- srr_pdata, counts <- sjc_counts)
          rm(sjc_counts)
          sjc                 <- reduceSampleSet(tissue_reduction <- tissue_reduction, es <- sjc)
          # reduce rows by keeping only the junctions we care about
          sjc                 <- sjc[junction,]
          message("PASS sjc reduced matrix to junctions of interest (rows) is done\n")
 
          # reduce the ijc sjc matrices to just the samples that are of the tissue of interest (columns now)
          tissue_idx          <- c(pData(ijc)$SMTSD == tissue_index)
          ijc                 <- ijc[, tissue_idx == TRUE]
          ijc_srrnames        <- colnames(exprs(ijc))
          sjc                 <- sjc[, tissue_idx == TRUE]
          sjc_srrnames        <- colnames(exprs(sjc))
          message("PASS done reducing matrix to tissue_index (columns) is done\n")
    
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inc.txt.gz", sep=".")
          inc                 <- data.table::fread(file)
          rownames(inc)       <- inc$ID
  	  # remove the first column -- the ID column
          inc                 <- inc[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.inclen.txt.gz", sep=".")
          inclen              <- data.table::fread(file)
          rownames(inclen)    <- inclen$ID
     	  # remove the first column -- the ID column
          inclen              <- inclen[,-1]
          file <- paste(paste("../data/rmats_final",splice_type, sep="."), "jc.skiplen.txt.gz", sep=".")
          skiplen             <- data.table::fread(file)
          rownames(skiplen)   <- skiplen$ID
	  # remove the first column -- the ID column
          skiplen             <- skiplen[,-1]
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])	
          message("PASS done inc, inclen and skiplen loaded is done\n")

          # reduce the rows to match our junctions
 	  inc_srrnames        <- colnames(inc)
 	  keep                <- inc_srrnames %in% ijc_srrnames
          inc                 <- matrix(as.numeric(inc    [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          inclen              <- matrix(as.numeric(inclen [,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          skiplen             <- matrix(as.numeric(skiplen[,keep == TRUE]),
	                          nrow=length(junction), ncol = length(inc_srrnames))
          message ("inc dim = ", dim(inc))
 	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inc dim [1] = ", dim(inc)[1])
	  message ("inc dim [2] = ", dim(inc)[2])	
	  message ("inclen dim [1] = ", dim(inclen)[1])
	  message ("inclen dim [2] = ", dim(inclen)[2])	
	  message ("skiplen dim [1] = ", dim(skiplen)[1])
	  message ("skiplen dim [2] = ", dim(skiplen)[2])
	  if (dim(inc)[1] > 0) {
           message("PASS done inc, inclen and skiplen reduced by columns to match ijc and insuring > 0\n")

           # we need to further divide them into SAMPLE_1 and SAMPLE_2
           # 
           # SAMPLE_1 data will hold all the male samples (GTEx SEX == 1)
           # SAMPLE_2 data will hold all the female samples (GTEx SEX == 2) 
           #
  	   ijc_srrnames     <- as.character(colnames(exprs(ijc)))
           male_srrnames    <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 1])))
           keep             <- ijc_srrnames %in% male_srrnames
 	   table (keep)
           tmp_IJC_SAMPLE_1 <- exprs(ijc[, pData(ijc)$SEX == 1])
           tmp_SJC_SAMPLE_1 <- exprs(sjc[, pData(ijc)$SEX == 1])
           tmp_IncLevel1    <- matrix(as.numeric(inc[,keep==TRUE]),
	                    nrow = length(junction), ncol = length(male_srrnames))
  	   message ("PASS tmp_IncLevel1 made\n")
           message("PASS done separated into male SAMPLE_1\n")

           female_srrnames  <- as.character(colnames(exprs(ijc[,pData(ijc)$SEX == 2])))
           tmp_IJC_SAMPLE_2 <- exprs(ijc[, pData(ijc)$SEX == 2])
           tmp_SJC_SAMPLE_2 <- exprs(sjc[, pData(ijc)$SEX == 2])
           tmp_IncLevel2    <- matrix(as.numeric(inc[, pData(ijc)$SEX == 2]),
	                    nrow = length(junction), ncol = length(female_srrnames))
           message("PASS done separated into male SAMPLE_2\n")

           inters = intersect(male_srrnames, female_srrnames)
           if (length(inters) != 0) {
             message("ERROR in female to male separation!\n")
           } else {
             message("PASS female and male samples separated\n")
           }

           # InclevelDifference is the difference of the rowMeans of IncLevel1 and IncLevel2
           inclevel1_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel1), nrow = length(junction), ncol = length(male_srrnames)))
           inclevel2_rm       <- rowMeans(matrix(as.numeric(tmp_IncLevel2), nrow = length(junction), ncol = length(female_srrnames)))
           IncLevelDifference <- inclevel1_rm - inclevel2_rm
           message("PASS done IncLevelDifference calculated ", IncLevelDifference)
    
           IJC_SAMPLE_1     <- tmp_IJC_SAMPLE_1[,1]
           SJC_SAMPLE_1     <- tmp_SJC_SAMPLE_1[,1]
           IncLevel1        <- tmp_IncLevel1[,1]
    
           for (i in 2:length(male_srrnames)) {
            IJC_SAMPLE_1 <- paste(IJC_SAMPLE_1, tmp_IJC_SAMPLE_1[,i], sep=",")
            SJC_SAMPLE_1 <- paste(SJC_SAMPLE_1, tmp_SJC_SAMPLE_1[,i], sep=",")
            IncLevel1    <- paste(IncLevel1   , tmp_IncLevel1   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_1, SJC_SAMPLE_1 and IncLevel1 done!\n")
    
           IJC_SAMPLE_2     <- tmp_IJC_SAMPLE_2[,1]
           SJC_SAMPLE_2     <- tmp_SJC_SAMPLE_2[,1]
           IncLevel2        <- tmp_IncLevel2[,1]
    
           for (i in 2:length(female_srrnames)) {
            IJC_SAMPLE_2 <- paste(IJC_SAMPLE_2, tmp_IJC_SAMPLE_2[,i], sep=",")
            SJC_SAMPLE_2 <- paste(SJC_SAMPLE_2, tmp_SJC_SAMPLE_2[,i], sep=",")
            IncLevel2    <- paste(IncLevel2   , tmp_IncLevel2   [,i], sep=",")
           }
           message("PASS IJC_SAMPLE_2, SJC_SAMPLE_2 and IncLevel2 done!\n")
 
           # Ready to print out all the bits.
           # We will paste the line
           # first part comes from the fromGTF.splicetype.txt file
	   # Each splice_type is unique in their files
	   # Here we use fromGTF.RI.txt
           file <- paste(paste(tissue_dir, snakecase::to_swap_case(as.character(splice_type)), sep="/"),
                             "MATS.ReadsOnTargetAndJunctionCounts.txt", sep= ".")
           write.table("ID", file=file, append=FALSE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	   
           write.table("GeneID", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
           write.table("geneSymbol", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("chr", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("strand", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
	   write.table("riExonStart_0base", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("riExonEnd", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("upstreamES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("upstreamEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("downstreamES", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("downstreamEE", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("ID.1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SC_SAMPLE_2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("SkipFormLen", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("PValue", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("FDR", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel1", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevel2", file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
           write.table("IncLevelDifference", file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
           message("PASS header written to the file!\n")

           # first part is all the information from the fromGTF
           # The structure we have is of the dimensions of the fromGTF, reduced by row to the junctions of signficance
           for (m in 1:(dim(fromGTF)[1])) {
             for (n in 1:(dim(fromGTF)[2])) {
               write.table(fromGTF[m,n], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)	       
    	     }
             write.table(splice_type, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(tissue_index, file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,1], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,2], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(fromGTF[m,3], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS fromGTF file written to the file!\n")
             #now the rest		
             write.table(IncLevelDifference[m], file="../data/incLevelDifferences.tsv", append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE) 
             write.table(junction[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(SJC_SAMPLE_2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(inclen    [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(skiplen   [m,1], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$AdjPVal, file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(splice_type_gene_as[m,]$B      , file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel1[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevel2[m], file=file, append=TRUE, eol="\t",row.names=FALSE,col.names=FALSE,quote=FALSE)
             write.table(IncLevelDifference[m], file=file, append=TRUE, eol="\n",row.names=FALSE,col.names=FALSE,quote=FALSE)
             message("PASS header written to the file!\n")
             message("PASS row ", m)
    	     message("IJC, SJC, IncLevel for SAMPLE_1 and SAMPLE_2 written to the file!\n")
	    
           } # for all junctions of interest
	   
 	  } # and if there is > 0 junctions with inc
#	  rm(keep_gs)
	  
         } # end !is.na(junction)

In [None]:
#       } # and if there is > 0 genes of interest
       rm(keep_tissue)
       
      } # if there are any tissues for splice type "se"
    
    } # end for splice_type "ri"
	
  } # for all splice_types

In [None]:
}  # for all tissues

### Appendix - Metadata

For replicability and reproducibility purposes, we also print the following metadata:

1. Checksums of the **'artefacts'**, i.e. the files generated during the analysis and stored in the **`data`** directory
2. List of environment metadata, dependencies, versions of libraries using `utils::sessionInfo()` and [`devtools::session_info()`](https://devtools.r-lib.org/reference/session_info.html)

### Appendix - 1. Checksums with the sha256 algorithm

In [None]:
# Record sha256 checksums of every file under ../data so the artefacts
# produced by this notebook can be verified later.
#
# notebookid prefixes the metadata files written by this notebook. It is
# assigned directly: calling rm() on it first only raised an
# "object not found" warning when the notebook ran in a fresh session.
notebookid <- "prepDataForMaser"
notebookid

# Build the checksum file path once; it is used for writing, display and
# reading back. The same relative path is valid both from the working
# directory and from ../data (where the shell command runs), because
# ../data and ../metadata are sibling directories.
sha256sums_file <- paste0("../metadata/", notebookid, "_sha256sums.txt")

# The shell redirection below fails when ../metadata is missing, so make sure
# it exists (no effect when the directory is already there).
dir.create("../metadata", showWarnings = FALSE, recursive = TRUE)

message("Generating sha256 checksums of the artefacts in the `../data/` directory .. ")
# stdout of find/sha256sum is redirected into the checksum file, so the value
# captured by intern = TRUE is normally empty.
system(paste0("cd ../data && find . -type f -exec sha256sum {} \\;  >  ", sha256sums_file), intern = TRUE)
message("Done!\n")

# Show where the checksums were written.
sha256sums_file

# Read the checksums back and display them as a two-column table.
data.table::fread(sha256sums_file, header = FALSE, col.names = c("sha256sum", "file"))

### Appendix - 2. Libraries Metadata

In [None]:
# Capture the R session metadata (platform, attached packages and versions)
# and persist it as .rds files under ../metadata.
# Requires notebookid to be defined already; it is used as the file prefix.
dev_session_info   <- devtools::session_info()
utils_session_info <- utils::sessionInfo()

# Build each output path once so the status messages report exactly the file
# that is written (the previous messages omitted the notebookid prefix and
# named a different file for the utils object).
dev_session_info_file   <- paste0("../metadata/", notebookid, "_devtools_session_info.rds")
utils_session_info_file <- paste0("../metadata/", notebookid, "_utils_info.rds")

message("Saving `devtools::session_info()` objects in ", dev_session_info_file, "  ..")
saveRDS(dev_session_info, file = dev_session_info_file)
message("Done!\n")

message("Saving `utils::sessionInfo()` objects in ", utils_session_info_file, "  ..")
saveRDS(utils_session_info, file = utils_session_info_file)
message("Done!\n")

# Display the platform summary and only the packages attached to the session.
dev_session_info$platform
dev_session_info$packages[dev_session_info$packages$attached == TRUE, ]