**Author :** Rutendo F. Sigauke

**Input  :** 

1. Normalized counts
           
        - gene_bidir_tpm_allBidirs_filteredSamples.tsv.gz
        
2. Summary of normalized counts        

        - gene_bidir_tpm_summary_allBidirs_filteredSamples.tsv.gz

3. List of high QC samples
        
        - human_samples_QC_GC_protocol_filtered.tsv.gz
        
4. Full gene length counts

        - counts_filt_gene_stranded_counts.txt
        
5. Bidirectional transcripts that DO NOT OVERLAP genes

        - hg38_tfit_dreg_bidirectionals_non_genes.bed
        
6. Bidirectional transcripts that OVERLAP genes

        - hg38_tfit_dreg_bidirectionals_in_genes.bed


**Output :**

1. Subset of counts and transcripts

        a. Intergenic: `genes_inter_bidir_filtered.tsv.gz`
        b. Intragenic: `genes_intra_bidir_filtered.tsv.gz`

# Libraries

In [1]:
library(data.table) 

# Load data

## Metadata

In [2]:
# Read the tab-separated table of QC/GC/protocol-filtered human samples.
hg38_high_qc_gc <- data.table::fread(
  input = "/Users/rusi2317/projects/meta_analysis_qc/hg38/processed_data/counts/normalized/human_samples_QC_GC_protocol_filtered.tsv.gz",
  sep = "\t"
)

In [3]:
# Tabulate how many samples fall under each value of the `tissue` column.
as.data.frame(table(hg38_high_qc_gc[["tissue"]]))

Var1,Freq
<fct>,<int>
blood,222
bone,4
brain,6
breast,159
embryo,27
eye,1
heart,22
intestine,47
kidney,63
liver,7


## Normalized counts

In [4]:
## All bidirectionals, filtered samples:
## the per-sample normalized table, then its per-transcript summary table.
gene_all_bidir_tpm_filtered <- data.table::fread(
  input = "/Users/rusi2317/projects/meta_analysis_qc/hg38/processed_data/counts/normalized/gene_bidir_tpm_allBidirs_filteredSamples.tsv.gz"
)
gene_all_bidir_tpm_filtered_summary <- data.table::fread(
  input = "/Users/rusi2317/projects/meta_analysis_qc/hg38/processed_data/counts/normalized/gene_bidir_tpm_summary_allBidirs_filteredSamples.tsv.gz"
)

In [5]:
# Report the table dimensions and preview the first three rows.
dim(gene_all_bidir_tpm_filtered)
head(gene_all_bidir_tpm_filtered, 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr1,12623,14409,DDX11L1:NR_046018.2,.,+,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
chr1,14361,28620,WASH7P:NR_024540.1,.,-,0.1844719,0.03814665,0.1945557,0.1415614,⋯,0.06812888,0.04399975,0.05040534,0.2144871,0.3700028,0.4193475,0.3418568,0.2917163,0.3737646,0.4307619
chr1,17368,17436,MIR6859-1:NR_106918.1,.,-,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Full gene counts

In [6]:
# Read the tab-separated gene count table (full gene length annotations),
# then report its dimensions and preview the first three rows.
counts_genes_full <- data.table::fread(
  input = "/Users/rusi2317/projects/meta_analysis_qc/hg38/processed_data/counts/genes/counts_filt_gene_stranded_counts.txt",
  sep = "\t"
)
dim(counts_genes_full)
head(counts_genes_full, n = 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713716,SRR3713717,SRR3713718,SRR3713719,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
chr1,11873,14409,DDX11L1:NR_046018.2,.,+,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
chr1,14361,29370,WASH7P:NR_024540.1,.,-,43,4,45,34,⋯,12,10,38,30,42,62,53,44,49,61
chr1,17368,17436,MIR6859-1:NR_106918.1,.,-,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


# Subset transcripts

## Intergenic bidirectionals 

In [7]:
# Read the BED file of bidirectionals that do not overlap genes.
# NOTE(review): this path is rooted at /scratch/Users/... while the count
# tables are read from /Users/... -- confirm both roots are intended.
bidir_anno_non_genes <- data.table::fread(
  input = "/scratch/Users/rusi2317/projects/meta_analysis_qc/hg38/annotations/hg38_tfit_dreg_bidirectionals_non_genes.bed",
  sep = "\t"
)

# Name the six BED columns.
data.table::setnames(
  bidir_anno_non_genes,
  c("chrom", "start", "end", "bidirs", "score", "strand")
)

# Overwrite the fourth column with a "chrom:start-end" identifier.
data.table::set(
  bidir_anno_non_genes,
  j = "bidirs",
  value = with(bidir_anno_non_genes, paste0(chrom, ":", start, "-", end))
)

dim(bidir_anno_non_genes)
head(bidir_anno_non_genes, n = 3)

chrom,start,end,bidirs,score,strand
<chr>,<int>,<int>,<chr>,<int>,<chr>
chr1,3917,4919,chr1:3917-4919,14,.
chr1,5632,6042,chr1:5632-6042,14,.
chr1,6132,6486,chr1:6132-6486,7,.


In [8]:
# Keep the rows of the normalized table whose identifier is one of the
# "chrom:start-end" ids built from the non-gene bidirectional annotation.
bidir_tpm_intergenic <- gene_all_bidir_tpm_filtered[
  gene_transcript %in% unique(bidir_anno_non_genes$bidirs)
]
dim(bidir_tpm_intergenic)
head(bidir_tpm_intergenic, n = 3)
tail(bidir_tpm_intergenic, n = 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr1,3917,4919,chr1:3917-4919,14,.,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
chr1,5632,6042,chr1:5632-6042,14,.,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
chr1,6132,6486,chr1:6132-6486,7,.,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chrY,56881721,56881941,chrY:56881721-56881941,3,.,0,0,0,0,⋯,0,0,0,0,0,0,0.4161938,0,0,0
chrY,56883158,56883488,chrY:56883158-56883488,2,.,0,0,0,0,⋯,0,0,0,0,0,0,0.0,0,0,0
chrY,56884695,56885095,chrY:56884695-56885095,2,.,0,0,0,0,⋯,0,0,0,0,0,0,0.0,0,0,0


In [9]:
# Total number of identifiers vs. number of distinct identifiers in the
# intergenic subset; the two values agree only if no identifier is repeated.
length(bidir_tpm_intergenic$gene_transcript)
length(unique(bidir_tpm_intergenic$gene_transcript))

## Intragenic bidirectionals 

In [10]:
# Read the BED file of bidirectionals that overlap genes. Each row pairs one
# bidirectional (first six columns) with one gene (last six columns).
bidir_anno_genes <- data.table::fread(
  input = "/scratch/Users/rusi2317/projects/meta_analysis_qc/hg38/annotations/hg38_tfit_dreg_bidirectionals_in_genes.bed",
  sep = "\t"
)

# Name the twelve columns: bidirectional fields first, then gene fields.
data.table::setnames(
  bidir_anno_genes,
  c(
    "bidir_chrom", "bidir_start", "bidir_end",
    "bidir_id", "bidir_score", "bidir_strand",
    "gene_chrom", "gene_start", "gene_end",
    "gene_id", "gene_score", "gene_strand"
  )
)

# Add a "chrom:start-end" identifier built from the bidirectional coordinates.
data.table::set(
  bidir_anno_genes,
  j = "bidirs",
  value = with(
    bidir_anno_genes,
    paste0(bidir_chrom, ":", bidir_start, "-", bidir_end)
  )
)

# Rows are overlaps, so one bidirectional can appear on several rows.
print(paste0("Number of overlaps: ", nrow(bidir_anno_genes)))
print(paste0(
  "Unique bidirectionals overlapping: ",
  length(unique(bidir_anno_genes[["bidirs"]]))
))
head(bidir_anno_genes, n = 3)
tail(bidir_anno_genes, n = 3)

[1] "Number of overlaps: 873393"
[1] "Unique bidirectionals overlapping: 494876"


bidir_chrom,bidir_start,bidir_end,bidir_id,bidir_score,bidir_strand,gene_chrom,gene_start,gene_end,gene_id,gene_score,gene_strand,bidirs
<chr>,<int>,<int>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>
chr1,12182,12456,"tfit,dreg",1,.,chr1,11873,14409,DDX11L1:NR_046018.2,.,+,chr1:12182-12456
chr1,13264,13506,tfit,14,.,chr1,11873,14409,DDX11L1:NR_046018.2,.,+,chr1:13264-13506
chr1,14655,14811,"tfit,dreg",14,.,chr1,14361,29370,WASH7P:NR_024540.1,.,-,chr1:14655-14811


bidir_chrom,bidir_start,bidir_end,bidir_id,bidir_score,bidir_strand,gene_chrom,gene_start,gene_end,gene_id,gene_score,gene_strand,bidirs
<chr>,<int>,<int>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>
chrY,25390010,25390360,dreg,1,.,chrY,25378299,25394719,SEPTIN14P23:NR_174182.1,.,-,chrY:25390010-25390360
chrY,25390600,25390950,dreg,3,.,chrY,25378299,25394719,SEPTIN14P23:NR_174182.1,.,-,chrY:25390600-25390950
chrY,25392590,25393000,dreg,2,.,chrY,25378299,25394719,SEPTIN14P23:NR_174182.1,.,-,chrY:25392590-25393000


In [11]:
# Distinct intergenic identifiers found in the normalized table plus distinct
# gene-overlapping identifiers listed in the annotation.
length(unique(bidir_tpm_intergenic$gene_transcript)) + length(unique(bidir_anno_genes$bidirs))

In [12]:
# Keep the rows of the normalized table whose identifier is one of the
# "chrom:start-end" ids built from the gene-overlapping annotation.
bidir_tpm_intragenic <- gene_all_bidir_tpm_filtered[
  gene_transcript %in% unique(bidir_anno_genes$bidirs)
]
dim(bidir_tpm_intragenic)
head(bidir_tpm_intragenic, n = 3)
tail(bidir_tpm_intragenic, n = 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr1,14655,14811,chr1:14655-14811,14,.,0,0,0,0,⋯,0,0,0.0,0,0,0,0,0,0,0
chr1,15084,15360,chr1:15084-15360,9,.,0,0,0,0,⋯,0,0,0.0,0,0,0,0,0,0,0
chr1,16191,16429,chr1:16191-16429,128,.,0,0,0,0,⋯,0,0,0.3007449,0,0,0,0,0,0,0


chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chrY,24849020,24849850,chrY:24849020-24849850,1,.,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
chrY,24881508,24881926,chrY:24881508-24881926,2,.,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
chrY,24903562,24903988,chrY:24903562-24903988,6,.,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


## Genes

In [13]:
# Gene rows of the normalized table: those whose identifier also appears in
# the full gene count table.
gene_trunc <- gene_all_bidir_tpm_filtered[
  gene_transcript %in% counts_genes_full$gene_transcript
]
dim(gene_trunc)
head(gene_trunc, n = 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr1,12623,14409,DDX11L1:NR_046018.2,.,+,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
chr1,14361,28620,WASH7P:NR_024540.1,.,-,0.1844719,0.03814665,0.1945557,0.1415614,⋯,0.06812888,0.04399975,0.05040534,0.2144871,0.3700028,0.4193475,0.3418568,0.2917163,0.3737646,0.4307619
chr1,17368,17436,MIR6859-1:NR_106918.1,.,-,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Show the six annotation columns of the full gene count table next to those of
# the gene rows from the normalized table, to compare their coordinates.
head(counts_genes_full[,1:6], 3)
head(gene_trunc[,1:6],3)

chrom,start,stop,gene_transcript,score,strand
<chr>,<int>,<int>,<chr>,<chr>,<chr>
chr1,11873,14409,DDX11L1:NR_046018.2,.,+
chr1,14361,29370,WASH7P:NR_024540.1,.,-
chr1,17368,17436,MIR6859-1:NR_106918.1,.,-


chrom,start,stop,gene_transcript,score,strand
<chr>,<int>,<int>,<chr>,<chr>,<chr>
chr1,12623,14409,DDX11L1:NR_046018.2,.,+
chr1,14361,28620,WASH7P:NR_024540.1,.,-
chr1,17368,17436,MIR6859-1:NR_106918.1,.,-


In [15]:
# Re-annotate genes with the coordinates from counts_genes_full: take its six
# annotation columns and join them, by gene_transcript, to the per-sample
# values of gene_all_bidir_tpm_filtered.
# The value columns are selected by name (everything except the coordinate
# columns) rather than by the hard-coded positions c(4, 7:757), so the join
# keeps working when the number of sample columns changes.
coordinate_cols <- c('chrom', 'start', 'stop', 'score', 'strand')
value_cols <- setdiff(colnames(gene_all_bidir_tpm_filtered), coordinate_cols)
genes_merged <- merge(counts_genes_full[, 1:6],
                      gene_all_bidir_tpm_filtered[, value_cols, with = FALSE],
                      by = 'gene_transcript')
dim(genes_merged)
head(genes_merged, 3)

gene_transcript,chrom,start,stop,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
A1BG-AS1:NR_015380.2,chr19,58351969,58355183,.,+,0.4715395,0.275847512,0.350156404,0.28903447,⋯,4.7901330211,8.908832855,9.360180257,11.26521,24.003756,23.515781,22.649555,21.094696,23.696213379,22.427576708
A1BG:NM_130786.4,chr19,58345182,58353492,.,-,0.6391889,0.413680061,0.742015737,0.722426895,⋯,1.7346237696,1.293155683,1.3308983947,1.22421,2.425766,1.773117,2.165353,2.375756,2.301772168,2.450560985
A1CF:NM_014576.4,chr10,50799408,50885627,.,-,0.0,0.001591118,0.002164006,0.002778636,⋯,0.0004371834,0.001835253,0.0008409737,0.0009417236,0.0,0.0,0.0,0.0,0.002545294,0.001178184


In [16]:
## Order the merged columns to match the column order of the normalized input table
column_order <- colnames(gene_all_bidir_tpm_filtered)

# NOTE: data.table::setcolorder() reorders its argument by reference, so
# genes_merged itself is reordered here and gene_new_annotations refers to
# that same reordered table.
gene_new_annotations <- setcolorder(genes_merged, column_order)
dim(gene_new_annotations)
head(gene_new_annotations, 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr19,58351969,58355183,A1BG-AS1:NR_015380.2,.,+,0.4715395,0.275847512,0.350156404,0.28903447,⋯,4.7901330211,8.908832855,9.360180257,11.26521,24.003756,23.515781,22.649555,21.094696,23.696213379,22.427576708
chr19,58345182,58353492,A1BG:NM_130786.4,.,-,0.6391889,0.413680061,0.742015737,0.722426895,⋯,1.7346237696,1.293155683,1.3308983947,1.22421,2.425766,1.773117,2.165353,2.375756,2.301772168,2.450560985
chr10,50799408,50885627,A1CF:NM_014576.4,.,-,0.0,0.001591118,0.002164006,0.002778636,⋯,0.0004371834,0.001835253,0.0008409737,0.0009417236,0.0,0.0,0.0,0.0,0.002545294,0.001178184


In [17]:
## Check the annotations: print the same transcript from the normalized input
## table and from the re-annotated table so their coordinates and values can
## be compared side by side.
subset(gene_all_bidir_tpm_filtered, gene_transcript=='A1BG:NM_130786.4')
subset(gene_new_annotations, gene_transcript=='A1BG:NM_130786.4')

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr19,58345182,58352742,A1BG:NM_130786.4,.,-,0.6391889,0.4136801,0.7420157,0.7224269,⋯,1.734624,1.293156,1.330898,1.22421,2.425766,1.773117,2.165353,2.375756,2.301772,2.450561


chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr19,58345182,58353492,A1BG:NM_130786.4,.,-,0.6391889,0.4136801,0.7420157,0.7224269,⋯,1.734624,1.293156,1.330898,1.22421,2.425766,1.773117,2.165353,2.375756,2.301772,2.450561


# Sets of transcripts

## Genes + Intergenic

In [18]:
# Stack the re-annotated gene rows on top of the intergenic bidirectional rows.
genes_inter_bidir <- data.table::rbindlist(
  list(gene_new_annotations, bidir_tpm_intergenic),
  use.names = TRUE
)
dim(genes_inter_bidir)
head(genes_inter_bidir, n = 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr19,58351969,58355183,A1BG-AS1:NR_015380.2,.,+,0.4715395,0.275847512,0.350156404,0.28903447,⋯,4.7901330211,8.908832855,9.360180257,11.26521,24.003756,23.515781,22.649555,21.094696,23.696213379,22.427576708
chr19,58345182,58353492,A1BG:NM_130786.4,.,-,0.6391889,0.413680061,0.742015737,0.722426895,⋯,1.7346237696,1.293155683,1.3308983947,1.22421,2.425766,1.773117,2.165353,2.375756,2.301772168,2.450560985
chr10,50799408,50885627,A1CF:NM_014576.4,.,-,0.0,0.001591118,0.002164006,0.002778636,⋯,0.0004371834,0.001835253,0.0008409737,0.0009417236,0.0,0.0,0.0,0.0,0.002545294,0.001178184


In [19]:
# Preview the per-transcript summary statistics table.
head(gene_all_bidir_tpm_filtered_summary)

chrom,start,stop,gene_transcript,score,strand,length,stdev,variance,sum,mean,median,coefvar,max,min,num_transcribed,percent_transcribed,transcript_type
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<chr>
chr1,12623,14409,DDX11L1:NR_046018.2,.,+,1787,0.072099287,0.005198307,6.76054154,0.009002053,0.0,8.0092052,1.25232307,0,29,3.861518,genes
chr1,14361,28620,WASH7P:NR_024540.1,.,-,14260,0.190457404,0.03627402,163.61697071,0.2178655,0.1674576,0.8741973,1.5870462,0,749,99.7336884,genes
chr1,17368,17436,MIR6859-1:NR_106918.1,.,-,69,0.0,0.0,0.0,0.0,0.0,,0.0,0,0,0.0,genes
chr1,30365,30503,MIR1302-2:NR_036051.1,.,+,139,0.034063044,0.001160291,0.93347658,0.001242978,0.0,27.4043792,0.93347658,0,1,0.1331558,genes
chr1,34610,35591,FAM138A:NR_026818.1,.,-,982,0.002709721,7.342586e-06,0.07425821,9.887911e-05,0.0,27.4043792,0.07425821,0,1,0.1331558,genes
chr1,66168,71585,OR4F5:NM_001005484.2,.,+,5418,0.002555593,6.531054e-06,0.1886045,0.0002511378,0.0,10.1760572,0.04010044,0,11,1.4647137,genes


In [20]:
# Filter transcripts by transcription level: keep summary rows whose `max`
# exceeds 1 and whose `num_transcribed` exceeds 19.
filtered <- gene_all_bidir_tpm_filtered_summary[max > 1 & num_transcribed > 19]

# Rows before filtering, rows kept, and rows removed.
nrow(gene_all_bidir_tpm_filtered_summary)
nrow(filtered)
nrow(gene_all_bidir_tpm_filtered_summary) - nrow(filtered)

In [21]:
# Restrict the genes + intergenic set to identifiers that passed the
# transcription filter.
genes_inter_bidir_filtered <- genes_inter_bidir[
  gene_transcript %in% filtered$gene_transcript
]
dim(genes_inter_bidir_filtered)

In [22]:
# Write the filtered genes + intergenic table as a tab-separated file.
data.table::fwrite(
  x = genes_inter_bidir_filtered,
  file = "/Users/rusi2317/projects/meta_analysis_qc/hg38/processed_data/counts/normalized/genes_inter_bidir_filtered.tsv.gz",
  sep = "\t"
)

## Genes + Intragenic

In [23]:
# Stack the re-annotated gene rows on top of the intragenic bidirectional rows.
genes_intra_bidir <- data.table::rbindlist(
  list(gene_new_annotations, bidir_tpm_intragenic),
  use.names = TRUE
)
dim(genes_intra_bidir)
head(genes_intra_bidir, n = 3)

chrom,start,stop,gene_transcript,score,strand,SRR7266931,SRR7266932,SRR7266933,SRR7266934,⋯,SRR3713715,SRR3713716,SRR3713717,SRR3713718,SRR8483105,SRR8483106,SRR8483107,SRR8483108,SRR8483109,SRR8483110
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr19,58351969,58355183,A1BG-AS1:NR_015380.2,.,+,0.4715395,0.275847512,0.350156404,0.28903447,⋯,4.7901330211,8.908832855,9.360180257,11.26521,24.003756,23.515781,22.649555,21.094696,23.696213379,22.427576708
chr19,58345182,58353492,A1BG:NM_130786.4,.,-,0.6391889,0.413680061,0.742015737,0.722426895,⋯,1.7346237696,1.293155683,1.3308983947,1.22421,2.425766,1.773117,2.165353,2.375756,2.301772168,2.450560985
chr10,50799408,50885627,A1CF:NM_014576.4,.,-,0.0,0.001591118,0.002164006,0.002778636,⋯,0.0004371834,0.001835253,0.0008409737,0.0009417236,0.0,0.0,0.0,0.0,0.002545294,0.001178184


In [24]:
# Restrict the genes + intragenic set to identifiers that passed the
# transcription filter.
genes_intra_bidir_filtered <- genes_intra_bidir[
  gene_transcript %in% filtered$gene_transcript
]
dim(genes_intra_bidir_filtered)

In [25]:
# Write the filtered genes + intragenic table as a tab-separated file.
data.table::fwrite(
  x = genes_intra_bidir_filtered,
  file = "/Users/rusi2317/projects/meta_analysis_qc/hg38/processed_data/counts/normalized/genes_intra_bidir_filtered.tsv.gz",
  sep = "\t"
)

# Session Information

In [26]:
# Record the R version, platform, and package versions used for this run.
sessionInfo()

R version 3.6.0 (2019-04-26)
Platform: x86_64-redhat-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] data.table_1.14.2

loaded via a namespace (and not attached):
 [1] fansi_1.0.3       crayon_1.5.1      digest_0.6.29     utf8_1.2.2       
 [5] R.methodsS3_1.8.2 IRdisplay_1.1     repr_1.1.4        lifecycle_1.0.3  
 [9] jsonlite_1.8.0    evaluate_0.16     pillar_1.8.1      rlang_1.0.6      
[13] cli_3.4.1         uuid_1.1-0     