# Testing Janggu's Functionality

**Authorship:**
Adam Klie, *08/04/2022*
***
**Description:**
Notebook for testing Janggu's data loaders (`Bioseq`, `Cover`) and its compatibility with other libraries (e.g. pybedtools/bedtools).

In [1]:
from pkg_resources import resource_filename
from janggu.data import Bioseq, Cover

# Autoreload extension: load it only if it is not already loaded in this
# IPython session, then switch to mode 2, which reloads all imported modules
# before executing each cell so edits to source files are picked up.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

# Bioseq

In [6]:
# Resolve the path of the sample FASTA resource inside the janggu package
# and display it.
fasta_file = resource_filename('janggu', 'resources/sample.fa')
fasta_file

'/cellar/users/aklie/opt/miniconda3/envs/eugene_benchmarks/lib/python3.7/site-packages/janggu/resources/sample.fa'

In [21]:
# Build a Bioseq dataset named 'dna' directly from the sample FASTA file.
dna = Bioseq.create_from_seq(name='dna', fastafile=fasta_file)

In [23]:
# Slice the complete dataset and display the shape of the result.
dna[:].shape

(3897, 200, 1, 4)

In [25]:
def read_bed(
    roi_file: str,
    ref_file: str,
    *,
    name: str = "dna",
    **kwargs
):
    """Create a janggu ``Bioseq`` for the regions of a BED file.

    Thin wrapper around ``Bioseq.create_from_refgenome``.

    Parameters
    ----------
    roi_file : str
        Path to the regions-of-interest file; forwarded as ``roi``.
    ref_file : str
        Path to the reference genome file; forwarded as ``refgenome``.
    name : str, optional
        Name given to the dataset. Keyword-only; defaults to ``"dna"``,
        the value that used to be hard-coded.
    **kwargs
        Any further keyword arguments, forwarded unchanged to
        ``Bioseq.create_from_refgenome``.

    Returns
    -------
    The object returned by ``Bioseq.create_from_refgenome``.
    """
    return Bioseq.create_from_refgenome(
        name=name,
        refgenome=ref_file,
        roi=roi_file,
        **kwargs
    )


In [27]:
# Exercise read_bed with the sample BED file and sample genome that ship
# inside the janggu package.
sample_roi_path = resource_filename('janggu', 'resources/sample.bed')
sample_genome_path = resource_filename('janggu', 'resources/sample_genome.fa')
test_bioseq = read_bed(roi_file=sample_roi_path, ref_file=sample_genome_path)

In [29]:
# Slice the complete dataset returned by read_bed and display its shape.
test_bioseq[:].shape

(2, 10000, 1, 4)

# Cover

In [30]:
from janggu.data import Cover

In [31]:
# Paths of the sample BAM file and the sample regions-of-interest BED file
# bundled with the janggu package.
bam_file = resource_filename('janggu', 'resources/sample.bam')
roi = resource_filename('janggu', 'resources/sample.bed')

In [32]:
# Build a coverage dataset from the sample BAM file over the regions in
# `roi`, with a bin size of 200 and a step size of 200.
cover = Cover.create_from_bam(
    'read_count_coverage',
    bamfiles=bam_file,
    binsize=200,
    stepsize=200,
    roi=roi,
)

# Only the last expression of a cell is displayed, so this line shows nothing.
cover.shape  # is (100, 200, 2, 1)
cover[0]  # coverage of the first region

array([[[[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        [[0.],
         [0.]],

        

In [None]:
# `os` is imported here because the only other `import os` sits in a later
# cell; without it this cell raises NameError on Restart & Run All.
import os

# NOTE: machine-specific absolute path; point this at the local junD data
# directory before running.
input_dir = '/cellar/users/aklie/data/eugene/junD'

# Input files expected inside `input_dir`.
bed_file = os.path.join(input_dir, "jund_raw_peaks.bed")
roi_file = os.path.join(input_dir, "trim_roi_jund_extended.bed")
refgenome = os.path.join(input_dir, "hg38.fa")
bed_file, roi_file, refgenome

# Bedtools

In [1]:
import os
import importlib
import sys
import pybedtools
import subprocess

In [2]:
# Directory holding the running Python interpreter.
bin_dir = os.path.dirname(sys.executable)

# Append it to PATH only when it is missing, so re-running this cell does not
# keep adding duplicate entries.
current_path = os.environ.get("PATH", "")
if bin_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        current_path + os.pathsep + bin_dir if current_path else bin_dir
    )
os.environ["PATH"]

'/cm/local/apps/environment-modules/4.4.0//bin:/mnt/beegfs/users/aklie/opt/google-cloud-sdk/bin:/cellar/users/aklie/opt/miniconda3/bin:/cellar/users/aklie/opt/miniconda3/condabin:/cellar/users/aklie/opt/deltasvm_script/deltasvm.pl:/cellar/users/aklie/opt/lsgkm-svr/bin:/cellar/users/aklie/opt/gatk-4.2.6.1:/cellar/users/mpagadal/Programs/PRSICE/PRSice.R:/cellar/users/aklie/opt/plink:/cellar/users/aklie/opt/plink2:/cellar/users/aklie/opt/confusion_matrix:/cellar/users/aklie/bin/motif_finding.sh:/cellar/users/aklie/opt/edirect:/cellar/users/mpagadal/Programs/bcftools-1.11:/cellar/users/aklie/opt/homer/bin:/cellar/users/aklie/opt/Gene2vec/src:/cellar/users/aklie/opt:/cellar/users/aklie/.local/bin:/cm/local/apps/cuda/libs/current/bin:/cm/shared/apps/cuda10.2/sdk/10.2.89/bin/x86_64/linux/release:/cm/shared/apps/cuda10.2/toolkit/10.2.89/bin:/cm/local/apps/jupyter-submit:/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/cm/local/apps/environment-modules/4.4.0/bin:/usr/local/

In [3]:
# Call the setter. Assigning to `pybedtools.set_bedtools_path` would replace
# the function with a string and leave the bedtools path unchanged.
pybedtools.set_bedtools_path(bin_dir)

In [6]:
# Set the bedtools directory through the underscore-prefixed helper in
# `pybedtools.paths`.
# NOTE(review): this is a private API and may change between pybedtools
# releases; confirm it is still needed.
from pybedtools import paths
paths._set_bedtools_path(bin_dir)

1

In [8]:
# Smoke test: intersect two of the example BED files that pybedtools provides
# and print the resulting features.
a = pybedtools.example_bedtool('a.bed')
b = pybedtools.example_bedtool('b.bed')
print(a.intersect(b))

chr1	155	200	feature2	0	+
chr1	155	200	feature3	0	-
chr1	900	901	feature4	0	+



In [21]:
# `a - b` is pybedtools shorthand for `a.intersect(b, v=True)` (features of
# `a` with no overlap in `b`); count how many there are.
(a-b).count()

In [7]:
# Override pybedtools' private bedtools-path setting directly.
# NOTE(review): hard-coded, machine-specific path — presumably the same
# directory as `bin_dir`; confirm, and reuse that variable if so.
pybedtools.settings._bedtools_path = "/cellar/users/aklie/opt/miniconda3/envs/eugene_benchmarks/bin/"

In [10]:
# Check that pybedtools can find bedtools. `helpers` is reached through the
# already-imported `pybedtools` package because no cell imports it as a bare
# name.
pybedtools.helpers._check_for_bedtools(verbose=True)

Found bedtools version 'bedtools v2.30.0'


In [17]:
# Display pybedtools' cached "bedtools found" flag. `settings` is reached
# through the `pybedtools` package because no cell imports it as a bare name.
pybedtools.settings._bedtools_installed

True

In [18]:
# Launch the command pybedtools builds for "intersectBed" with `-h`, capturing
# both output streams. `helpers` is reached through the `pybedtools` package
# because no cell imports it as a bare name.
p = subprocess.Popen(
    pybedtools.helpers._version_2_15_plus_names("intersectBed") + ["-h"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

In [19]:
# Wait for the process to finish; communicate() returns (stdout, stderr), and
# index 1 selects the captured stderr, which is decoded to text for display.
p.communicate()[1].decode()

'\nTool:    bedtools intersect (aka intersectBed)\nVersion: v2.30.0\nSummary: Report overlaps between two feature files.\n\nUsage:   bedtools intersect [OPTIONS] -a <bed/gff/vcf/bam> -b <bed/gff/vcf/bam>\n\n\tNote: -b may be followed with multiple databases and/or \n\twildcard (*) character(s). \nOptions: \n\t-wa\tWrite the original entry in A for each overlap.\n\n\t-wb\tWrite the original entry in B for each overlap.\n\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r.\n\n\t-loj\tPerform a "left outer join". That is, for each feature in A\n\t\treport each overlap with B.  If no overlaps are found, \n\t\treport a NULL feature for B.\n\n\t-wo\tWrite the original A and B entries plus the number of base\n\t\tpairs of overlap between the two features.\n\t\t- Overlaps restricted by -f and -r.\n\t\t  Only A features with overlap are reported.\n\n\t-wao\tWrite the original A and B entries plus the number of base\n\t\tpairs of overlap between the two features.\n\t\t- Overlappi

In [15]:
# Re-execute the pybedtools package's top-level code and re-bind the name to
# the module object that importlib.reload returns.
# NOTE(review): reload does not re-execute submodules that are already
# imported — confirm this achieves the intended reset.
pybedtools = importlib.reload(pybedtools)

True

In [23]:
# Repeat of the intersect smoke test on the pybedtools example BED files,
# re-binding `a` and `b` and printing the overlapping features again.
a = pybedtools.example_bedtool('a.bed')
b = pybedtools.example_bedtool('b.bed')
print(a.intersect(b))

chr1	155	200	feature2	0	+
chr1	155	200	feature3	0	-
chr1	900	901	feature4	0	+



In [20]:
# Leftover check of whether `resource_filename` is defined. The recorded
# output is a NameError, i.e. the import cell at the top of the notebook had
# not been run in the kernel session that executed this cell.
resource_filename

NameError: name 'resource_filename' is not defined