In [1]:
import sys
sys.path.append('../')
from config_sing import *

In [42]:
### import tools
import sqlite3
import itertools as it
from functools import reduce
from collections import defaultdict
from mpl_toolkits.axes_grid1 import make_axes_locatable
### Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
### 3.8 removed the old names, so use whichever spelling this install offers.
### If neither is listed, fall back to the original name so the failure mode
### (an error from plt.style.use) is unchanged.
plt.style.use(next(
    (sty for sty in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
     if sty in plt.style.available),
    "seaborn-whitegrid",
))

### Set Samples
### np.char.add is the public element-wise string concatenation; the former
### np.core.defchararray path goes through the private np.core namespace,
### which is deprecated since NumPy 2.0.
fun = np.char.add
idx = np.arange(1,6).astype("str")

### each reduce concatenates its pieces left to right, broadcasting the scalar
### strings over idx; the TFX samples use idx[1:], i.e. replicates 2..5 only
INPUT    = reduce(fun, ["Input", idx             ])
INPUT20X = reduce(fun, ["Input", idx,     "_20x" ])
TFX_DMSO = reduce(fun, ["TFX",   idx[1:], "_DMSO"])
TFX_DEX  = reduce(fun, ["TFX",   idx[1:], "_Dex" ])
SAMPLES  = np.concatenate([INPUT, INPUT20X, TFX_DMSO, TFX_DEX])

### file path of the SQLite database: <FD_RES>/database/fragment.db
### NOTE(review): FD_RES and os are not defined in the visible cells --
### presumably both come from the star import of config_sing; confirm.
fdiry = os.path.join(FD_RES, 'database')
fname = "fragment.db"
FPATH_DB = os.path.join(fdiry, fname)
print(FPATH_DB)

/mount/work/out/proj_combeffect/database/fragment.db


In [14]:
### set database connection: open the file read-only through an SQLite URI
fpath_db = FPATH_DB
fpath_db = f"file:{fpath_db}?mode=ro"
print(fpath_db)

### query from the database
### "with sqlite3.connect(...)" only scopes a transaction and never closes the
### connection, so fetch the rows and close the connection explicitly
conn = sqlite3.connect(fpath_db, uri=True)
try:
    ### set query to query out the table names
    query_out = """
        SELECT name 
        FROM sqlite_master 
        WHERE type = 'table';
        """

    ### query from the database
    query  = query_out
    cursor = conn.cursor()
    rows   = cursor.execute(query).fetchall()
finally:
    conn.close()

### each row is a 1-tuple holding one table name
for row in rows:
    print(row)

file:/mount/work/out/proj_combeffect/database/fragment.db?mode=ro
('Sample',)
('Fragment',)
('Motif',)
('Count',)
('Coverage',)
('Annotation',)


In [49]:
def get_motifs(fpath_db=FPATH_DB):
    """Return every distinct motif name stored in the Motif table.

    The database is opened read-only, all rows are fetched, and the
    connection is closed before returning.

    Parameters
    ----------
    fpath_db : str
        Filesystem path of the SQLite database (defaults to FPATH_DB).

    Returns
    -------
    iterator of tuple
        One 1-tuple ``(motif,)`` per distinct motif, ordered by motif name.
        The rows are already fetched, so the iterator remains valid after
        the connection has been closed.

    Raises
    ------
    sqlite3.Error
        If the database cannot be opened or the query fails.
    """
    ### set query: query all motifs in the table
    query = """
        SELECT DISTINCT Mtf.motif
        FROM     Motif Mtf
        ORDER BY Mtf.motif
       ;"""

    ### "with sqlite3.connect(...)" only scopes a transaction and leaves the
    ### connection open, so close it explicitly once the rows are fetched
    conn = sqlite3.connect(f"file:{fpath_db}?mode=ro", uri=True)
    try:
        rows = conn.execute(query).fetchall()
    finally:
        conn.close()

    ### return an iterator (as the cursor was) so next()/islice callers keep working
    return iter(rows)

In [50]:
def get_fragments_from_motif(mtf, fpath_db=FPATH_DB):
    """Return the per-sample counts of every fragment annotated with a motif.

    Annotation is joined to Motif on ``binding`` and to Count on
    ``fragment``; rows whose motif equals ``mtf`` are returned. The database
    is opened read-only and the connection is closed before returning.

    Parameters
    ----------
    mtf : str
        Motif name to match against ``Motif.motif`` (e.g. 'AHR').
    fpath_db : str
        Filesystem path of the SQLite database (defaults to FPATH_DB).

    Returns
    -------
    cnames : tuple of str
        Column names of the rows: ("fragment", "binding", "sample", "count").
    rows : iterator of tuple
        One tuple per result row, in the column order given by ``cnames``.
        The rows are already fetched, so the iterator remains valid after
        the connection has been closed.

    Raises
    ------
    sqlite3.Error
        If the database cannot be opened or the query fails.
    """
    cnames = "fragment", "binding", "sample", "count"

    ### set query: all annotated bindings of the motif with their counts;
    ### the motif is bound as a parameter rather than pasted into the SQL text,
    ### so names containing quotes cannot break or alter the statement
    query = """
        SELECT Ant.fragment, Ant.binding, Cnt.sample, Cnt.count
        FROM   Annotation Ant
        JOIN   Motif      Mtf ON Ant.binding  = Mtf.binding
        JOIN   Count      Cnt ON Ant.fragment = Cnt.fragment
        WHERE  Mtf.motif  = ?
       ;"""

    ### "with sqlite3.connect(...)" only scopes a transaction and leaves the
    ### connection open, so close it explicitly once the rows are fetched
    conn = sqlite3.connect(f"file:{fpath_db}?mode=ro", uri=True)
    try:
        rows = conn.execute(query, (mtf,)).fetchall()
    finally:
        conn.close()

    ### return an iterator (as the cursor was) so next()/islice callers keep working
    return cnames, iter(rows)

In [72]:
### https://www.techonthenet.com/sqlite/and_or.php
def get_fragments_from_pair(mtf1, mtf2, fpath_db=FPATH_DB, sam="Input1"):
    """Return the counts in one sample of fragments annotated with either motif.

    Annotation is joined to Motif on ``binding`` and to Count on
    ``fragment``; rows are kept when the motif equals ``mtf1`` or ``mtf2``
    and the count belongs to sample ``sam``. A fragment is therefore listed
    if it carries either motif, not only if it carries both. The database is
    opened read-only and the connection is closed before returning.

    Parameters
    ----------
    mtf1, mtf2 : str
        Motif names to match against ``Motif.motif`` (e.g. 'AP1/1', 'CTCF').
    fpath_db : str
        Filesystem path of the SQLite database (defaults to FPATH_DB).
    sam : str
        Sample name to match against ``Count.sample``. Defaults to
        "Input1", the value that used to be hard-coded in the query.

    Returns
    -------
    cnames : tuple of str
        Column names of the rows: ("fragment", "binding", "sample", "count").
    rows : iterator of tuple
        One tuple per result row, ordered by fragment, in the column order
        given by ``cnames``. The rows are already fetched, so the iterator
        remains valid after the connection has been closed.

    Raises
    ------
    sqlite3.Error
        If the database cannot be opened or the query fails.
    """
    cnames = "fragment", "binding", "sample", "count"

    ### set query: bindings of either motif with their counts in one sample;
    ### values are bound as named parameters rather than pasted into the SQL
    ### text, so names containing quotes cannot break or alter the statement
    query = """
        SELECT Ant.fragment, Ant.binding, Cnt.sample, Cnt.count
        FROM   Annotation Ant
        JOIN   Motif      Mtf ON Ant.binding  = Mtf.binding
        JOIN   Count      Cnt ON Ant.fragment = Cnt.fragment
        WHERE  (Mtf.motif  = :mtf1 AND Cnt.sample = :sam) OR
               (Mtf.motif  = :mtf2 AND Cnt.sample = :sam)
        ORDER  BY Ant.fragment
       ;"""
    params = {"mtf1": mtf1, "mtf2": mtf2, "sam": sam}

    ### "with sqlite3.connect(...)" only scopes a transaction and leaves the
    ### connection open, so close it explicitly once the rows are fetched
    conn = sqlite3.connect(f"file:{fpath_db}?mode=ro", uri=True)
    try:
        rows = conn.execute(query, params).fetchall()
    finally:
        conn.close()

    ### return an iterator (as the cursor was) so next()/islice callers keep working
    return cnames, iter(rows)

In [39]:
### preview the first 20 motifs; islice stops after 20 rows without
### consuming the rest, and each printed row is a 1-tuple (motif,)
rows = get_motifs(FPATH_DB)
mtfs = it.islice(rows, 20)

for mtf in mtfs:
    print(mtf)

('AHR',)
('AIRE',)
('AP1/1',)
('AP1/2',)
('ARI5A',)
('ARI5B',)
('BATF',)
('BCL6/1',)
('BCL6/2',)
('CCAAT/CEBP',)
('CENBP',)
('CPEB1',)
('CREB/ATF/1',)
('CREB/ATF/2',)
('CREB/ATF/3',)
('CREB3/XBP1',)
('CTCF',)
('CUX/1',)
('CUX/2',)
('CUX/3',)


In [43]:
### list every sample name in SAMPLES, one per line
### (this rebinds the notebook-level name `sam` to the last sample)
for sam in SAMPLES:
    print(sam)

Input1
Input2
Input3
Input4
Input5
Input1_20x
Input2_20x
Input3_20x
Input4_20x
Input5_20x
TFX2_DMSO
TFX3_DMSO
TFX4_DMSO
TFX5_DMSO
TFX2_Dex
TFX3_Dex
TFX4_Dex
TFX5_Dex


In [32]:
### pick the first two motif names and the first sample as test inputs;
### rows are 1-tuples, so [0] extracts the motif name itself
rows = get_motifs(FPATH_DB)
mtfs = it.islice(rows, 2)
mtf1 = next(mtfs)[0]
mtf2 = next(mtfs)[0]
sam  = SAMPLES[0]
### NOTE(review): the recorded output shows "Input1_20x" for SAMPLES[0], while
### the SAMPLES listing (In [43]) starts with "Input1" -- the saved output
### looks stale from out-of-order execution; re-run on a fresh kernel to confirm
print(mtf1, mtf2, sam)

AHR AIRE Input1_20x


In [56]:
### query the fragments annotated with the first motif
cnames, rows = get_fragments_from_motif(mtf1)
print(cnames)

### sum the counts per sample; the defaultdict starts every new sample at 0
dct_count = defaultdict(lambda : 0)
### only the first 10 rows are used here, so the totals are a preview,
### not the full per-sample sums for the motif
rows = it.islice(rows, 10)
for row in rows:
    print(row)
    ### label the row values with their column names
    dct = dict(zip(cnames, row))
    sam = dct['sample']
    cnt = dct['count']
    dct_count[sam] += cnt

### convert to a plain dict for printing
dct_count = dict(dct_count)
print(dct_count)

('fragment', 'binding', 'sample', 'count')
('chr17_8150329_8151343', 'chr17_8151284_8151290_AHR', 'Input1', 1)
('chr17_8150329_8151343', 'chr17_8151284_8151290_AHR', 'Input1_20x', 2)
('chr17_8150329_8151343', 'chr17_8151284_8151290_AHR', 'Input3', 1)
('chr17_8150329_8151343', 'chr17_8151284_8151290_AHR', 'Input3_20x', 1)
('chr17_8150329_8151343', 'chr17_8151284_8151290_AHR', 'Input5_20x', 1)
('chr17_8150402_8151376', 'chr17_8151284_8151290_AHR', 'Input1', 1)
('chr17_8150402_8151376', 'chr17_8151284_8151290_AHR', 'Input1_20x', 1)
('chr17_8150402_8151376', 'chr17_8151284_8151290_AHR', 'Input2_20x', 3)
('chr17_8150402_8151376', 'chr17_8151284_8151290_AHR', 'Input3', 1)
('chr17_8150402_8151376', 'chr17_8151284_8151290_AHR', 'Input3_20x', 1)
{'Input1': 2, 'Input1_20x': 3, 'Input3': 2, 'Input3_20x': 2, 'Input5_20x': 1, 'Input2_20x': 3}


In [45]:
### scratch check: a missing key of this defaultdict evaluates to 0
### (the lookup also inserts the key "X" into the dict)
dct_count = defaultdict(lambda : 0)
dct_count["X"]

0

In [74]:
### query the fragments annotated with either motif of a hand-picked pair
### (the commented-out call below used the first two motifs instead)
#cnames, rows = get_fragments_from_pair(mtf1, mtf2)
cnames, rows = get_fragments_from_pair('AP1/1', 'CTCF')
print(cnames)

### sum the counts per sample; the defaultdict starts every new sample at 0
dct_count = defaultdict(lambda : 0)
### the row limit is disabled, so every returned row is printed and summed
#rows = it.islice(rows, 100)
for row in rows:
    print(row)
    ### label the row values with their column names
    dct = dict(zip(cnames, row))
    sam = dct['sample']
    cnt = dct['count']
    dct_count[sam] += cnt

### convert to a plain dict for printing
dct_count = dict(dct_count)
print(dct_count)

('fragment', 'binding', 'sample', 'count')
('chr17_8148117_8149012', 'chr17_8148425_8148433_AP1/1', 'Input1', 1)
('chr17_8148117_8149012', 'chr17_8148608_8148622_CTCF', 'Input1', 1)
('chr17_8148117_8149012', 'chr17_8148673_8148687_CTCF', 'Input1', 1)
('chr17_8148117_8149012', 'chr17_8148877_8148901_CTCF', 'Input1', 1)
('chr17_8148117_8149012', 'chr17_8148917_8148925_AP1/1', 'Input1', 1)
('chr17_8148122_8149107', 'chr17_8148425_8148433_AP1/1', 'Input1', 1)
('chr17_8148122_8149107', 'chr17_8148608_8148622_CTCF', 'Input1', 1)
('chr17_8148122_8149107', 'chr17_8148673_8148687_CTCF', 'Input1', 1)
('chr17_8148122_8149107', 'chr17_8148877_8148901_CTCF', 'Input1', 1)
('chr17_8148122_8149107', 'chr17_8148917_8148925_AP1/1', 'Input1', 1)
('chr17_8148178_8149194', 'chr17_8148425_8148433_AP1/1', 'Input1', 1)
('chr17_8148178_8149194', 'chr17_8148608_8148622_CTCF', 'Input1', 1)
('chr17_8148178_8149194', 'chr17_8148673_8148687_CTCF', 'Input1', 1)
('chr17_8148178_8149194', 'chr17_8148877_8148901_CTCF',