In [28]:
import sys
# Add the parent directory to the module search path so `config_sing` resolves.
sys.path.append('../')
# NOTE(review): later cells use np, pd, os and FD_RES without importing them,
# so they presumably come from this star import — confirm against config_sing.
from config_sing import *
# Prints the environment/path banner shown below (presumably defined in config_sing).
show_env()

You are in: Singularity | singularity_proj_combeffect
    BASE DIRECTORY:     /mount/work
    PATH OF SOURCE:     /mount/work/source
    PATH OF EXECUTABLE: /mount/work/exe
    PATH OF ANNOTATION: /mount/work/annotation
    PATH OF PROJECT:    /mount/project
    PATH OF RESULTS:    /mount/work/out/proj_combeffect

Library imported:
    numpy, pandas, matplotlib.pyplot
    os, sys, time, gzip, glob



In [29]:
from sklearn import metrics
from sklearn.model_selection import train_test_split

import seaborn as sns
import xgboost as xgb
import shap
import json

from functools import partial, reduce
# Rebind print so that every call in this notebook passes flush=True.
print = partial(print, flush=True)

### Set Samples
# Element-wise string concatenation. `np.char.add` is the public spelling of
# the routine previously reached via `np.core.defchararray.add`; the `np.core`
# namespace is private and deprecated as of NumPy 2.0.
fun = np.char.add
# Replicate labels "1" .. "5" as strings.
idx = np.arange(1,6).astype("str")

# reduce(fun, [a, b, c]) evaluates fun(fun(a, b), c), i.e. prefix + label + suffix.
INPUT    = reduce(fun, ["Input", idx])
INPUT20X = reduce(fun, ["Input", idx,     "_20x"])
# The TFX samples use labels 2..5 only (idx[1:] drops the first label).
TFX_DMSO = reduce(fun, ["TFX",   idx[1:], "_DMSO"])
TFX_DEX  = reduce(fun, ["TFX",   idx[1:], "_Dex"])
# Full sample list: 20x inputs followed by the DMSO and Dex TFX samples.
SAMPLES  = np.concatenate([INPUT20X, TFX_DMSO, TFX_DEX])

In [30]:
def read_data(sample, fname, cnames):
    """Read one headerless, tab-separated table for a sample.

    The file is looked up at
    ``<FD_RES>/scratch/region_dex_GR_P300_dnase_chr17/<sample>_<fname>``.

    Parameters
    ----------
    sample : sample name used as the file-name prefix.
    fname  : file-name suffix appended after ``<sample>_``.
    cnames : column names assigned to the table.

    Returns
    -------
    pandas.DataFrame with columns ``cnames``.
    """
    table_path = os.path.join(
        FD_RES,
        "scratch",
        "region_dex_GR_P300_dnase_chr17",
        f"{sample}_{fname}",
    )
    return pd.read_csv(table_path, sep="\t", names=cnames)

# Per-table readers: each one fixes the file suffix and the column names of
# read_data, so callers only pass the sample name.

# NOTE(review): the score table's value column is labelled "Motif_Count", the
# same as the count table — possibly a copy-paste leftover. Confirm before
# renaming, since other cells may depend on this label.
read_motif_score = partial(read_data, fname="motif_score.tsv", cnames=["Fragment", "Motif_Count"])

read_motif_count = partial(read_data, fname="motif_count.tsv", cnames=["Fragment", "Motif_Count"])

read_pct = partial(read_data, fname="pct.tsv", cnames=["Fragment", "GC"])

read_depth = partial(read_data, fname="depth.tsv", cnames=["Fragment", "Loc", "Depth"])

In [31]:
# Load the motif-score, GC-fraction and depth tables for the first TFX DMSO sample.
sample = TFX_DMSO[0]

dat_dmso_mtf_score, dat_dmso_pct, dat_dmso_depth = (
    read_motif_score(sample),
    read_pct(sample),
    read_depth(sample),
)

In [32]:
# Short aliases for the three tables (motif score, GC fraction, depth).
dat1, dat2, dat3 = dat_dmso_mtf_score, dat_dmso_pct, dat_dmso_depth

# Table dimensions, one per line.
for _frame in (dat1, dat2, dat3):
    print(_frame.shape)

# Row-by-row check that the Fragment column of dat1 matches the other tables.
for _other in (dat2, dat3):
    print(np.all(dat1.Fragment == _other.Fragment))

(3452, 2)
(3452, 2)
(3452, 3)
True
True


In [33]:
# Row positions at which dat1 and dat3 disagree on the Fragment value.
# NOTE(review): this rebinds `idx`, which an earlier cell used for the sample labels.
idx = (dat1.Fragment != dat3.Fragment).to_numpy().nonzero()[0]
print(idx)

[]


In [16]:
# Compare the Fragment columns of dat1 and dat3 over the first 340 rows.
# NOTE(review): this cell's execution count is lower than the cells above it,
# so its recorded output may come from an earlier kernel state.
frg1, frg3 = dat1.Fragment[:340], dat3.Fragment[:340]
print(np.all(frg1 == frg3))

True


In [18]:
# Compare the Fragment columns of dat1 and dat3 from row 350 onwards.
# NOTE(review): this cell's execution count is lower than the cells above it,
# so its recorded output may come from an earlier kernel state.
frg1, frg3 = dat1.Fragment[350:], dat3.Fragment[350:]
print(np.all(frg1 == frg3))

True


In [25]:
# Show the dat1 rows at the positions in idx, followed by row 350.
dat1.iloc[[*idx, 350]]

Unnamed: 0,Fragment,Motif_Count
340,chr17_14736963_14737782,"{""TBX/3"": 6.7195, ""HEN1"": 12.6099, ""Ebox/CAGCT..."
341,chr17_8154668_8155710,"{""GC-tract"": 59.44449, ""IRF/2"": 41.4872, ""NFAC..."
342,chr17_8154803_8155791,"{""BATF"": 7.0544, ""GC-tract"": 37.88239, ""ZIC"": ..."
343,chr17_8154803_8155792,"{""BATF"": 7.0544, ""GC-tract"": 37.88239, ""ZIC"": ..."
344,chr17_8154803_8155793,"{""BATF"": 7.0544, ""GC-tract"": 37.88239, ""ZIC"": ..."
345,chr17_8154804_8155793,"{""BATF"": 7.0544, ""GC-tract"": 37.88239, ""ZIC"": ..."
346,chr17_8154805_8155793,"{""BATF"": 7.0544, ""GC-tract"": 37.88239, ""ZIC"": ..."
347,chr17_8154808_8155792,"{""BATF"": 7.0544, ""GC-tract"": 37.88239, ""ZIC"": ..."
348,chr17_8155161_8156042,"{""REL-halfsite"": 22.6359, ""TEAD"": 15.628, ""EBF..."
349,chr17_8155249_8156114,"{""LIN54"": 7.7918, ""NR/15"": 15.8898, ""MEF2"": 4...."


In [26]:
# Show the dat2 rows at the positions in idx, followed by row 350.
dat2.iloc[[*idx, 350]]

Unnamed: 0,Fragment,GC
340,chr17_14736963_14737782,0.428571
341,chr17_8154668_8155710,0.5
342,chr17_8154803_8155791,0.493927
343,chr17_8154803_8155792,0.494439
344,chr17_8154803_8155793,0.494949
345,chr17_8154804_8155793,0.494439
346,chr17_8154805_8155793,0.493927
347,chr17_8154808_8155792,0.492886
348,chr17_8155161_8156042,0.514188
349,chr17_8155249_8156114,0.534104


In [27]:
# Show the dat3 rows at the positions in idx, followed by row 350.
dat3.iloc[[*idx, 350]]

Unnamed: 0,Fragment,Loc,Depth
340,chr17_8154668_8155710,8155189,"{""Input"": 2.11599, ""Input_20x"": 3.05282, ""TFX_..."
341,chr17_8154803_8155791,8155297,"{""Input"": 2.2005, ""Input_20x"": 2.63865, ""TFX_D..."
342,chr17_8154803_8155792,8155297,"{""Input"": 2.2005, ""Input_20x"": 2.63865, ""TFX_D..."
343,chr17_8154803_8155793,8155298,"{""Input"": 2.2005, ""Input_20x"": 2.63865, ""TFX_D..."
344,chr17_8154804_8155793,8155298,"{""Input"": 2.2005, ""Input_20x"": 2.63865, ""TFX_D..."
345,chr17_8154805_8155793,8155299,"{""Input"": 2.2005, ""Input_20x"": 2.63865, ""TFX_D..."
346,chr17_8154808_8155792,8155300,"{""Input"": 2.25009, ""Input_20x"": 2.64724, ""TFX_..."
347,chr17_8155161_8156042,8155601,"{""Input"": 1.67548, ""Input_20x"": 2.14099, ""TFX_..."
348,chr17_8155249_8156114,8155681,"{""Input"": 1.81496, ""Input_20x"": 2.35417, ""TFX_..."
349,chr17_14736963_14737782,14737372,"{""Input"": 2.10629, ""Input_20x"": 1.84544, ""TFX_..."
