# TripletConfidenceGenerator
### Finds triplets by confidence in the training, validation, or testing sets.
### In other words, it takes three enhancers, predicts, and takes the confidence of the ensemble in that prediction as the confidence of the triplet.

## Imports

In [1]:
import os
import pandas as pd 
import numpy as np
import data
from Bio import SeqIO
import TripletClassifierEnsemble
from itertools import permutations
import random

## Files

In [2]:
# Root directory of the combined dataset. It already ends with '/', and every
# path below adds another '/', so the resulting paths contain '//'.
data_dir = '../Data/Datasets/All/'
# Directory handed to the classifier ensemble when it is constructed.
out_dir = data_dir + '/Models/'

# Training split: sequence FASTA plus the two triplet index arrays
# (presumably dissimilar / similar triplets -- confirm against the producer).
train_sequence = data_dir + '/train_sequences.fa'
train_triplet_dis = data_dir + '/train_triplet_dis.npy'
train_triplet_sim = data_dir + '/train_triplet_sim.npy'

# Validation split.
valid_sequence = data_dir + '/valid_sequences.fa'
valid_triplet_dis = data_dir + '/valid_triplet_dis.npy'
valid_triplet_sim = data_dir + '/valid_triplet_sim.npy'

# Testing split.
test_sequence = data_dir + '/test_sequences.fa'
test_triplet_dis = data_dir + '/test_triplet_dis.npy'
test_triplet_sim = data_dir + '/test_triplet_sim.npy'

In [3]:
# Root directory of the FANTOM files. It already ends with '/', so the paths
# built below contain '//'.
fantom_dir = '../Data/Datasets/Fantom/'
# Tab-separated usage matrices, one per split.
valid_matrix_path = fantom_dir + '/valid.usage.matrix'
train_matrix_path = fantom_dir + '/train.usage.matrix'
test_matrix_path = fantom_dir + '/test.usage.matrix'
# Text file with one "<name ...> <id>" record per line.
id_path = fantom_dir + '/name_id.txt'

In [4]:
# Directory holding one FASTA per chromosome, read later as "HG38_<chrom>.fa".
chrom_dir = "../Data/HG38/Chromosomes/"
# Output directory for the "<level>_triplet_<i>.fa" files.
triplet_out_dir = "../Data/Triplets/"
# Output directory for the "<level>/seq_<i>.fa" genomic-window files.
gnm_out_dir = "../Data/GNM/"

## Parameters

In [5]:
# Trailing dimensions every triplet is reshaped to: (1, d1, d2, d3).
# d2 is also passed to the one-hot converter when the sequences are loaded.
d1 = 4
d2 = 600
d3 = 3

# Number of ensemble models to load and to use for prediction.
model_count = 29

# Which set the triplets are taken from: one of 'train', 'valid', or 'test'.
mode = 'valid'

## Loading Data and ensemble

In [6]:
# Sequence file and similar-triplet array for every supported mode.
_mode_inputs = {
    'train': (train_sequence, train_triplet_sim),
    'valid': (valid_sequence, valid_triplet_sim),
    'test': (test_sequence, test_triplet_sim),
}

# Reject unknown modes before anything is read from disk.
if mode not in _mode_inputs:
    raise RuntimeError(f"{mode} is not a valid mode! Use train, valid, or test!")

_sequence_path, _similar_path = _mode_inputs[mode]

# d: matrix of converted sequences for the chosen set, indexed by enhancer.
d = data.FantomToOneHotConverter(_sequence_path, 0, d2).seq_matrix
# similar_triplets: index array later addressed as [anchor row, member (0-2), triplet].
similar_triplets = np.load(_similar_path)

In [8]:
def make_triplet(i1, i2, i3):
    """Build a single-item model input from three rows of the sequence matrix ``d``.

    The rows ``d[i1]``, ``d[i2]`` and ``d[i3]`` are stacked, the stacking axis
    is moved to the last position, and the result is reshaped to
    ``(1, d1, d2, d3)``.

    NOTE(review): this presumably expects each row of ``d`` to have shape
    ``(d1, d2)`` and ``d3`` to be 3 -- confirm against the converter.
    """
    rows = np.array([d[index] for index in (i1, i2, i3)])
    member_axis_last = rows.transpose(1, 2, 0)
    return member_axis_last.reshape(1, d1, d2, d3)

In [10]:
# Create the classifier ensemble from the model directory and the
# (d1, d2, d3) input shape, then load its model info and its models.
# NOTE(review): load_models is given model_count, presumably the number of
# models to load -- confirm in TripletClassifierEnsemble.
ensemble = TripletClassifierEnsemble.TripletClassifierEnsemble(out_dir, (d1,d2,d3))
ensemble.load_model_info()
ensemble.load_models(model_count)

Loading models 29/29


# Loading matrices

In [11]:
# Tab-separated usage matrix of the training set.
train_matrix = pd.read_csv(train_matrix_path, sep = '\t')

In [12]:
# Tab-separated usage matrix of the validation set.
valid_matrix = pd.read_csv(valid_matrix_path, sep = '\t')
# NumPy view of the validation matrix, used for row lookups by triplet index.
vm = valid_matrix.to_numpy()

In [13]:
# Tab-separated usage matrix of the testing set.
test_matrix = pd.read_csv(test_matrix_path, sep = '\t')

In [15]:
# Stack the three matrices row-wise in train, valid, test order;
# join='inner' keeps only the columns present in all of them.
all_matrix = pd.concat([train_matrix, valid_matrix, test_matrix], join='inner')
# NumPy version of the combined matrix; column 0 of each row is read later as the enhancer label.
am = all_matrix.to_numpy()

# Loading Cell Types

In [17]:
# Map each ID (the last whitespace-separated field of a line in `id_path`)
# to its name (all preceding fields joined by single spaces).
id_name_dict = {}
with open(id_path, 'r') as f:
    for line in f:
        # Deliberately not called `data`: that name is the imported module
        # used to load the sequences, and rebinding it here would break any
        # later re-run of the loading cell.
        fields = line.split()
        if not fields:
            # Blank line: there is no ID to index, so skip it instead of
            # failing on fields[-1].
            continue
        id_name_dict[fields[-1]] = ' '.join(fields[:-1])

# Preview the first ten (id, name) pairs.
list(id_name_dict.items())[:10]

[('CNhs10722', 'acute myeloid leukemia (FAB M5) cell line:THP-1 (fresh)'),
 ('CNhs10723', 'acute myeloid leukemia (FAB M5) cell line:THP-1 (revived)'),
 ('CNhs10724', 'acute myeloid leukemia (FAB M5) cell line:THP-1 (thawed)'),
 ('CNhs10726', 'lung adenocarcinoma cell line:PC-14'),
 ('CNhs10727', 'chronic myelogenous leukemia cell line:KU812'),
 ('CNhs10728', 'extraskeletal myxoid chondrosarcoma cell line:H-EMC-SS'),
 ('CNhs10729', 'renal cell carcinoma cell line:OS-RC-2'),
 ('CNhs10730', 'malignant trichilemmal cyst cell line:DJM-1'),
 ('CNhs10731', 'glioma cell line:GI-1'),
 ('CNhs10732', 'maxillary sinus tumor cell line:HSQ-89')]

# Finding good cell types
### Whenever a group of tissues that the user wants to use appears, type 'y' and press enter.
### To check the next group of tissues, press enter

In [18]:
# Interactively pick the anchor row `x` of `similar_triplets` to work with.
#
# An anchor is offered only if it is active (value 1) in every cell type
# listed in `required_cell_types`. Its active cell types are printed and the
# user answers 'y' to accept it; any other answer moves on to the next
# candidate. After the last candidate the list is shown again from the start.
# On acceptance `x` and `cols` keep the accepted anchor's values.
#
# NOTE(review): the anchor row is looked up in the combined matrix `am`,
# while the triplet indices come from the set chosen by `mode` -- confirm the
# two index spaces agree.
required_cell_types = (
    "iPS differentiation to neuron, control donor C32-CRL1502, day00, rep1",
    "Natural Killer Cells, donor1",
    "Natural Killer Cells, donor3",
    "Basophils, donor2",
    "CD8+ T Cells (pluriselect), donor090325, donation1",
)

is_done = False
while not is_done:
    offered_any = False

    for x in range(len(similar_triplets)):
        anchor = similar_triplets[x, 0, 0]
        anchor_row = am[anchor]
        cols = np.where(anchor_row == 1)[0]
        name_list = [id_name_dict[all_matrix.columns[c]] for c in cols]

        active_names = set(name_list)
        if not all(name in active_names for name in required_cell_types):
            continue

        offered_any = True
        for name in name_list:
            print(name)

        usr = input()
        if usr == "y":
            print(x, cols)
            is_done = True
            break

    if not offered_any:
        # A full pass produced no candidate, so another pass cannot either;
        # stop instead of looping forever without ever prompting.
        raise RuntimeError(
            "No anchor is active in all of the required cell types; "
            "adjust required_cell_types or use another set."
        )

acute lymphoblastic leukemia (B-ALL) cell line:BALL-1
acute lymphoblastic leukemia (B-ALL) cell line:NALM-6
acute lymphoblastic leukemia (T-ALL) cell line:HPB-ALL
acute lymphoblastic leukemia (T-ALL) cell line:Jurkat
acute myeloid leukemia (FAB M0) cell line:KG-1
acute myeloid leukemia (FAB M1) cell line:HYT-1
acute myeloid leukemia (FAB M3) cell line:HL60
acute myeloid leukemia (FAB M4eo) cell line:EoL-1
acute myeloid leukemia (FAB M4eo) cell line:EoL-3
acute myeloid leukemia (FAB M5) cell line:THP-1 (thawed)
acute myeloid leukemia (FAB M5) cell line:U-937 DE-4
acute myeloid leukemia (FAB M6) cell line:EEB
adenocarcinoma cell line:IM95m
adrenal cortex adenocarcinoma cell line:SW-13
adult T-cell leukemia cell line:ATN-1
alveolar cell carcinoma cell line:SW 1573
anaplastic carcinoma cell line:8305C
anaplastic large cell lymphoma cell line:Ki-JK
argyrophil small cell carcinoma cell line:TC-YIK
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep1
B lymphoblastoid cell line: GM12878 ENCOD

 


acute lymphoblastic leukemia (T-ALL) cell line:HPB-ALL
acute myeloid leukemia (FAB M1) cell line:HYT-1
acute myeloid leukemia (FAB M2) cell line:NKM-1
acute myeloid leukemia (FAB M3) cell line:HL60
acute myeloid leukemia (FAB M4eo) cell line:EoL-1
acute myeloid leukemia (FAB M4eo) cell line:EoL-3
acute myeloid leukemia (FAB M5) cell line:NOMO-1
acute myeloid leukemia (FAB M5) cell line:P31/FUJ
acute myeloid leukemia (FAB M5) cell line:THP-1 (fresh)
acute myeloid leukemia (FAB M5) cell line:U-937 DE-4
acute myeloid leukemia (FAB M6) cell line:EEB
acute myeloid leukemia (FAB M7) cell line:M-MOK
adult T-cell leukemia cell line:ATN-1
anaplastic carcinoma cell line:8305C
anaplastic large cell lymphoma cell line:Ki-JK
argyrophil small cell carcinoma cell line:TC-YIK
b cell line:RPMI1788
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep1
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep2
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep3
bile duct carcinoma cell line:HuCCT1
biphenoty

 


adult T-cell leukemia cell line:ATN-1
anaplastic carcinoma cell line:8305C
anaplastic large cell lymphoma cell line:Ki-JK
b cell line:RPMI1788
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep1
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep2
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep3
breast carcinoma cell line:MDA-MB-453
carcinoid cell line:NCI-H727
cervical cancer cell line:ME-180
chronic lymphocytic leukemia (T-CLL) cell line:SKW-3
colon carcinoma cell line:CACO-2
colon carcinoma cell line:COLO-320
epithelioid sarcoma cell line:HS-ES-1
epitheloid carcinoma cell line: HelaS3 ENCODE, biol_rep1
epitheloid carcinoma cell line: HelaS3 ENCODE, biol_rep2
epitheloid carcinoma cell line: HelaS3 ENCODE, biol_rep3
gall bladder carcinoma cell line:TGBC14TKB
gall bladder carcinoma cell line:TGBC2TKB
gastric adenocarcinoma cell line:MKN45
gastric cancer cell line:GSS
hairy cell leukemia cell line:Mo
Hep-2 cells mock treated, biol_rep1
Hep-2 cells mock treated, biol_rep2
Hep-2 c

 


acute myeloid leukemia (FAB M1) cell line:HYT-1
acute myeloid leukemia (FAB M3) cell line:HL60
acute myeloid leukemia (FAB M4eo) cell line:EoL-1
acute myeloid leukemia (FAB M4eo) cell line:EoL-3
acute myeloid leukemia (FAB M5) cell line:U-937 DE-4
acute myeloid leukemia (FAB M6) cell line:EEB
anaplastic large cell lymphoma cell line:Ki-JK
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep1
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep2
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep3
biphenotypic B myelomonocytic leukemia cell line:MV-4-11
breast carcinoma cell line:MDA-MB-453
Burkitt's lymphoma cell line:DAUDI
carcinoid cell line:SK-PN-DW
chronic myeloblastic leukemia (CML) cell line:KCL-22
chronic myelogenous leukemia cell line:KU812
colon carcinoma cell line:COLO-320
diffuse large B-cell lymphoma cell line:CTB-1
embryonic kidney cell line: HEK293/SLAM infection, 24hr
embryonic kidney cell line: HEK293/SLAM untreated
epitheloid carcinoma cell line: HelaS3 ENCODE, biol_r

 


acute myeloid leukemia (FAB M1) cell line:HYT-1
acute myeloid leukemia (FAB M2) cell line:Kasumi-6
acute myeloid leukemia (FAB M3) cell line:HL60
acute myeloid leukemia (FAB M5) cell line:THP-1 (thawed)
acute myeloid leukemia (FAB M7) cell line:M-MOK
b cell line:RPMI1788
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep1
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep2
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep3
Burkitt's lymphoma cell line:DAUDI
Burkitt's lymphoma cell line:RAJI
cord blood derived cell line:COBL-a 24h infection(-C)
cord blood derived cell line:COBL-a 24h infection
cord blood derived cell line:COBL-a untreated
diffuse large B-cell lymphoma cell line:CTB-1
ductal cell carcinoma cell line:KLM-1
embryonic kidney cell line: HEK293/SLAM infection, 24hr
hepatoma cell line:Li-7
hereditary spherocytic anemia cell line:WIL2-NS
large cell non-keratinizing squamous carcinoma cell line:SKG-II-SF
lymphoma, malignant, hairy B-cell cell line:MLMA
mesothelioma cell l

 


acute lymphoblastic leukemia (B-ALL) cell line:BALL-1
acute lymphoblastic leukemia (T-ALL) cell line:HPB-ALL
acute myeloid leukemia (FAB M5) cell line:THP-1 (fresh)
adult T-cell leukemia cell line:ATN-1
anaplastic carcinoma cell line:8305C
b cell line:RPMI1788
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep1
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep2
B lymphoblastoid cell line: GM12878 ENCODE, biol_rep3
Burkitt's lymphoma cell line:DAUDI
cervical cancer cell line:D98-AH2
cervical cancer cell line:ME-180
cholangiocellular carcinoma cell line:HuH-28
ductal cell carcinoma cell line:KLM-1
embryonic kidney cell line: HEK293/SLAM infection, 24hr
epitheloid carcinoma cell line: HelaS3 ENCODE, biol_rep3
gastric cancer cell line:AZ521
Hep-2 cells treated with Streptococci strain 5448, biol_rep2
Hep-2 cells treated with Streptococci strain JRS4, biol_rep1
hepatic mesenchymal tumor cell line:LI90
hepatoma cell line:Li-7
hereditary spherocytic anemia cell line:WIL2-NS
Hodgkin's lym

 y


1004 [   2    4   18   29   31   36   37   38   39   51   57   58   59   80
   82   97  106  122  124  127  132  133  134  152  153  167  182  184
  186  187  189  200  206  207  213  226  232  249  257  258  274  275
  276  277  278  279  280  281  282  283  286  290  294  295  297  301
  315  319  330  360  363  364  365  374  375  376  380  381  382  383
  384  385  386  387  388  389  390  391  392  393  394  395  396  397
  398  399  400  401  402  403  404  405  411  412  414  423  448  452
  453  457  466  473  475  476  477  478  479  480  494  495  497  498
  499  501  502  503  504  505  509  510  511  512  513  515  524  528
  529  532  546  554  568  570  572  578  580  588  595  596  602  612
  613  614  617  619  622  623  624  637  638  639  644  647  649  650
  651  652  653  654  655  656  659  661  663  665  668  670  674  675
  676  697  699  701  707  709  710  711  712  722  724  737  745  747
  748  751  755  761  762  773  779  785  786  790  792  797  805  806
 

# Getting similar triplet

In [20]:
# Bucket every triplet of anchor row `x` by the ensemble's confidence.
# Key L holds the triplets whose confidence lies in [L/100, (L+10)/100).
# NOTE(review): the upper bound is exclusive, so a confidence of exactly 1.0
# lands in no bucket -- confirm that this is intended.
confidence_table = {level: [] for level in (60, 70, 80, 90)}

# One model input per triplet stored in the last axis of `similar_triplets`.
triplet_list = [
    make_triplet(*similar_triplets[x, [0, 1, 2], column])
    for column in range(similar_triplets.shape[-1])
]

# p: predictions, c: per-triplet confidences, in the same order as triplet_list.
p, c = ensemble.predict(np.concatenate(triplet_list), use_count = model_count, is_loaded = True)

for column, confidence in enumerate(c):
    for level in confidence_table:
        lower_bound = level / 100.0
        upper_bound = (level + 10) / 100.0
        if lower_bound <= confidence < upper_bound:
            confidence_table[level].append((similar_triplets[x, [0, 1, 2], column], confidence))
            break



# For each confidence level, get the overlapping tissues
### For each confidence interval (60, 70, 80, 90), each triplet with the corresponding confidence is checked for the overlapping tissues.
### These tissues are printed out. Whenever a group of overlapping tissues looks good, type 'y' and press enter.
### To check the next group of tissues, press enter.

In [23]:
# Interactive review: for every confidence level, walk through that level's
# triplets, print the cell types in which all three enhancers are active, and
# let the user pick one triplet.
#
#   'y'        -> accept the triplet shown and move to the next level
#   's'        -> restart the current level from its first triplet
#   other/blank-> show the next triplet (the list restarts after its last entry)
#
# Results are stored position-by-position, one entry per key of
# `confidence_table`, so they can be zipped with those keys:
#   cell_list[k]     -> names of the overlapping cell types
#   enhancer_list[k] -> (anchor, positive, similar) enhancer labels
# A level without any triplet gets a None placeholder in both lists so the
# positions stay aligned with the levels.
#
# On acceptance, a '<level>_gt.bed' file is written to the working directory
# with every chr21 row of `am` that is active in at least one of the
# overlapping cell types, sorted by start coordinate.
cell_list = []
enhancer_list = []

for level, candidates in confidence_table.items():
    if not candidates:
        # Nothing to review: without this guard the loop below would repeat
        # forever, because it can only end once a triplet is accepted.
        print(f"No triplets found for confidence level {level}; skipping.")
        cell_list.append(None)
        enhancer_list.append(None)
        continue

    is_confirm = False
    while not is_confirm:
        print("--------------------\n")

        for triplet, confidence in candidates:
            anchor_index, positive_index, similar_index = triplet[0], triplet[1], triplet[2]
            # NOTE(review): rows are always read from the validation matrix
            # `vm`, whatever `mode` is set to -- confirm for 'train'/'test'.
            ar, pr, sr = vm[anchor_index], vm[positive_index], vm[similar_index]

            # Columns in which all three enhancers have the value 1.
            overlapping = np.where((ar == 1) & (pr == 1) & (sr == 1))[0]
            overlapping_names = [id_name_dict[name] for name in all_matrix.columns[overlapping]]

            print(f"Overlapping: {overlapping}")
            print("Cell types:")
            for cell_name in overlapping_names:
                print(cell_name)

            usr = input()
            if usr == "s":
                break
            if usr != "y":
                continue

            is_confirm = True
            cell_list.append(overlapping_names)
            # Column 0 of each row holds the enhancer label.
            enhancer_list.append((ar[0], pr[0], sr[0]))

            # Labels of all rows active in at least one overlapping cell type.
            active_labels = [row[0] for row in am if 1 in row[overlapping]]

            # 'chrom:start-end' -> tab-separated columns, chr21 only.
            bed_lines = [
                label.replace(':', '\t').replace('-', '\t')
                for label in active_labels
                if "chr21:" in label
            ]
            bed_lines.sort(key=lambda bed_line: int(bed_line.split("\t")[1]))

            with open(f'{level}_gt.bed', "w") as file:
                for bed_line in bed_lines:
                    file.write(bed_line + "\n")

            break

--------------------

Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1599]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)


 


Overlapping: [1600]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep3 (A3 T4)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1596]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)


 


Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1608]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep2 (A2 T7)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1589]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)


 


Overlapping: [1608]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep2 (A2 T7)


 


Overlapping: [1586 1589 1593 1596 1600 1608 1614 1628 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr30min, biol_rep2 (A2 T2)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep3 (A3 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep2 (A2 T7)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce

 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1621]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 08hr, biol_rep3 (A3 T11)


 


Overlapping: [1607]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep1 (A1 T7)


 


Overlapping: [1595 1599 1600 1613 1621]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep3 (A3 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep1 (A1 T9)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 08hr, biol_rep3 (A3 T11)


 


Overlapping: [457]
Cell types:
Fibroblast - Choroid Plexus, donor1


 


Overlapping: [1608]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep2 (A2 T7)


 


Overlapping: [457]
Cell types:
Fibroblast - Choroid Plexus, donor1


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1589]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)


 


Overlapping: [1600]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep3 (A3 T4)


 


Overlapping: [1587 1595 1613]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep2 (A2 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep1 (A1 T9)


 


Overlapping: [1628]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1599]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)


 


Overlapping: [1586 1589 1595 1596 1599 1614 1628 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1628]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)


 


Overlapping: [1628]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)


 


Overlapping: [1586 1628]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)


 


Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1599]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)


 


Overlapping: [1586 1589 1596 1613]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep1 (A1 T9)


 y


--------------------

Overlapping: [1628]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)


 


Overlapping: [1593 1595 1596 1628]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr30min, biol_rep2 (A2 T2)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)


 


Overlapping: [457]
Cell types:
Fibroblast - Choroid Plexus, donor1


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1589 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 y


--------------------

Overlapping: [1628 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day07, biol_rep1 (A1 T14)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1604]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1607]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep1 (A1 T7)


 


Overlapping: [1596]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)


 


Overlapping: [1586 1587 1589 1595 1599 1600 1604 1607 1608 1613 1621 1628 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep2 (A2 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep3 (A3 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep1 (A1 T7)
Saos-2 osteosarcoma treated with asc

 


Overlapping: [1586 1593 1604]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr30min, biol_rep2 (A2 T2)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1607]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep1 (A1 T7)


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1608]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep2 (A2 T7)


 


Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1586 1589 1604 1613 1614 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep1 (A1 T9)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1589 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 y


--------------------

Overlapping: [1599 1604]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)


 


Overlapping: [457]
Cell types:
Fibroblast - Choroid Plexus, donor1


 


Overlapping: [1599]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1595]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1586 1587 1596 1599 1607 1621 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep2 (A2 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep1 (A1 T7)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 08hr, biol_rep3 (A3 T11)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1595 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


--------------------

Overlapping: [1599 1604]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)


 


Overlapping: [457]
Cell types:
Fibroblast - Choroid Plexus, donor1


 


Overlapping: [1599]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1595]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1586 1587 1596 1599 1607 1621 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep2 (A2 T0)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 02hr00min, biol_rep1 (A1 T7)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 08hr, biol_rep3 (A3 T11)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1614]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep2 (A2 T9)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


Overlapping: [1595 1637]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep1 (A1 T3)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)


 


Overlapping: [1643]
Cell types:
adipose tissue, adult, pool1


 


--------------------

Overlapping: [1599 1604]
Cell types:
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr00min, biol_rep2 (A2 T4)
Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 01hr40min, biol_rep1 (A1 T6)


 


Overlapping: [457]
Cell types:
Fibroblast - Choroid Plexus, donor1


 y


In [25]:
# Show the overlapping cell-type names recorded for each confirmed triplet.
cell_list

[['Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr00min, biol_rep1 (A1 T0)',
  'Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)',
  'Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr45min, biol_rep2 (A2 T3)',
  'Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 03hr, biol_rep1 (A1 T9)'],
 ['adipose tissue, adult, pool1'],
 ['Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, 00hr15min, biol_rep1 (A1 T1)',
  'Saos-2 osteosarcoma treated with ascorbic acid and BGP to induce calcification, day28, biol_rep1 (A1 T17)'],
 ['Fibroblast - Choroid Plexus, donor1']]

In [26]:
# Show the (anchor, positive, similar) enhancer labels of each confirmed triplet.
enhancer_list

[('chr17:77129401-77129660',
  'chr9:84170890-84171110',
  'chr17:13544352-13544777'),
 ('chr17:77129401-77129660',
  'chr2:88600082-88600592',
  'chr2:237156465-237156786'),
 ('chr17:77129401-77129660',
  'chr6:98947434-98947556',
  'chrX:40099154-40099469'),
 ('chr17:77129401-77129660',
  'chr3:35401896-35402248',
  'chr10:91887111-91887689')]

## Writes To File
### Every permutation of a triplet is written to a FASTA file.
### Note that only the original triplet (saved with _0.fa) is guaranteed to be of the proper confidence interval.
### Further, a region of size 10,000 around the third enhancer of a triplet permutation in the human genome is collected and saved to a FASTA file.

In [27]:
# Write the selected triplets to disk.
#
# For every confidence level and every ordering of its three enhancers:
#   <triplet_out_dir>/<level>_triplet_<i>.fa  -- the three enhancer sequences
#   <gnm_out_dir>/<level>/seq_<i>.fa          -- a window of up to 10,000 bases
#                                                centred on the last enhancer
# Enhancer labels have the form 'chrom:start-end'; sequences are sliced out of
# '<chrom_dir>/HG38_<chrom>.fa'.


def _parse_region(label):
    """Split a 'chrom:start-end' label into (chrom, start, end) with int coordinates."""
    chrom, regions = label.split(":")
    start, end = (int(part) for part in regions.split("-"))
    return chrom, start, end


for level, cell_types, enhancers in zip(confidence_table.keys(), cell_list, enhancer_list):
    if enhancers is None:
        # No triplet was recorded for this level.
        continue

    # Extract everything needed from each chromosome once per enhancer. The
    # permutation loop below reuses these strings instead of re-parsing a
    # whole chromosome file for every write.
    enhancer_seqs = {}
    window_seqs = {}
    for enhancer in enhancers:
        chrom, start, end = _parse_region(enhancer)
        chrom_seq = SeqIO.read(f"{chrom_dir}/HG38_{chrom}.fa", "fasta").seq
        enhancer_seqs[enhancer] = str(chrom_seq[start:end])

        middle = (end - start) // 2 + start
        # Clamp at 0: a negative start would be interpreted as an offset from
        # the end of the chromosome and yield a wrong (usually empty) window.
        window_start = max(0, middle - 5000)
        window_seqs[enhancer] = str(chrom_seq[window_start:middle + 5000])

    # Make sure the output directories exist before opening files in them.
    os.makedirs(triplet_out_dir, exist_ok=True)
    os.makedirs(f"{gnm_out_dir}/{level}", exist_ok=True)

    for i, order in enumerate(permutations(enhancers)):
        with open(f"{triplet_out_dir}/{level}_triplet_{i}.fa", 'w') as f:
            for enhancer in order:
                f.write(f"> {enhancer} {cell_types}\n")
                f.write(enhancer_seqs[enhancer] + "\n")

        with open(f"{gnm_out_dir}/{level}/seq_{i}.fa", 'w') as f:
            # The genomic window is taken around the last enhancer of the ordering.
            enhancer = order[-1]
            f.write(f"> {enhancer}\n")
            f.write(window_seqs[enhancer])