In [1]:
import pandas as pd
import numpy as np
import os, sys, pysam, gzip, subprocess, shlex, time
from datetime import datetime
from tqdm import tqdm
from multiprocessing import Pool
from sklearn.preprocessing import OneHotEncoder

def give_inputs(cell_line):
    """Return the sample tables and annotation files configured for a cell line.

    Parameters
    ----------
    cell_line : str
        Cell line identifier. "HEK293T", "HEK" and "a549" have dedicated
        configurations; any other value falls back to the single-pair
        configuration of the final branch.

    Returns
    -------
    tuple
        ``(samples, rmsk_file, refseq_file, files_path)`` where ``samples`` is
        a list of groups of REDItools table names, ``rmsk_file`` and
        ``refseq_file`` are annotation file names, and ``files_path`` is the
        dataset directory for ``cell_line``.
    """
    # The directory is derived from the argument itself. The previous version
    # formatted the path with the global `cells`, which only worked when the
    # caller happened to define that global with the same value.
    files_path = "/lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/{}".format(cell_line)

    if cell_line == "HEK293T":
        samples = [["outTable_599710609", "outTable_905657585", "outTable_530905096"],
                   ["outTable_572868058", "outTable_364841872", "outTable_814257267"],
                   ["outTable_110067244", "outTable_597789462", "outTable_208420383"]]

        rmsk_file = "rmsk_hg38.sorted.gtf.gz"
        refseq_file = "hg38.110.ncbiRefSeq.sorted.gtf.gz"

    elif cell_line == "HEK":
        samples = [["outTable_724242056", "outTable_816573740"],
                   ["outTable_580067564", "outTable_718392497"],
                   ["outTable_181728208", "outTable_854894021"]]
        rmsk_file = "rmsk.sorted.gtf.gz"
        refseq_file = "hg19.ncbiRefSeq.sorted.gtf.gz"

    elif cell_line == "a549":
        samples = [["outTable_192318299", "outTable_436061877"],
                   ["outTable_535670354", "outTable_396704193"],
                   ["outTable_773331943", "outTable_302610513"]]
        rmsk_file = "rmsk.sorted.gtf.gz"
        refseq_file = "hg19.ncbiRefSeq.sorted.gtf.gz"

    else:
        # Fallback used for every other identifier (the notebook calls it
        # with "U87").
        samples = [["outTable_853538513", "outTable_921089530"]]
        rmsk_file = "rmsk.sorted.gtf.gz"
        refseq_file = "hg19.ncbiRefSeq.sorted.gtf.gz"

    return samples, rmsk_file, refseq_file, files_path

def extraction(prefix,  AG_min, AGfreq_threshold, cov_threshold, interval):
    """Select A-to-G editing candidates from a REDItools table and encode their windows.

    ``<prefix>.gz`` is read twice: once as a gzip stream to select candidate
    sites, then through its tabix index to fetch the rows surrounding each
    candidate. For every candidate whose window is complete and lies on a
    single strand, an 8-row matrix (4 one-hot rows for the reference base
    followed by 4 rows of base frequencies, one column per window position)
    is appended to ``<prefix>_feature_vectors.tsv``. The accepted windows are
    listed in ``<prefix>_intervals.tsv``.

    Parameters
    ----------
    prefix : str
        Path of the table without the ``.gz`` extension. The file must be
        readable by ``pysam.TabixFile``.
    AG_min : int
        Minimum count at index 2 of the base-count vector (column "Bases").
    AGfreq_threshold : float
        Minimum ratio between that count and the sum of the base counts.
    cov_threshold : int
        Minimum value of the "Cov" column.
    interval : int
        Number of rows in a window centred on the candidate. It must be odd:
        the window spans ``(interval - 1) // 2`` positions on each side, so an
        even value can never yield a complete window.

    Returns
    -------
    None
        Results are written to the two TSV files described above.
    """
    # Local imports keep this block self-contained without touching the
    # notebook's import cell.
    from ast import literal_eval
    from contextlib import closing

    starttime = datetime.now()
    columns = ["Region", "Position", "Ref", "Strand", "Cov", "Qual", "Bases", "AllSubs", "Freq", "gCov", "gQual", "g[A,C,G,T]", "gAllSubs", "gFreq"]

    editing = []
    c = 0  # stays defined for the summary print even if the table is empty

    with gzip.open(prefix+".gz") as redi:
        for c, l in enumerate(redi):
            line = l.decode("utf-8").rstrip().split("\t")
            if (
                "chr" in line[0]
                and line[2] == "A"
                and line[4] != "-"
                and int(line[4]) >= cov_threshold
                and "AG" in line[7]
            ):
                # literal_eval parses the "[a, c, g, t]" text without running
                # arbitrary code taken from the file, unlike eval.
                bases = literal_eval(line[6])
                if bases[2] / sum(bases) >= AGfreq_threshold and bases[2] >= AG_min:
                    editing.append(line)

            if c % 50000000 == 0:
                print(f"\tSites evaluated: {c}")
    print("Total evaluated rows:", c)

    # Passing the column names to the constructor also works when no site was
    # selected; assigning `.columns` on an empty frame raises a length mismatch.
    editing = pd.DataFrame(editing, columns=columns)
    stoptime = datetime.now()
    print(f"[{datetime.now()}] Extraction of Editing Candidates finished for current sample. Elapsed time: {stoptime-starttime}.")
    print(f"[{datetime.now()}] Starting extraction of intervals.")
    ohe = OneHotEncoder()
    ohe.fit(np.array(["A", "C", "G", "T"]).reshape(-1, 1))

    intervals = []
    starttime_preds = datetime.now()
    features_extracted_filepath = prefix + "_feature_vectors.tsv"

    df = editing.query("Region != 'chrM'")
    print(f"[{datetime.now()}] Loading reditable with tabix and pysam:", prefix)

    # Integer half-width: keeps the coordinates passed to pysam, and the
    # Start/Stop/Intlen columns of the intervals table, as integers.
    half_window = (interval - 1) // 2

    # Both handles are released even if a site raises part-way through.
    with open(features_extracted_filepath, "w") as features_extracted, \
            closing(pysam.TabixFile(prefix+".gz")) as srr:
        for site in tqdm(df.itertuples(), total=df.shape[0], position=0, leave=True):
            position = int(site.Position)
            start = position - half_window
            stop = position + half_window
            if start < 1:
                # The window would begin before the first base of the
                # sequence, so it cannot be complete (and a negative start is
                # not a valid fetch coordinate).
                continue

            rows = [s.split("\t") for s in srr.fetch(site.Region, start-1, stop)]
            srr_interval = pd.DataFrame(rows, columns=columns)
            # Keep only complete windows whose rows all share one strand value.
            if srr_interval.shape[0] != interval or len(set(srr_interval["Strand"])) != 1:
                continue

            site_bases = literal_eval(site.Bases)
            AGrna = site_bases[2] / sum(site_bases)
            intervals.append([site.Region, site.Position, site.Ref, site.Strand, AGrna, site.Bases, start, stop, stop-start + 1, srr_interval.shape[0]])

            # 4 x interval one-hot matrix of the reference bases.
            seq = srr_interval.Ref.values.reshape(-1, 1)
            seq_ohe = ohe.transform(seq).toarray().T

            # 4 x interval matrix of per-position base frequencies.
            counts = np.array([literal_eval(vect) for vect in srr_interval["Bases"]])
            vects_freqs = (counts / counts.sum(axis=1, keepdims=True)).T

            encoded_site = np.vstack([seq_ohe, vects_freqs])
            # The strand is read from the table as text, so it is converted
            # before the comparison. The previous `strand == 0` test compared
            # a string with an integer and therefore never reversed a window.
            # NOTE(review): code 0 is presumably the minus strand — confirm.
            if int(site.Strand) == 0:
                encoded_site = np.flip(encoded_site, axis=1)
            pd.DataFrame(encoded_site).to_csv(features_extracted, mode="a", sep="\t", header=None, index=None)

    print(f"[{datetime.now()}] Features Extraction Finished. Elapsed time {datetime.now()-starttime_preds}.")

    intervals = pd.DataFrame(
        intervals,
        columns=["Region", "Position", "RefBase", "Strand", "FreqAGrna", "BasesCounts", "Start", "Stop", "Intlen", "TabixLen"],
    )
    intervals.to_csv(prefix + "_intervals.tsv", sep="\t", index=None)
    print(f"[{datetime.now()}] Computation Finished. Total Elapsed time: {datetime.now()-starttime}")

def candidates_bona_fide_extraction(name1, name2, path, cells, AG_min, AGfreq_threshold, cov_threshold, rna_cov_threshold):
    """Label candidate sites of one table against a second table and a WGS table.

    Streams ``<path>/<name1>.gz`` and keeps stranded (strand != 2), non-chrM
    rows whose reference is "A", whose substitution column is exactly "AG"
    and which pass the coverage, frequency and count thresholds. Each kept
    site is looked up at the same position in ``<path>/<name2>.gz`` (opened
    here as the ``inactive`` table) and in ``<path>/<cells>_WGS.gz``; only
    rows on the same strand with enough coverage are considered.

    A site is written with:

    * ``Class = 1`` when both the inactive row and the WGS row report no
      substitution ("-");
    * ``Class = 0`` when the inactive row reports "AG" above the count and
      frequency thresholds and the WGS row also reports "AG".

    Parameters
    ----------
    name1, name2 : str
        Table names (without ``.gz``) inside ``path``.
    path : str
        Directory containing the tables; the result is written there as
        ``<name1>_<name2>_candidates_bona_fide_sites.tsv``.
    cells : str
        Prefix of the WGS table name.
    AG_min : int
        Minimum count at index 2 of the base-count vector (column 7).
    AGfreq_threshold : float
        Minimum value of the frequency column (column 9).
    cov_threshold : int
        Minimum coverage read from column 10 of the WGS table.
    rna_cov_threshold : int
        Minimum coverage read from column 5 of both ``name1`` and ``name2``.

    Returns
    -------
    None
    """
    # Local imports keep this block self-contained without touching the
    # notebook's import cell.
    from ast import literal_eval
    from contextlib import closing

    starttime = datetime.now()
    sites = []
    c = 0  # stays defined for the summary print even if the table is empty

    # closing() guarantees the two tabix handles are released on any exit path.
    with closing(pysam.TabixFile(f"{path}/{cells}_WGS.gz")) as wgs, \
            closing(pysam.TabixFile(f"{path}/{name2}.gz")) as inactive, \
            gzip.open(f"{path}/{name1}.gz") as redi:
        for c, l in enumerate(redi):
            # Progress is reported first so the guard clauses below can skip
            # a row with `continue` without losing the message.
            if c % 50000000 == 0:
                print(f"\tSites evaluated: {c}")

            line = l.decode("utf-8").rstrip().split("\t")
            if "chr" not in line[0] or line[0] == "chrM":
                continue
            strand = int(line[3])
            if strand == 2 or line[2] != "A" or line[4] == "-":
                continue
            if int(line[4]) < rna_cov_threshold or line[7] != "AG":
                continue
            if float(line[8]) < AGfreq_threshold:
                continue
            # literal_eval parses the "[a, c, g, t]" text without executing
            # arbitrary code taken from the file, unlike eval.
            if literal_eval(line[6])[2] < AG_min:
                continue

            region = line[0]
            stop = int(line[1])
            start = stop - 1  # 0-based, half-open window covering this position

            for ROW_inactive in inactive.fetch(region, start, stop):
                row_inactive = ROW_inactive.split("\t")
                if row_inactive[4] == "-":
                    continue
                if int(row_inactive[3]) != strand:
                    continue
                if int(row_inactive[4]) < rna_cov_threshold:
                    continue

                # The inactive row alone decides which WGS substitution is
                # required and which class would be assigned, so settle that
                # before touching the WGS index.
                sub_inactive = row_inactive[7]
                if sub_inactive == "-":
                    expected_wgs, label = "-", 1
                elif (
                    sub_inactive == "AG"
                    and literal_eval(row_inactive[6])[2] >= AG_min
                    and float(row_inactive[8]) >= AGfreq_threshold
                ):
                    expected_wgs, label = "AG", 0
                else:
                    continue

                for ROW_WGS in wgs.fetch(region, start, stop):
                    row_wgs = ROW_WGS.split("\t")
                    if row_wgs[9] == "-":
                        continue
                    if int(row_wgs[3]) != strand:
                        continue
                    if int(row_wgs[9]) < cov_threshold:
                        continue
                    if row_wgs[12] == expected_wgs:
                        sites.append([line[0], line[1], label])

    # Naming the columns in the constructor also works when no site was
    # selected; assigning `.columns` on an empty frame raises a length mismatch.
    sites = pd.DataFrame(sites, columns=["Region", "Position", "Class"])
    print("Total evaluated rows:", c)
    stoptime = datetime.now()

    print(f"[{datetime.now()}] Extraction of candidates bonafide sites finished for {name1} {name2} samples. Elapsed time: {stoptime-starttime}.")
    sites.to_csv(f"{path}/{name1}_{name2}_candidates_bona_fide_sites.tsv", sep="\t", index=None)
                                                                              
def _run_annotation_jobs(commands, env):
    """Start every command in parallel and block until all of them have exited."""
    processes = [subprocess.Popen(args, env=env) for args in commands]
    for args, process in zip(commands, processes):
        if process.wait() != 0:
            print(f"[{datetime.now()}] WARNING: annotation exited with status {process.returncode}: {' '.join(args)}")


def bonafide_identification(path, rmsk, refseq):
    """Annotate the candidate tables in ``path`` and write the bona fide site tables.

    For every ``*_candidates_bona_fide_sites.tsv`` file in ``path``:

    1. the header line is removed in place (the annotation script is fed a
       headerless table);
    2. ``AnnotateTablePython3.py`` is run with the ``rmsk`` annotation, then
       again on its output with the ``refseq`` annotation;
    3. the annotated table is saved with named columns as ``*_annoted.tsv``;
    4. rows are filtered and saved, sorted by region and position, as
       ``*_bona_fide_sites.tsv``.

    The filter keeps rows with both RMSK columns set, plus rows with both
    RMSK columns equal to "-" that are either of class 0 or of class 1 with
    both RefSeq columns set.

    Parameters
    ----------
    path : str
        Directory holding the candidate tables; all outputs are written there.
    rmsk : str
        File name of the RepeatMasker annotation inside the utilities folder.
    refseq : str
        File name of the RefSeq annotation inside the utilities folder.

    Returns
    -------
    None
    """
    u_path = "/lustrehome/pietrolucamazzacuva/filezilla-recas/scripts/utilities"
    script = "{}/AnnotateTablePython3.py".format(u_path)
    annotate_env = dict(os.environ, PATH="/lustrehome/pietrolucamazzacuva/anaconda3/envs/tensorflow-gpu/bin")

    candidates_suffix = "_candidates_bona_fide_sites.tsv"
    rmsk_suffix = "_candidates_bona_fide_sites.out.rmsk"
    refseq_suffix = "_candidates_bona_fide_sites.out.rmsk.refseq"
    annotated_suffix = "candidates_bona_fide_sites_annoted.tsv"

    # Suffix matching (instead of substring matching) keeps each step from
    # picking up the outputs of a later step left over from a previous run,
    # e.g. re-annotating an existing ".out.rmsk.refseq" file.
    rmsk_jobs = []
    for file_name in os.listdir(path):
        if not file_name.endswith(candidates_suffix):
            continue
        table_path = os.path.join(path, file_name)
        # Strip the header only when it is still there: re-reading an already
        # headerless file with pandas would consume its first data row as a
        # header and silently drop it on every re-run.
        with open(table_path) as handle:
            has_header = handle.readline().startswith("Region\t")
        if has_header:
            df = pd.read_csv(table_path, sep="\t")
            df.to_csv(table_path, sep="\t", index=None, header=False)
        name = os.path.splitext(file_name)[0]
        rmsk_jobs.append([
            "python3", script,
            "-a", "{}/{}".format(u_path, rmsk),
            "-n", "rmsk",
            "-i", "{}/{}.tsv".format(path, name),
            "-o", "{}/{}.out.rmsk".format(path, name),
            "-u",
        ])

    # Wait for the jobs to actually finish. A fixed five-minute sleep either
    # wastes time or, for large tables, lets the next step start on files
    # that are still being written.
    _run_annotation_jobs(rmsk_jobs, annotate_env)

    refseq_jobs = []
    for name in os.listdir(path):
        if not name.endswith(rmsk_suffix):
            continue
        refseq_jobs.append([
            "python3", script,
            "-a", "{}/{}".format(u_path, refseq),
            "-i", "{}/{}".format(path, name),
            "-o", "{}/{}.refseq".format(path, name),
            "-u",
        ])

    _run_annotation_jobs(refseq_jobs, annotate_env)

    cols = ["Region", "Position", "Class", "RMSK-Rep", "RMSK-Reg", "RefSeq-Rep", "RefSeq-Reg"]
    for file_name in os.listdir(path):
        if not file_name.endswith(refseq_suffix):
            continue
        df = pd.read_table(os.path.join(path, file_name), header=None)
        name = file_name.replace(".out.rmsk.refseq", "_annoted.tsv")
        df.columns = cols
        df.to_csv(os.path.join(path, name), sep="\t", index=None)

    for file_name in os.listdir(path):
        if not file_name.endswith(annotated_suffix):
            continue
        bona_fide = pd.read_csv(os.path.join(path, file_name), sep="\t")

        rmsk_rep_set = bona_fide.iloc[:, 3] != "-"
        rmsk_reg_set = bona_fide.iloc[:, 4] != "-"
        rep = bona_fide[rmsk_rep_set & rmsk_reg_set]
        # Rows with exactly one RMSK column set fall in neither group and are
        # discarded.
        non_rep = bona_fide[~rmsk_rep_set & ~rmsk_reg_set]
        del bona_fide

        non_rep_n = non_rep[non_rep.iloc[:, 2] == 0]
        non_rep_p = non_rep[non_rep.iloc[:, 2] == 1]
        del non_rep

        # Class-1 rows with no RMSK annotation are kept only when both RefSeq
        # columns are set.
        non_rep_p = non_rep_p[(non_rep_p.iloc[:, 5] != "-") & (non_rep_p.iloc[:, 6] != "-")]
        bona_fide = pd.concat([rep, non_rep_n, non_rep_p])
        del rep, non_rep_n, non_rep_p

        name = file_name.replace("candidates_bona_fide_sites_annoted.tsv", "bona_fide_sites.tsv")
        bona_fide = bona_fide.sort_values(["Region", "Position"])
        bona_fide.to_csv(os.path.join(path, name), sep="\t", index=None)

In [2]:
# Thresholds and window size used for the a549 run.
min_dna_cov = 10
min_rna_cov = 50
min_AG_rate = 0.01
min_G = 3
seq_lenght = 101

cells = "a549"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# One feature-extraction job per table, across every sample group.
inputs = [
    [os.path.join(filespath, f"{table_name}"), min_G, min_AG_rate, min_rna_cov, seq_lenght]
    for group in samples
    for table_name in group
]

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# One labelling job per group: first table against the second one.
inputs = [
    [samples[k][0], samples[k][1], filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov]
    for k in range(3)
]

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Annotate the candidate tables and write the final bona fide site tables.
bonafide_identification(filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0	Sites evaluated: 0


	Sites evaluated: 0	Sites evaluated: 0

	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
Total evaluated rows: 216398797
[2024-08-12 07:59:07.329477] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:30.169493.
[2024-08-12 07:59:07.331840] Starting extraction of intervals

100%|██████████| 6631/6631 [00:20<00:00, 323.44it/s]


[2024-08-12 07:59:27.886180] Features Extraction Finished. Elapsed time 0:00:20.549562.
[2024-08-12 07:59:27.910228] Computation Finished. Total Elapsed time: 0:04:50.750274
Total evaluated rows: 235536788
[2024-08-12 07:59:35.856280] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:58.692904.
[2024-08-12 07:59:35.859263] Starting extraction of intervals.
[2024-08-12 07:59:35.883384] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_535670354


  1%|          | 126/16870 [00:00<00:53, 311.29it/s]

Total evaluated rows: 236217106
[2024-08-12 07:59:36.417003] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:59.252981.
[2024-08-12 07:59:36.419838] Starting extraction of intervals.
[2024-08-12 07:59:36.437291] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_773331943


  5%|▌         | 792/15698 [00:02<00:40, 369.08it/s]]

Total evaluated rows: 239780038
[2024-08-12 07:59:39.214045] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:05:02.050122.
[2024-08-12 07:59:39.217530] Starting extraction of intervals.
[2024-08-12 07:59:39.242668] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_396704193


  9%|▊         | 1347/15698 [00:04<00:43, 331.71it/s]

Total evaluated rows: 241316303


  6%|▌         | 514/8777 [00:01<00:24, 338.10it/s]

[2024-08-12 07:59:40.918696] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:05:03.759157.
[2024-08-12 07:59:40.921922] Starting extraction of intervals.
[2024-08-12 07:59:40.950694] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_192318299


 13%|█▎        | 2054/15698 [00:06<00:55, 246.11it/s]

Total evaluated rows: 241233789
[2024-08-12 07:59:43.449444] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:05:06.285300.
[2024-08-12 07:59:43.451417] Starting extraction of intervals.
[2024-08-12 07:59:43.465622] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_302610513


100%|██████████| 8777/8777 [00:27<00:00, 322.08it/s]]


[2024-08-12 08:00:06.556535] Features Extraction Finished. Elapsed time 0:00:27.334583.


 78%|███████▊  | 7246/9252 [00:23<00:05, 339.95it/s]]

[2024-08-12 08:00:06.594258] Computation Finished. Total Elapsed time: 0:05:29.430376

 43%|████▎     | 8111/19055 [00:25<00:34, 318.73it/s]




 73%|███████▎  | 11506/15698 [00:35<00:11, 379.43it/s]


[2024-08-12 08:00:12.422075] Features Extraction Finished. Elapsed time 0:00:28.967348.


 53%|█████▎    | 10065/19055 [00:31<00:21, 427.44it/s]

[2024-08-12 08:00:12.449557] Computation Finished. Total Elapsed time: 0:05:35.285431


100%|██████████| 15698/15698 [00:48<00:00, 324.75it/s]


[2024-08-12 08:00:24.845819] Features Extraction Finished. Elapsed time 0:00:48.423093.


 75%|███████▌  | 14306/19055 [00:43<00:13, 354.37it/s]

[2024-08-12 08:00:24.891639] Computation Finished. Total Elapsed time: 0:05:47.727654


100%|██████████| 16870/16870 [00:51<00:00, 329.99it/s]
 79%|███████▉  | 15071/19055 [00:46<00:11, 340.24it/s]

[2024-08-12 08:00:27.095515] Features Extraction Finished. Elapsed time 0:00:51.232344.
[2024-08-12 08:00:27.138574] Computation Finished. Total Elapsed time: 0:05:49.975239


100%|██████████| 19055/19055 [00:58<00:00, 326.67it/s]


[2024-08-12 08:00:39.410338] Features Extraction Finished. Elapsed time 0:00:58.483835.
[2024-08-12 08:00:39.459621] Computation Finished. Total Elapsed time: 0:06:02.300138
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
Total evaluated rows: 235536788
[2024-08-12 08:07:23.556904] Extraction of candidates bonafide sites finished for outTable_535670354 outTable_396704193 samples. Elapsed time: 0:06:44.011600.
Total evaluated rows: 236217106
[2024-08-12 08:07:23.690745] Extraction of candidates bonafide sites finished for outTable_773331943 outTable_302610513 samples. Elapsed time: 0:06:44.145326.
Total evaluated rows: 241316303
[2024-08-12 08:07:4

Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:07:47
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_192318299_outTable_436061877_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 08:08:10
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:07:47
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_773331943_outTable_302610513_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 08:08:10
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:07:47
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_535670354_outTable_396704193_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 08:08:11
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:09:44
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_d

In [4]:
# Thresholds and window size used for the HEK run.
min_dna_cov = 10
min_rna_cov = 30
min_AG_rate = 0.01
min_G = 2
seq_lenght = 101

cells = "HEK"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# Feature extraction: one job per table of every group.
inputs = []
for group in samples:
    for table_name in group:
        table_prefix = os.path.join(filespath, f"{table_name}")
        inputs.append([table_prefix, min_G, min_AG_rate, min_rna_cov, seq_lenght])

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# Candidate labelling: first table of each of the three groups against the
# second one.
inputs = []
for k in range(3):
    first_table = samples[k][0]
    second_table = samples[k][1]
    inputs.append([first_table, second_table, filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov])

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Annotation and final filtering of the candidate tables.
bonafide_identification(filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0


Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:36:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_208420383_outTable_905657585_candidates_bona_fide_sites.out.rmsk.refseq
Script time --> END: 12/08/2024 08:38:09
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:36:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_814257267_outTable_364841872_candidates_bona_fide_sites.out.rmsk.refseq
Script time --> END: 12/08/2024 08:38:18
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 08:36:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_530905096_outTable_597789462_candidates_bona_fide_sites.out.rmsk.refseq
Script time --> END: 12/08/2024 08:38:27


	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 300000000
	Sites evaluated: 300000000
	Sites evaluated: 300000000
	Sites evaluated: 300000000
	Sites evaluated: 300000000
	Sites evaluated: 30000000

  7%|▋         | 375/5239 [00:05<00:53, 90.26it/s] 

Total evaluated rows: 422584480
[2024-08-12 08:46:46.515165] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:08:38.820314.
[2024-08-12 08:46:46.517307] Starting extraction of intervals.
[2024-08-12 08:46:46.534328] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_181728208


 89%|████████▊ | 4649/5239 [00:31<00:02, 216.62it/s]]

	Sites evaluated: 450000000


 56%|█████▌    | 7345/13076 [00:28<00:18, 309.93it/s]

Total evaluated rows: 452378572
[2024-08-12 08:47:15.440800] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:09:07.746514.
[2024-08-12 08:47:15.442702] Starting extraction of intervals.
[2024-08-12 08:47:15.457593] Loading reditable with tabix and pysam: 

 56%|█████▋    | 7377/13076 [00:28<00:19, 286.38it/s]

/lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_724242056


100%|██████████| 5239/5239 [00:35<00:00, 149.63it/s]


[2024-08-12 08:47:15.526787] Features Extraction Finished. Elapsed time 0:00:35.207589.
[2024-08-12 08:47:15.544342] Computation Finished. Total Elapsed time: 0:09:07.849617


 62%|██████▏   | 8130/13076 [00:31<00:17, 276.97it/s]

	Sites evaluated: 450000000


  9%|▉         | 773/8608 [00:03<00:30, 254.43it/s]s]

	Sites evaluated: 450000000


 10%|█         | 897/8608 [00:04<00:39, 196.55it/s]s]

	Sites evaluated: 450000000


100%|██████████| 13076/13076 [00:51<00:00, 255.20it/s]


[2024-08-12 08:47:37.931713] Features Extraction Finished. Elapsed time 0:00:51.410712.
[2024-08-12 08:47:37.967060] Computation Finished. Total Elapsed time: 0:09:30.272234


 62%|██████▏   | 5340/8608 [00:28<00:14, 230.01it/s]

Total evaluated rows: 469580145
[2024-08-12 08:47:44.168845] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:09:36.474413.
[2024-08-12 08:47:44.170603] Starting extraction of intervals.
[2024-08-12 08:47:44.182633] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_816573740


100%|██████████| 8608/8608 [00:43<00:00, 196.82it/s]


[2024-08-12 08:47:59.301538] Features Extraction Finished. Elapsed time 0:00:43.856148.


 43%|████▎     | 2855/6645 [00:15<00:14, 258.84it/s]

[2024-08-12 08:47:59.325195] Computation Finished. Total Elapsed time: 0:09:51.630926


 47%|████▋     | 3101/6645 [00:16<00:23, 148.37it/s]

Total evaluated rows: 483808594
[2024-08-12 08:48:00.794968] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:09:53.100391.
[2024-08-12 08:48:00.797112] Starting extraction of intervals.
[2024-08-12 08:48:00.811683] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_580067564


 94%|█████████▍| 6262/6645 [00:35<00:02, 175.79it/s]]

	Sites evaluated: 500000000


100%|██████████| 6645/6645 [00:37<00:00, 174.95it/s]]


[2024-08-12 08:48:22.281008] Features Extraction Finished. Elapsed time 0:00:38.107554.
[2024-08-12 08:48:22.317469] Computation Finished. Total Elapsed time: 0:10:14.623069


 65%|██████▍   | 8237/12737 [00:41<00:21, 204.65it/s]

Total evaluated rows: 519011879
[2024-08-12 08:48:42.545980] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:10:34.850990.
[2024-08-12 08:48:42.547871] Starting extraction of intervals.
[2024-08-12 08:48:42.560919] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_854894021


100%|██████████| 12737/12737 [01:03<00:00, 201.07it/s]


[2024-08-12 08:49:04.257302] Features Extraction Finished. Elapsed time 0:01:03.457092.
[2024-08-12 08:49:04.290489] Computation Finished. Total Elapsed time: 0:10:56.595939


100%|██████████| 8460/8460 [01:00<00:00, 140.87it/s]


[2024-08-12 08:49:42.779057] Features Extraction Finished. Elapsed time 0:01:00.228432.
[2024-08-12 08:49:42.805519] Computation Finished. Total Elapsed time: 0:11:35.110546
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 300000000
	Sites evaluated: 300000000
	Sites evaluated: 300000000
	Sites evaluated: 350000000
	Sites evaluated: 350000000
	Sites evaluated: 350000000
	Sites evaluated: 400000000
	Sites evaluated: 400000000
	Sites evaluated: 400000000
	Sites evaluated: 450000000
Total evaluated rows: 422584480
[2024-08-12 09:01:42.197117] Extracti

Pysam version used: 0.15.4
Script time --> START: 12/08/2024 09:02:07
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_724242056_outTable_816573740_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 09:02:26
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 09:02:07
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_580067564_outTable_718392497_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 09:02:30
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 09:02:07
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_181728208_outTable_854894021_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 09:02:36
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 09:04:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_data

In [5]:
# Thresholds and window size used for the U87 run.
min_dna_cov = 10
min_rna_cov = 30
min_AG_rate = 0.01
min_G = 2
seq_lenght = 101

cells = "U87"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# Feature extraction: one job per table of every group.
inputs = [
    [os.path.join(filespath, f"{table_name}"), min_G, min_AG_rate, min_rna_cov, seq_lenght]
    for group in samples
    for table_name in group
]

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# Candidate labelling for the single group of this cell line.
only_group = samples[0]
inputs = [[only_group[0], only_group[1], filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov]]

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Annotation and final filtering of the candidate tables.
bonafide_identification(filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
Total evaluated rows: 213159783
[2024-08-12 09:10:30.350499] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:23.735237.
[2024-08-12 09:10:30.353553] Starting extraction of intervals.
[2024-08-12 09:10:30.378151] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/U87/outTable_921089530


 51%|█████     | 4644/9180 [00:13<00:12, 356.94it/s]

Total evaluated rows: 225346133
[2024-08-12 09:10:44.128755] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:37.514037.
[2024-08-12 09:10:44.130865] Starting extraction of intervals.
[2024-08-12 09:10:44.147168] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/U87/outTable_853538513


100%|██████████| 9180/9180 [00:26<00:00, 346.47it/s]]


[2024-08-12 09:10:57.013568] Features Extraction Finished. Elapsed time 0:00:26.655084.
[2024-08-12 09:10:57.041925] Computation Finished. Total Elapsed time: 0:04:50.426690


100%|██████████| 11663/11663 [00:34<00:00, 338.28it/s]


[2024-08-12 09:11:18.732449] Features Extraction Finished. Elapsed time 0:00:34.597876.
[2024-08-12 09:11:18.761879] Computation Finished. Total Elapsed time: 0:05:12.147182
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
Total evaluated rows: 225346133
[2024-08-12 09:17:20.854011] Extraction of candidates bonafide sites finished for outTable_853538513 outTable_921089530 samples. Elapsed time: 0:06:01.982123.


Pysam version used: 0.15.4
Script time --> START: 12/08/2024 09:17:22
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/U87/outTable_853538513_outTable_921089530_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 09:17:42
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 09:19:21
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/U87/outTable_853538513_outTable_921089530_candidates_bona_fide_sites.out.rmsk.refseq
Script time --> END: 12/08/2024 09:19:43


In [2]:
# Thresholds shared by the extraction steps of this cell.
min_dna_cov = 10
min_rna_cov = 50
min_AG_rate = 0.01
min_G = 3
seq_lenght = 101

# Dataset processed by this cell.
cells = "HEK293T"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# Stage 1: one `extraction` job per table, flattening every sample group.
# Positional order follows extraction(prefix, AG_min, AGfreq_threshold,
# cov_threshold, interval).
inputs = [
    [os.path.join(filespath, f"{table}"), min_G, min_AG_rate, min_rna_cov, seq_lenght]
    for group in samples
    for table in group
]

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# Stage 2: for each of the first three groups, tables 0 and 2 are each
# paired with table 1 of the same group, giving six jobs in total.
inputs = []
for group_index in range(3):
    group = samples[group_index]
    common_tail = [filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov]
    inputs.append([group[0], group[1], *common_tail])
    inputs.append([group[2], group[1], *common_tail])

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Stage 3: final step, run with the annotation file names returned by give_inputs.
bonafide_identification(filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0


	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
Total evaluated rows: 194677901
[2024-08-12 14:58:25.243071] Extraction o

 19%|█▉        | 2330/12094 [00:06<00:27, 349.98it/s]

	Sites evaluated: 200000000


 23%|██▎       | 2783/12094 [00:08<00:28, 322.24it/s]

Total evaluated rows: 199831579
[2024-08-12 14:58:33.662586] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:12.957696.
[2024-08-12 14:58:33.666338] Starting extraction of intervals.


 23%|██▎       | 2816/12094 [00:08<00:28, 320.76it/s]

[2024-08-12 14:58:33.710815] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_110067244


 25%|██▌       | 3037/12094 [00:08<00:26, 342.09it/s]

	Sites evaluated: 200000000


 27%|██▋       | 3274/12094 [00:09<00:28, 306.20it/s]

	Sites evaluated: 200000000


 27%|██▋       | 3305/12094 [00:09<00:29, 298.65it/s]

	Sites evaluated: 200000000


 29%|██▉       | 3492/12094 [00:10<00:25, 341.79it/s]

	Sites evaluated: 200000000


  1%|          | 179/24911 [00:02<07:44, 53.29it/s] 

	Sites evaluated: 200000000


 39%|███▉      | 4698/12094 [00:13<00:22, 329.25it/s]

Total evaluated rows: 205675117


  6%|▌         | 1457/24911 [00:05<01:04, 362.32it/s]

[2024-08-12 14:58:39.319305] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:18.615736.
[2024-08-12 14:58:39.321487] Starting extraction of intervals.
[2024-08-12 14:58:39.339426] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_364841872


 45%|████▍     | 5404/12094 [00:16<00:20, 331.32it/s]

	Sites evaluated: 200000000


 15%|█▍        | 3636/24911 [00:12<01:03, 336.12it/s]

Total evaluated rows: 208788107
[2024-08-12 14:58:46.314016] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:04:25.608619.
[2024-08-12 14:58:46.318108] Starting extraction of intervals.


 16%|█▌        | 1922/12302 [00:06<00:31, 330.61it/s]

[2024-08-12 14:58:46.351927] Loading reditable with tabix and pysam:

 15%|█▍        | 3676/24911 [00:12<01:00, 350.38it/s]

 /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_597789462


100%|██████████| 12094/12094 [00:35<00:00, 340.78it/s]
 42%|████▏     | 4765/11464 [00:14<00:21, 316.62it/s]

[2024-08-12 14:59:00.839802] Features Extraction Finished. Elapsed time 0:00:35.568069.


 34%|███▍      | 8528/24911 [00:27<00:44, 371.62it/s]

[2024-08-12 14:59:00.878330] Computation Finished. Total Elapsed time: 0:04:40.175180


100%|██████████| 12302/12302 [00:37<00:00, 327.03it/s]


[2024-08-12 14:59:17.078218] Features Extraction Finished. Elapsed time 0:00:37.753599.


 91%|█████████ | 10412/11464 [00:30<00:03, 328.17it/s]

[2024-08-12 14:59:17.118613] Computation Finished. Total Elapsed time: 0:04:56.415073


100%|██████████| 11464/11464 [00:33<00:00, 339.31it/s]


[2024-08-12 14:59:20.244905] Features Extraction Finished. Elapsed time 0:00:33.921843.
[2024-08-12 14:59:20.301418] Computation Finished. Total Elapsed time: 0:04:59.596065


 84%|████████▎ | 20815/24911 [01:03<00:12, 338.46it/s]

	Sites evaluated: 250000000


 85%|████████▍ | 21110/24911 [01:04<00:10, 345.65it/s]

	Sites evaluated: 250000000


 86%|████████▌ | 21340/24911 [01:05<00:09, 365.31it/s]

	Sites evaluated: 250000000


 86%|████████▌ | 21377/24911 [01:05<00:10, 348.44it/s]

	Sites evaluated: 250000000


 93%|█████████▎| 23278/24911 [01:10<00:04, 332.02it/s]

	Sites evaluated: 250000000


100%|██████████| 24911/24911 [01:15<00:00, 329.39it/s]


[2024-08-12 14:59:49.434406] Features Extraction Finished. Elapsed time 0:01:15.762420.
[2024-08-12 14:59:49.493078] Computation Finished. Total Elapsed time: 0:05:28.788278
Total evaluated rows: 266283468
[2024-08-12 15:00:05.319840] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:05:44.614346.
[2024-08-12 15:00:05.322398] Starting extraction of intervals.
[2024-08-12 15:00:05.364119] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_208420383


 12%|█▏        | 6517/56154 [00:21<02:21, 351.83it/s]

Total evaluated rows: 288775068


 12%|█▏        | 6553/56154 [00:21<02:31, 327.37it/s]

[2024-08-12 15:00:26.864081] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:06.160674.
[2024-08-12 15:00:26.867252] Starting extraction of intervals.
[2024-08-12 15:00:26.910910] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_572868058


 10%|█         | 3962/38951 [00:13<02:03, 283.36it/s]]

Total evaluated rows: 298198400


 19%|█▉        | 10824/56154 [00:34<02:22, 318.37it/s]

[2024-08-12 15:00:40.391401] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:19.688094.
[2024-08-12 15:00:40.394616] Starting extraction of intervals.


 10%|█         | 3994/38951 [00:13<01:59, 292.80it/s]

[2024-08-12 15:00:40.459078] Loading reditable with tabix and pysam:

 19%|█▉        | 10857/56154 [00:34<02:33, 294.70it/s]

 /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_530905096


 19%|█▉        | 10925/56154 [00:34<02:26, 307.89it/s][W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_530905096.gz.tbi
  0%|          | 138/69347 [00:00<04:05, 281.57it/s]s]

Total evaluated rows: 298335470


 20%|█▉        | 11089/56154 [00:35<01:56, 388.43it/s]

[2024-08-12 15:00:41.106371] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:20.402153.
[2024-08-12 15:00:41.108853] Starting extraction of intervals.


 11%|█         | 4196/38951 [00:14<02:14, 258.82it/s]

[2024-08-12 15:00:41.152545] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_814257267


  0%|          | 220/69347 [00:00<04:45, 242.02it/s]s][W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_814257267.gz.tbi
 12%|█▏        | 4536/38951 [00:15<01:40, 341.38it/s]]

	Sites evaluated: 300000000


 62%|██████▏   | 24275/38951 [01:18<00:55, 265.07it/s]

	Sites evaluated: 350000000


 40%|███▉      | 27020/68040 [01:29<02:31, 269.97it/s]

Total evaluated rows: 369524874
[2024-08-12 15:02:10.366455] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:07:49.667151.
[2024-08-12 15:02:10.369445] Starting extraction of intervals.


 69%|██████▊   | 38540/56154 [02:04<01:02, 282.73it/s]

[2024-08-12 15:02:10.407200] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_599710609


100%|██████████| 38951/38951 [02:06<00:00, 309.10it/s]
 81%|████████▏ | 45665/56154 [02:27<00:33, 309.75it/s]

[2024-08-12 15:02:33.045413] Features Extraction Finished. Elapsed time 0:02:06.173999.


 48%|████▊     | 33374/69347 [01:52<01:54, 314.06it/s]

[2024-08-12 15:02:33.141787] Computation Finished. Total Elapsed time: 0:08:12.438428


100%|██████████| 56154/56154 [03:00<00:00, 310.32it/s]
 29%|██▉       | 16439/56679 [00:56<02:00, 333.12it/s]

[2024-08-12 15:03:06.690481] Features Extraction Finished. Elapsed time 0:03:01.365068.


 65%|██████▌   | 44409/68040 [02:25<01:16, 309.85it/s]

[2024-08-12 15:03:06.822092] Computation Finished. Total Elapsed time: 0:08:46.116650


100%|██████████| 68040/68040 [03:42<00:00, 305.22it/s]
 97%|█████████▋| 67365/69347 [03:43<00:08, 247.16it/s]

[2024-08-12 15:04:24.167965] Features Extraction Finished. Elapsed time 0:03:43.056779.


 67%|██████▋   | 37988/56679 [02:13<01:13, 253.58it/s]

[2024-08-12 15:04:24.333978] Computation Finished. Total Elapsed time: 0:10:03.629815


100%|██████████| 69347/69347 [03:49<00:00, 301.59it/s]
 70%|███████   | 39873/56679 [02:19<00:57, 294.56it/s]

[2024-08-12 15:04:30.488156] Features Extraction Finished. Elapsed time 0:03:50.090828.


 70%|███████   | 39904/56679 [02:20<00:57, 293.38it/s]

[2024-08-12 15:04:30.645221] Computation Finished. Total Elapsed time: 0:10:09.941982


100%|██████████| 56679/56679 [03:17<00:00, 286.88it/s]


[2024-08-12 15:05:28.085651] Features Extraction Finished. Elapsed time 0:03:17.713170.
[2024-08-12 15:05:28.219872] Computation Finished. Total Elapsed time: 0:11:07.520620
	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0

	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
Total evaluated rows: 199831579
[2024-08-12 15:11:40.602585] Extraction of candidates bonafide sites finished for outTable_110067244 outTable_597789462 

Pysam version used: 0.15.4
Script time --> START: 12/08/2024 15:16:34
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_110067244_outTable_597789462_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 15:17:28
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 15:16:34
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_572868058_outTable_364841872_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 15:17:38
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 15:16:34
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_599710609_outTable_905657585_candidates_bona_fide_sites.out.rmsk
Script time --> END: 12/08/2024 15:17:54
Pysam version used: 0.15.4
Script time --> START: 12/08/2024 15:16:34
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/inde