In [1]:
# Copyright (c) 2023-2024 Pietro Luca Mazzacuva <pietroluca.mazzacuva@unicampus.it>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import pandas as pd
import numpy as np
import os, sys, pysam, gzip, subprocess, shlex, time
from datetime import datetime
from tqdm import tqdm
from multiprocessing import Pool
from sklearn.preprocessing import OneHotEncoder

def give_inputs(cell_line):
    """Return the sample tables and annotation file names for a cell line.

    Parameters
    ----------
    cell_line : str
        ``"HEK293T"`` and ``"HEK"`` select their own sample sets; any other
        value falls through to the third sample set (the notebook uses
        ``"a549"`` for it).

    Returns
    -------
    tuple
        ``(samples, rmsk_file, refseq_file, files_path)`` where ``samples``
        is a list of three groups of table names, ``rmsk_file`` and
        ``refseq_file`` are annotation file names, and ``files_path`` is the
        data directory for ``cell_line``.
    """
    # Build the path from the argument itself; the previous implementation
    # read an unrelated module-level global and failed (or silently used a
    # stale value) when that global did not match the requested cell line.
    files_path = "/lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/{}".format(cell_line)

    if cell_line == "HEK293T":
        # Three tables per group.
        samples = [["outTable_599710609", "outTable_905657585", "outTable_208420383"],
                   ["outTable_572868058", "outTable_364841872", "outTable_814257267"],
                   ["outTable_110067244", "outTable_597789462", "outTable_530905096"]]
        rmsk_file = "rmsk_hg38.sorted.gtf.gz"
        refseq_file = "hg38.110.ncbiRefSeq.sorted.gtf.gz"

    elif cell_line == "HEK":
        samples = [["outTable_724242056", "outTable_816573740"],
                   ["outTable_580067564", "outTable_718392497"],
                   ["outTable_181728208", "outTable_854894021"]]
        rmsk_file = "rmsk.sorted.gtf.gz"
        refseq_file = "hg19.ncbiRefSeq.sorted.gtf.gz"

    else:
        # Default sample set for every other cell line name.
        samples = [["outTable_192318299", "outTable_436061877"],
                   ["outTable_535670354", "outTable_396704193"],
                   ["outTable_773331943", "outTable_302610513"]]
        rmsk_file = "rmsk.sorted.gtf.gz"
        refseq_file = "hg19.ncbiRefSeq.sorted.gtf.gz"

    return samples, rmsk_file, refseq_file, files_path

def extraction(prefix,  AG_min, AGfreq_threshold, cov_threshold, interval):
    """Extract candidate A-to-G sites from a table and encode their windows.

    Reads ``prefix + ".gz"`` twice: once sequentially to select candidate
    rows, and once through its Tabix index to fetch the ``interval`` rows
    centred on every candidate. Two files are written:

    * ``prefix + "_feature_vectors.tsv"``: for every accepted site, the
      one-hot encoded reference bases stacked on top of the per-base
      frequencies, one column per position of the window;
    * ``prefix + "_intervals.tsv"``: one row of metadata per accepted site.

    Parameters
    ----------
    prefix : str
        Path of the table without the ``.gz`` extension.
    AG_min : int
        Minimum count at index 2 of the ``Bases`` list (presumably the G
        count, given the ``[A,C,G,T]`` column naming -- confirm).
    AGfreq_threshold : float
        Minimum ratio between that count and the sum of ``Bases``.
    cov_threshold : int
        Minimum value of the ``Cov`` column.
    interval : int
        Number of rows a window must contain to be accepted; expected to be
        odd so that the window is symmetric around the site.

    Returns
    -------
    None
    """
    # Local import: parses the "[a, c, g, t]" text fields without executing
    # arbitrary expressions read from the input file (previously ``eval``).
    import ast

    starttime = datetime.now()
    columns = ["Region", "Position", "Ref", "Strand", "Cov", "Qual", "Bases", "AllSubs", "Freq", "gCov", "gQual", "g[A,C,G,T]", "gAllSubs", "gFreq"]

    editing = []
    c = 0  # keeps the summary print valid even when the table has no rows
    with gzip.open(prefix+".gz") as redi:
        for c, l in enumerate(redi):
            line = l.decode("utf-8").rstrip().split("\t")
            # Conditions are evaluated left to right, so int() is only
            # reached for rows whose coverage field is not the "-" marker.
            if (line[0].find("chr") != -1
                    and line[4] != "-"
                    and int(line[4]) >= cov_threshold
                    and line[2] == "A"
                    and line[7] == "AG"):
                bases = ast.literal_eval(line[6])
                if bases[2]/sum(bases) >= AGfreq_threshold and bases[2] >= AG_min:
                    editing.append(line)

            if c % 50000000 == 0:
                print(f"\tSites evaluated: {c}")
    print("Total evaluated rows:", c)
    # Passing the column names to the constructor keeps an empty candidate
    # list from raising a length-mismatch error on column assignment.
    editing = pd.DataFrame(editing, columns=columns)
    print("Total extracted Candidates Editing sites for current sample:", editing.shape[0])
    stoptime = datetime.now()
    print(f"[{datetime.now()}] Extraction of Editing Candidates finished for current sample. Elapsed time: {stoptime-starttime}.")
    print(f"[{datetime.now()}] Starting extraction of intervals.")
    ohe = OneHotEncoder()
    ohe.fit(np.array(["A", "C", "G", "T"]).reshape(-1, 1))

    intervals = []
    starttime_preds = datetime.now()
    total_extracted = 0
    features_extracted_filepath = prefix+ "_feature_vectors.tsv"

    df = editing.query("Region != 'chrM'")
    print(f"[{datetime.now()}] Loading reditable with tabix and pysam:", prefix)
    # Context managers guarantee that the feature file is flushed and the
    # Tabix handle released even if a site raises half-way through.
    with open(features_extracted_filepath, "w") as features_extracted, pysam.TabixFile(prefix+".gz") as srr:
        with tqdm(total=df.shape[0], position=0, leave=True) as pbar:
            for site in df.itertuples():
                # Window bounds are kept as computed originally (float
                # arithmetic) so the values written to the intervals table
                # do not change format.
                start = int(site.Position) - ((interval-1)/2)
                stop = int(site.Position) + ((interval-1)/2)
                site_bases = ast.literal_eval(site.Bases)
                AGrna = site_bases[2]/sum(site_bases)
                # Clamp at 0: a negative start makes the fetch raise instead
                # of returning a short window that the size check rejects.
                fetch_start = max(start-1, 0)
                srr_interval = [s.split("\t") for s in srr.fetch(site.Region, fetch_start, stop)]
                srr_interval = pd.DataFrame(srr_interval, columns=columns)
                # Accept only complete windows lying on a single strand.
                if srr_interval.shape[0] == interval and len(set(srr_interval["Strand"])) == 1:
                    intervals.append([site.Region, site.Position, site.Ref, site.Strand, AGrna, site.Bases, start, stop, stop-start + 1, srr_interval.shape[0]])
                    total_extracted += 1
                    strand = site.Strand
                    seq = srr_interval.Ref.values.reshape(-1,1)
                    seq_ohe = ohe.transform(seq).toarray().T
                    # One row per window position, one column per base count.
                    counts = np.array([ast.literal_eval(vect) for vect in srr_interval["Bases"]])
                    vects_freqs = (counts / counts.sum(axis=1, keepdims=True)).T
                    encoded_site = pd.concat([pd.DataFrame(seq_ohe), pd.DataFrame(vects_freqs)])
                    encoded_site.reset_index(drop=True, inplace=True)
                    # NOTE(review): Strand is parsed from text here, so this
                    # comparison with the integer 0 looks like it can never
                    # be true and the window is never reversed. Left as is
                    # because changing it alters the feature vectors --
                    # confirm against the pipeline that produced the
                    # training data before switching to a string comparison.
                    if strand == 0:
                        encoded_site = pd.DataFrame(np.flip(encoded_site.values, axis=1))
                    encoded_site.to_csv(features_extracted, mode="a", sep="\t", header = None, index=None)
                pbar.update(1)
    print(f"[{datetime.now()}] Total extracted Editing sites: {total_extracted}.")
    print(f"[{datetime.now()}] Features Extraction Finished. Elapsed time {datetime.now()-starttime_preds}.")

    # Column names go to the constructor so that zero accepted sites still
    # produce a valid (header-only) intervals table.
    intervals = pd.DataFrame(intervals, columns=["Region", "Position", "RefBase", "Strand", "FreqAGrna", "BasesCounts", "Start", "Stop", "Intlen", "TabixLen"])
    intervals.to_csv(prefix + "_intervals.tsv", sep="\t", index=None)
    print(f"[{datetime.now()}] Computation Finished. Total Elapsed time: {datetime.now()-starttime}")

def candidates_bona_fide_extraction(name, path, cells, AG_min, AGfreq_threshold, cov_threshold, rna_cov_threshold, gen):
    """Label candidate sites of one table using the matching WGS table.

    Scans ``{path}/{name}.gz`` and, for every retained row, appends
    ``[Region, Position, Class]`` to the result, which is written to
    ``{path}/{name}_bona_fide_sites.tsv``. Classes are assigned as follows:

    * ``0``: the row has the ``AG`` substitution and the WGS row at the same
      position (with enough coverage) also reports ``AG``;
    * ``1``: the row has the ``AG`` substitution and the WGS row (with
      enough coverage) reports no substitution (``"-"``);
    * ``2``: the row reports no substitution at all and ``gen`` is neither
      ``"wt"`` nor ``"oe"``.

    Parameters
    ----------
    name : str
        Table name (without ``.gz``) inside ``path``.
    path : str
        Directory holding the tables.
    cells : str
        Cell line name; the WGS table is ``{path}/{cells}_WGS.gz``.
    AG_min : int
        Minimum count at index 2 of the base-count list.
    AGfreq_threshold : float
        Minimum ratio between that count and the sum of the base counts.
    cov_threshold : int
        Minimum coverage required in column 9 of the WGS row.
    rna_cov_threshold : int
        Minimum coverage required in column 4 of the table row.
    gen : str
        Genotype tag of the sample (the notebook passes "wt", "si", "ko"
        or "oe").

    Returns
    -------
    None
    """
    # Local import: parses the "[a, c, g, t]" text field without executing
    # arbitrary expressions read from the input file (previously ``eval``).
    import ast

    starttime = datetime.now()
    sites = []
    c = 0  # keeps the summary print valid even when the table has no rows

    # Both handles are released on exit, including on errors.
    with pysam.TabixFile(f"{path}/{cells}_WGS.gz") as wgs, gzip.open(f"{path}/{name}.gz") as redi:
        for c, l in enumerate(redi):
            if c % 50000000 == 0:
                print(f"\tSites evaluated: {c}")

            line = l.decode("utf-8").rstrip().split("\t")
            # Guard clauses, evaluated in the same order as the original
            # nested checks so int() never sees the "-" marker.
            if line[0].find("chr") == -1 or line[0] == "chrM":
                continue
            if line[4] == "-" or int(line[4]) < rna_cov_threshold:
                continue
            if line[2] != "A":
                continue

            # NOTE(review): the original test was the chained comparison
            # `"AG" in line[7] == "AG"`, which Python evaluates as
            # `"AG" in line[7] and line[7] == "AG"`, i.e. exactly this
            # equality. If rows with several substitutions were meant to be
            # accepted as class 0, this has to become `"AG" in line[7]` --
            # confirm the intent before changing it.
            if line[7] == "AG":
                bases = ast.literal_eval(line[6])
                if bases[2]/sum(bases) < AGfreq_threshold or bases[2] < AG_min:
                    continue
                position = int(line[1])
                # Tabix coordinates are 0-based half-open: fetch this base only.
                for ROW_WGS in wgs.fetch(line[0], position-1, position):
                    row_wgs = ROW_WGS.split("\t")
                    if row_wgs[9] == "-" or int(row_wgs[9]) < cov_threshold:
                        continue
                    if "AG" in row_wgs[12]:
                        sites.append([line[0], line[1], 0])
                    elif row_wgs[12] == "-":
                        sites.append([line[0], line[1], 1])
            elif line[7] == "-" and gen != "wt" and gen != "oe":
                sites.append([line[0], line[1], 2])

    # Column names go to the constructor so that an empty result still
    # yields a valid (header-only) table instead of a length-mismatch error.
    sites = pd.DataFrame(sites, columns=["Region", "Position", "Class"])
    print("Total evaluated rows:", c)
    print(f"Total extracted candidates bona fide Sites for {name} sample: {sites.shape[0]}")
    stoptime = datetime.now()

    print(f"[{datetime.now()}] Extraction of candidates bonafide sites finished for {name} sample. Elapsed time: {stoptime-starttime}.")
    sites.to_csv(f"{path}/{name}_bona_fide_sites.tsv", sep="\t", index=None)
                                                                                 
def _run_and_wait(commands, env):
    """Start every command concurrently and block until all have exited."""
    processes = [subprocess.Popen(args, env=env) for args in commands]
    for args, process in zip(commands, processes):
        if process.wait() != 0:
            print(f"[{datetime.now()}] WARNING: command exited with status {process.returncode}: {' '.join(args)}")


def bonafide_identification(tables, path, rmsk, refseq):
    """Merge, annotate and filter the per-sample bona fide site tables.

    Steps, all operating on files inside ``path``:

    1. For every group in ``tables``, the table at index 1 (with class 2
       rows removed) is concatenated to each of the other tables of the
       group and written as ``{table}_{group[1]}_candidates_bona_fide_sites.tsv``
       (no header).
    2. Every candidates file is annotated with ``AnnotateTablePython3.py``
       against ``rmsk`` and the result is annotated again against
       ``refseq``.
    3. Annotated tables get a header and are saved as ``*_annoted.tsv``.
    4. Sites are filtered on the annotation columns and saved, sorted, as
       ``*_bona_fide_sites.tsv``.

    Parameters
    ----------
    tables : list of list of str
        Groups of table names; index 1 of each group is the table whose
        class 2 rows are discarded before merging.
    path : str
        Directory containing the ``*_bona_fide_sites.tsv`` inputs.
    rmsk : str
        RepeatMasker annotation file name inside the utilities directory.
    refseq : str
        RefSeq annotation file name inside the utilities directory.

    Returns
    -------
    None
    """
    u_path = "/lustrehome/pietrolucamazzacuva/filezilla-recas/scripts/utilities"
    annotate_script = f"{u_path}/AnnotateTablePython3.py"
    tool_env = dict(os.environ, PATH="/lustrehome/pietrolucamazzacuva/anaconda3/envs/tensorflow-gpu/bin")
    candidates_suffix = "_candidates_bona_fide_sites.tsv"
    dedup_keys = ["Region", "Position", "Class"]

    # Pair every table of a group with the one at index 1. This yields
    # index 0 for two-table groups and indexes 0 and 2 for three-table
    # groups, which is what the former cell-line specific branches did,
    # without depending on a module-level global.
    for group in tables:
        bonafide_adar_inactive = pd.read_table(f"{path}/{group[1]}_bona_fide_sites.tsv")
        bonafide_adar_inactive = bonafide_adar_inactive[bonafide_adar_inactive.loc[:, "Class"]!=2]
        for j, table in enumerate(group):
            if j == 1:
                continue
            bonafide_wt = pd.read_table(f"{path}/{table}_bona_fide_sites.tsv")
            bonafide = pd.concat([bonafide_wt, bonafide_adar_inactive], axis=0)
            bonafide = bonafide.drop_duplicates(subset=dedup_keys, keep="first")
            bonafide.to_csv(f"{path}/{table}_{group[1]}{candidates_suffix}", sep="\t", index=None, header=False)

    # The candidates files are already header-less, so they are passed to
    # the annotator as written. Re-reading them with an inferred header and
    # saving them again (as was done before) dropped the first site of
    # every file.
    rmsk_commands = []
    for file_name in os.listdir(path):
        if file_name.endswith(candidates_suffix):
            name = file_name[:-len(".tsv")]
            rmsk_commands.append(["python3", annotate_script,
                                  "-a", f"{u_path}/{rmsk}", "-n", "rmsk",
                                  "-i", f"{path}/{name}.tsv",
                                  "-o", f"{path}/{name}.out.rmsk", "-u"])
    # Wait for the annotators instead of sleeping for a fixed time, so the
    # next step never reads partially written tables.
    _run_and_wait(rmsk_commands, tool_env)

    refseq_commands = []
    for name in os.listdir(path):
        # Match on the suffix so that outputs of an earlier refseq pass
        # (".out.rmsk.refseq") are not annotated a second time.
        if name.endswith(".out.rmsk"):
            refseq_commands.append(["python3", annotate_script,
                                    "-a", f"{u_path}/{refseq}",
                                    "-i", f"{path}/{name}",
                                    "-o", f"{path}/{name}.refseq", "-u"])
    _run_and_wait(refseq_commands, tool_env)

    cols = ["Region", "Position", "Class", "RMSK-Rep", "RMSK-Reg", "RefSeq-Rep", "RefSeq-Reg"]
    for file_name in os.listdir(path):
        if file_name.endswith(".out.rmsk.refseq"):
            df = pd.read_table(os.path.join(path, file_name), header=None)
            name = file_name.replace(".out.rmsk.refseq", "_annoted.tsv")
            df.columns = cols
            df.to_csv(os.path.join(path, name), sep="\t", index=None)

    for file_name in os.listdir(path):
        if file_name.find("candidates_bona_fide_sites_annoted.tsv") !=-1:
            bona_fide = pd.read_csv(os.path.join(path, file_name), sep="\t")
            in_rmsk = (bona_fide["RMSK-Rep"] != "-") & (bona_fide["RMSK-Reg"] != "-")
            out_rmsk = (bona_fide["RMSK-Rep"] == "-") & (bona_fide["RMSK-Reg"] == "-")

            # Sites with a RepeatMasker annotation are kept whatever their class.
            rep = bona_fide[in_rmsk]
            non_rep = bona_fide[out_rmsk]

            del bona_fide

            # Outside RepeatMasker regions: class 0 sites are all kept,
            # class 1 sites only when they carry a RefSeq annotation, and
            # class 2 sites are dropped.
            non_rep_n = non_rep[non_rep["Class"]==0]
            non_rep_p = non_rep[non_rep["Class"]==1]

            del non_rep

            non_rep_p = non_rep_p[(non_rep_p["RefSeq-Rep"] != "-") & (non_rep_p["RefSeq-Reg"] != "-")]
            bona_fide = pd.concat([rep, non_rep_n, non_rep_p])

            del rep, non_rep_n, non_rep_p

            name = file_name.replace("candidates_bona_fide_sites_annoted.tsv", "bona_fide_sites.tsv")
            bona_fide = bona_fide.sort_values(["Region", "Position"])
            bona_fide.to_csv(os.path.join(path, name), sep="\t", index=None)

In [2]:
# Thresholds shared by the extraction steps of this cell.
min_dna_cov = 10
min_rna_cov = 50
min_AG_rate = 0.01
min_G = 3
seq_lenght = 101

cells = "a549"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# Step 1: one feature-extraction job per table, in group order.
inputs = [
    [os.path.join(filespath, f"{table}"), min_G, min_AG_rate, min_rna_cov, seq_lenght]
    for group in samples
    for table in group
]

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# Step 2: label candidate sites; within each of the three groups the first
# table is tagged "wt" and the second "si".
genotypes = ("wt", "si")
inputs = [
    [samples[i][k], filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov, gen]
    for i in range(3)
    for k, gen in enumerate(genotypes)
]

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Step 3: merge, annotate and filter the labelled sites.
bonafide_identification(samples, filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0	Sites evaluated: 0
	Sites evaluated: 0


	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
Total evaluated rows: 216398797
Total extracted Candidates Editing sites for current sample: 6334
[2024-06-20 22:58:22.589626] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:48.816

  0%|          | 21/6332 [00:00<00:30, 208.73it/s][W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_436061877.gz.tbi
100%|██████████| 6332/6332 [00:20<00:00, 308.56it/s]


[2024-06-20 22:58:43.237277] Total extracted Editing sites: 2686.
[2024-06-20 22:58:43.238624] Features Extraction Finished. Elapsed time 0:00:20.645196.
[2024-06-20 22:58:43.255616] Computation Finished. Total Elapsed time: 0:07:09.482184
Total evaluated rows: 236217106
Total extracted Candidates Editing sites for current sample: 15176
[2024-06-20 22:58:59.608335] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:07:25.834205.
[2024-06-20 22:58:59.610056] Starting extraction of intervals.
[2024-06-20 22:58:59.621243] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_773331943


  0%|          | 0/15175 [00:00<?, ?it/s]

Total evaluated rows:

  0%|          | 35/15175 [00:00<00:43, 347.22it/s]

 235536788
Total extracted Candidates Editing sites for current sample: 16327
[2024-06-20 22:58:59.871058] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:07:26.097302.
[2024-06-20 22:58:59.872960] Starting extraction of intervals.
[2024-06-20 22:58:59.884580] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_535670354


[W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_773331943.gz.tbi
  1%|          | 102/15175 [00:00<00:48, 309.59it/s][W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_535670354.gz.tbi
 14%|█▍        | 2266/16326 [00:07<00:44, 318.05it/s]

Total evaluated rows: 241233789
Total extracted Candidates Editing sites for current sample: 8834
[2024-06-20 22:59:07.549251] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:07:33.774983.
[2024-06-20 22:59:07.551001] Starting extraction of intervals.


 15%|█▍        | 2222/15175 [00:07<00:41, 312.12it/s]

[2024-06-20 22:59:07.560305] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_302610513


 18%|█▊        | 2991/16326 [00:09<00:42, 310.92it/s]

Total evaluated rows: 239780038
Total extracted Candidates Editing sites for current sample: 8358
[2024-06-20 22:59:09.839079] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:07:36.065105.
[2024-06-20 22:59:09.840637] Starting extraction of intervals.


  7%|▋         | 655/8833 [00:02<00:26, 304.30it/s]

[2024-06-20 22:59:09.848980] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_396704193


 22%|██▏       | 1900/8833 [00:06<00:23, 297.19it/s]]

Total evaluated rows: 241316303
Total extracted Candidates Editing sites for current sample: 18396
[2024-06-20 22:59:13.938996] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:07:40.165606.
[2024-06-20 22:59:13.940744] Starting extraction of intervals.
[2024-06-20 22:59:13.953993] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_192318299


100%|██████████| 8833/8833 [00:26<00:00, 327.56it/s]]]
 98%|█████████▊| 8205/8357 [00:24<00:00, 310.32it/s]

[2024-06-20 22:59:34.602669] Total extracted Editing sites: 3767.
[2024-06-20 22:59:34.604190] Features Extraction Finished. Elapsed time 0:00:27.050903.


 73%|███████▎  | 11020/15175 [00:34<00:12, 345.96it/s]

[2024-06-20 22:59:34.632821] Computation Finished. Total Elapsed time: 0:08:00.858571


100%|██████████| 8357/8357 [00:25<00:00, 331.66it/s]s]
 36%|███▋      | 6688/18396 [00:21<00:29, 390.91it/s]

[2024-06-20 22:59:35.110966] Total extracted Editing sites: 3526.
[2024-06-20 22:59:35.112383] Features Extraction Finished. Elapsed time 0:00:25.269999.


 70%|██████▉   | 11359/16326 [00:35<00:14, 338.95it/s]

[2024-06-20 22:59:35.132154] Computation Finished. Total Elapsed time: 0:08:01.358206


100%|██████████| 15175/15175 [00:47<00:00, 320.77it/s]


[2024-06-20 22:59:47.070485] Total extracted Editing sites: 6450.
[2024-06-20 22:59:47.072034] Features Extraction Finished. Elapsed time 0:00:47.460085.


 95%|█████████▍| 15455/16326 [00:47<00:02, 348.60it/s]

[2024-06-20 22:59:47.106644] Computation Finished. Total Elapsed time: 0:08:13.332540


100%|██████████| 16326/16326 [00:49<00:00, 328.17it/s]


[2024-06-20 22:59:49.782611] Total extracted Editing sites: 6856.
[2024-06-20 22:59:49.784150] Features Extraction Finished. Elapsed time 0:00:49.909293.
[2024-06-20 22:59:49.823810] Computation Finished. Total Elapsed time: 0:08:16.050076


100%|██████████| 18396/18396 [00:56<00:00, 327.20it/s]


[2024-06-20 23:00:10.258783] Total extracted Editing sites: 7691.
[2024-06-20 23:00:10.260284] Features Extraction Finished. Elapsed time 0:00:56.317654.
[2024-06-20 23:00:10.299339] Computation Finished. Total Elapsed time: 0:08:36.525977
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	S

Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:08:45
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_192318299_outTable_436061877_candidates_bona_fide_sites.out.rmsk
Script time --> END: 20/06/2024 23:08:50
Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:08:45
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_535670354_outTable_396704193_candidates_bona_fide_sites.out.rmsk
Script time --> END: 20/06/2024 23:08:50
Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:08:45
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_773331943_outTable_302610513_candidates_bona_fide_sites.out.rmsk
Script time --> END: 20/06/2024 23:08:50
Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:09:43
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_d

In [3]:
# Thresholds shared by the extraction steps of this cell.
min_dna_cov = 10
min_rna_cov = 50
min_AG_rate = 0.01
min_G = 3
seq_lenght = 101

cells = "HEK293T"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# Step 1: one feature-extraction job per table, in group order.
inputs = [
    [os.path.join(filespath, f"{table}"), min_G, min_AG_rate, min_rna_cov, seq_lenght]
    for group in samples
    for table in group
]

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# Step 2: label candidate sites; within each of the three groups the tables
# are tagged, in order, "wt", "ko" and "oe".
genotypes = ("wt", "ko", "oe")
inputs = [
    [samples[i][k], filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov, gen]
    for i in range(3)
    for k, gen in enumerate(genotypes)
]

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Step 3: merge, annotate and filter the labelled sites.
bonafide_identification(samples, filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0	Sites evaluated: 0

	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0

	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
Total evaluated rows: 194677901
Total extracted Candidates Editing sites 

 20%|█▉        | 2299/11701 [00:07<00:28, 333.76it/s]

Total evaluated rows: 199831579
Total extracted Candidates Editing sites for current sample: 24397
[2024-06-20 23:16:57.528219] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:13.991545.
[2024-06-20 23:16:57.529895] Starting extraction of intervals.
[2024-06-20 23:16:57.547187] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_110067244


 21%|██▏       | 2502/11701 [00:08<00:28, 325.41it/s]

	Sites evaluated: 200000000


  1%|          | 175/24391 [00:00<01:32, 260.54it/s]]

	Sites evaluated: 200000000


 28%|██▊       | 3287/11701 [00:10<00:25, 336.26it/s]

	Sites evaluated: 200000000


  6%|▌         | 1463/24391 [00:04<01:05, 352.16it/s]

	Sites evaluated: 200000000


 34%|███▍      | 4032/11701 [00:13<00:20, 375.71it/s]

	Sites evaluated: 200000000


  8%|▊         | 1989/24391 [00:06<01:10, 317.40it/s]

	Sites evaluated: 200000000


 44%|████▍     | 5201/11701 [00:16<00:19, 330.98it/s]

	Sites evaluated: 200000000


 15%|█▍        | 3537/24391 [00:11<01:01, 338.43it/s]

Total evaluated rows: 205675117
Total extracted Candidates Editing sites for current sample: 11897
[2024-06-20 23:17:08.904752] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:25.368399.
[2024-06-20 23:17:08.906031] Starting extraction of intervals.
[2024-06-20 23:17:08.916164] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_364841872


 16%|█▋        | 1949/11891 [00:05<00:27, 362.96it/s]

Total evaluated rows: 208788107


 70%|██████▉   | 8145/11701 [00:25<00:09, 372.75it/s]

Total extracted Candidates Editing sites for current sample: 11086
[2024-06-20 23:17:14.842036] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:06:31.305257.
[2024-06-20 23:17:14.843638] Starting extraction of intervals.


 23%|██▎       | 5568/24391 [00:16<00:53, 353.98it/s]

[2024-06-20 23:17:14.854045] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_597789462


100%|██████████| 11701/11701 [00:35<00:00, 330.88it/s]


[2024-06-20 23:17:25.121305] Total extracted Editing sites: 5094.
[2024-06-20 23:17:25.122692] Features Extraction Finished. Elapsed time 0:00:35.456901.


 37%|███▋      | 8909/24391 [00:27<00:46, 334.61it/s]

[2024-06-20 23:17:25.150804] Computation Finished. Total Elapsed time: 0:06:41.615066


100%|██████████| 11891/11891 [00:37<00:00, 318.06it/s]


[2024-06-20 23:17:46.416743] Total extracted Editing sites: 5077.
[2024-06-20 23:17:46.418239] Features Extraction Finished. Elapsed time 0:00:37.510433.
[2024-06-20 23:17:46.448269] Computation Finished. Total Elapsed time: 0:07:02.911940


100%|██████████| 11079/11079 [00:32<00:00, 339.37it/s]


[2024-06-20 23:17:47.666364] Total extracted Editing sites: 4770.
[2024-06-20 23:17:47.667852] Features Extraction Finished. Elapsed time 0:00:32.822303.
[2024-06-20 23:17:47.693392] Computation Finished. Total Elapsed time: 0:07:04.156634


100%|██████████| 24391/24391 [01:13<00:00, 332.08it/s]


[2024-06-20 23:18:11.325984] Total extracted Editing sites: 9960.
[2024-06-20 23:18:11.327608] Features Extraction Finished. Elapsed time 0:01:13.795830.
[2024-06-20 23:18:11.378308] Computation Finished. Total Elapsed time: 0:07:27.841671
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
Total evaluated rows: 266283468
Total extracted Candidates Editing sites for current sample: 54617
[2024-06-20 23:19:09.523138] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:08:25.987219.
[2024-06-20 23:19:09.524678] Starting extraction of intervals.
[2024-06-20 23:19:09.553983] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_208420383


 22%|██▏       | 12036/54614 [00:38<02:08, 330.75it/s]

Total evaluated rows: 288775068
Total extracted Candidates Editing sites for current sample: 37723
[2024-06-20 23:19:48.632268] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:09:05.096241.
[2024-06-20 23:19:48.634055] Starting extraction of intervals.
[2024-06-20 23:19:48.656887] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_572868058


 15%|█▍        | 5509/37720 [00:18<01:49, 293.84it/s]]

Total evaluated rows: 298198400
Total extracted Candidates Editing sites for current sample: 67410
[2024-06-20 23:20:07.617086] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:09:24.080233.
[2024-06-20 23:20:07.618757] Starting extraction of intervals.


 33%|███▎      | 17880/54614 [00:57<01:53, 322.32it/s]

[2024-06-20 23:20:07.651913] Loading reditable with tabix and pysam:

 15%|█▍        | 5539/37720 [00:18<01:49, 294.58it/s]

 /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_530905096


  0%|          | 46/67408 [00:00<02:27, 456.46it/s]/s][W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_530905096.gz.tbi
 34%|███▍      | 18580/54614 [01:00<02:37, 228.47it/s]

Total evaluated rows: 298335470
Total extracted Candidates Editing sites for current sample: 66236
[2024-06-20 23:20:10.089100] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:09:26.552563.
[2024-06-20 23:20:10.090511] Starting extraction of intervals.

 17%|█▋        | 6225/37720 [00:21<01:49, 288.50it/s]


[2024-06-20 23:20:10.125660] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_814257267


  0%|          | 45/66233 [00:00<02:28, 444.92it/s]s]][W::hts_idx_load3] The index file is older than the data file: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_814257267.gz.tbi
  3%|▎         | 1682/66233 [00:07<02:58, 362.31it/s]]

	Sites evaluated: 300000000


100%|██████████| 37720/37720 [02:03<00:00, 304.78it/s]


[2024-06-20 23:21:52.545699] Total extracted Editing sites: 15241.
[2024-06-20 23:21:52.547589] Features Extraction Finished. Elapsed time 0:02:03.911378.


 46%|████▌     | 30356/66233 [01:42<02:21, 253.23it/s]

[2024-06-20 23:21:52.619219] Computation Finished. Total Elapsed time: 0:11:09.083227


 46%|████▌     | 30561/66233 [01:42<01:48, 329.46it/s]

	Sites evaluated: 350000000


100%|██████████| 54614/54614 [02:56<00:00, 310.28it/s]
 52%|█████▏    | 34423/66233 [01:55<01:47, 296.16it/s]

[2024-06-20 23:22:05.709157] Total extracted Editing sites: 22910.
[2024-06-20 23:22:05.711114] Features Extraction Finished. Elapsed time 0:02:56.184547.


 52%|█████▏    | 34453/66233 [01:55<01:47, 295.70it/s]

[2024-06-20 23:22:05.819946] Computation Finished. Total Elapsed time: 0:11:22.284066


 64%|██████▎   | 42068/66233 [02:20<01:15, 320.94it/s]

Total evaluated rows: 369524874


 63%|██████▎   | 42794/67408 [02:22<01:28, 279.31it/s]

Total extracted Candidates Editing sites for current sample: 54971
[2024-06-20 23:22:30.445528] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:11:46.909936.
[2024-06-20 23:22:30.447296] Starting extraction of intervals.
[2024-06-20 23:22:30.476711] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_599710609


100%|██████████| 66233/66233 [03:38<00:00, 302.96it/s]
 99%|█████████▉| 66777/67408 [03:41<00:01, 355.82it/s]

[2024-06-20 23:23:48.894535] Total extracted Editing sites: 27907.
[2024-06-20 23:23:48.896641] Features Extraction Finished. Elapsed time 0:03:38.804193.


 99%|█████████▉| 66813/67408 [03:41<00:01, 322.58it/s]

[2024-06-20 23:23:49.019091] Computation Finished. Total Elapsed time: 0:13:05.482591


100%|██████████| 67408/67408 [03:43<00:00, 301.85it/s]


[2024-06-20 23:23:51.105796] Total extracted Editing sites: 28354.
[2024-06-20 23:23:51.107770] Features Extraction Finished. Elapsed time 0:03:43.487258.


 42%|████▏     | 22813/54968 [01:20<01:39, 322.71it/s]

[2024-06-20 23:23:51.235882] Computation Finished. Total Elapsed time: 0:13:07.699063


100%|██████████| 54968/54968 [03:10<00:00, 288.38it/s]


[2024-06-20 23:25:41.172069] Total extracted Editing sites: 22284.
[2024-06-20 23:25:41.173951] Features Extraction Finished. Elapsed time 0:03:10.724731.
[2024-06-20 23:25:41.274966] Computation Finished. Total Elapsed time: 0:14:57.739403
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
Total evaluated rows: 194677901
Total extracted candidates bona fide Sites for outTable_905657585 sample: 8210567
[2024-06-20 23:32:33.835527

Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:48:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_110067244_outTable_597789462_candidates_bona_fide_sites.out.rmsk
Script time --> END: 20/06/2024 23:48:14
Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:48:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_572868058_outTable_364841872_candidates_bona_fide_sites.out.rmsk
Script time --> END: 20/06/2024 23:48:15
Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:48:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK293T/outTable_208420383_outTable_905657585_candidates_bona_fide_sites.out.rmsk
Script time --> END: 20/06/2024 23:48:17
Pysam version used: 0.22.0
Script time --> START: 20/06/2024 23:48:06
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/inde

In [4]:
# Thresholds and window size forwarded unchanged to the pipeline steps below.
min_dna_cov = 10
min_rna_cov = 30
min_AG_rate = 0.01
min_G = 2
seq_lenght = 101

# Dataset processed by this cell.
cells = "HEK"

samples, rmsk_file_name, refseq_file_name, filespath = give_inputs(cells)

# Step 1: one `extraction` task per sample table, walking every group in order.
inputs = [
    [os.path.join(filespath, f"{sample_name}"), min_G, min_AG_rate, min_rna_cov, seq_lenght]
    for sample_group in samples
    for sample_name in sample_group
]

with Pool(9) as pool:
    pool.starmap(extraction, inputs)

# Step 2: for each of the first three groups, queue element 0 tagged "wt"
# followed by element 1 tagged "ko" (same task order as two appends per group).
inputs = [
    [samples[group_idx][member_idx], filespath, cells, min_G, min_AG_rate, min_dna_cov, min_rna_cov, label]
    for group_idx in range(3)
    for member_idx, label in enumerate(("wt", "ko"))
]

with Pool(6) as pool:
    pool.starmap(candidates_bona_fide_extraction, inputs)

# Step 3: final pass over all sample groups with the two annotation file names.
bonafide_identification(samples, filespath, rmsk_file_name, refseq_file_name)

	Sites evaluated: 0
	Sites evaluated: 0	Sites evaluated: 0
	Sites evaluated: 0

	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 250000000
	Sites evaluated: 300000000
	Sites evaluated: 

 27%|██▋       | 1340/4980 [00:15<00:28, 125.91it/s]

Total evaluated rows: 422584480
Total extracted Candidates Editing sites for current sample: 12583
[2024-06-21 00:03:09.575480] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:13:01.698895.
[2024-06-21 00:03:09.577170] Starting extraction of intervals.
[2024-06-21 00:03:09.593031] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_181728208


100%|██████████| 4980/4980 [00:35<00:00, 139.97it/s]]


[2024-06-21 00:03:29.619463] Total extracted Editing sites: 2111.
[2024-06-21 00:03:29.620904] Features Extraction Finished. Elapsed time 0:00:35.730531.
[2024-06-21 00:03:29.634804] Computation Finished. Total Elapsed time: 0:13:21.758437


 81%|████████▏ | 10215/12571 [00:39<00:08, 288.82it/s]

	Sites evaluated: 450000000


100%|██████████| 12571/12571 [00:47<00:00, 262.08it/s]


[2024-06-21 00:03:57.681722] Total extracted Editing sites: 5419.
[2024-06-21 00:03:57.683067] Features Extraction Finished. Elapsed time 0:00:48.103907.
[2024-06-21 00:03:57.710897] Computation Finished. Total Elapsed time: 0:13:49.834337
	Sites evaluated: 450000000
	Sites evaluated: 450000000
Total evaluated rows: 452378572
Total extracted Candidates Editing sites for current sample: 8304
[2024-06-21 00:04:08.446169] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:14:00.570265.
[2024-06-21 00:04:08.447757] Starting extraction of intervals.
[2024-06-21 00:04:08.456748] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_724242056


  4%|▍         | 317/8295 [00:03<01:29, 88.83it/s] 

	Sites evaluated: 450000000


 81%|████████  | 6697/8295 [00:39<00:04, 351.17it/s]

Total evaluated rows: 469580145
Total extracted Candidates Editing sites for current sample: 6281
[2024-06-21 00:04:48.147142] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:14:40.271069.
[2024-06-21 00:04:48.148722] Starting extraction of intervals.
[2024-06-21 00:04:48.156728] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_816573740


 93%|█████████▎| 7717/8295 [00:43<00:02, 262.39it/s]

Total evaluated rows: 483808594


  8%|▊         | 483/6270 [00:03<00:23, 245.07it/s]

Total extracted Candidates Editing sites for current sample: 12185
[2024-06-21 00:04:51.760578] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:14:43.884334.
[2024-06-21 00:04:51.762162] Starting extraction of intervals.
[2024-06-21 00:04:51.771791] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_580067564


100%|██████████| 8295/8295 [00:45<00:00, 181.52it/s]


[2024-06-21 00:04:54.297978] Total extracted Editing sites: 3457.
[2024-06-21 00:04:54.299496] Features Extraction Finished. Elapsed time 0:00:45.849778.
[2024-06-21 00:04:54.319516] Computation Finished. Total Elapsed time: 0:14:46.443629


100%|██████████| 6270/6270 [00:34<00:00, 179.16it/s]]


[2024-06-21 00:05:23.239732] Total extracted Editing sites: 2710.
[2024-06-21 00:05:23.241228] Features Extraction Finished. Elapsed time 0:00:35.090540.
[2024-06-21 00:05:23.257438] Computation Finished. Total Elapsed time: 0:15:15.381380


 85%|████████▍ | 10329/12180 [00:46<00:10, 180.72it/s]

	Sites evaluated: 500000000


100%|██████████| 12180/12180 [00:56<00:00, 216.97it/s]


[2024-06-21 00:05:48.038639] Total extracted Editing sites: 5312.
[2024-06-21 00:05:48.040024] Features Extraction Finished. Elapsed time 0:00:56.275927.
[2024-06-21 00:05:48.068157] Computation Finished. Total Elapsed time: 0:15:40.191933
Total evaluated rows: 519011879
Total extracted Candidates Editing sites for current sample: 7987
[2024-06-21 00:06:14.468222] Extraction of Editing Candidates finished for current sample. Elapsed time: 0:16:06.591532.
[2024-06-21 00:06:14.469949] Starting extraction of intervals.
[2024-06-21 00:06:14.478656] Loading reditable with tabix and pysam: /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_854894021


100%|██████████| 7971/7971 [00:46<00:00, 173.10it/s]


[2024-06-21 00:07:00.883094] Total extracted Editing sites: 3620.
[2024-06-21 00:07:00.884411] Features Extraction Finished. Elapsed time 0:00:46.412478.
[2024-06-21 00:07:00.903324] Computation Finished. Total Elapsed time: 0:16:53.026649
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 0
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 50000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 100000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 150000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	Sites evaluated: 200000000
	S

Pysam version used: 0.22.0
Script time --> START: 21/06/2024 00:24:28
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_580067564_outTable_718392497_candidates_bona_fide_sites.out.rmsk
Script time --> END: 21/06/2024 00:24:32
Pysam version used: 0.22.0
Script time --> START: 21/06/2024 00:24:28
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_724242056_outTable_816573740_candidates_bona_fide_sites.out.rmsk
Script time --> END: 21/06/2024 00:24:32
Pysam version used: 0.22.0
Script time --> START: 21/06/2024 00:24:28
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/HEK/outTable_181728208_outTable_854894021_candidates_bona_fide_sites.out.rmsk
Script time --> END: 21/06/2024 00:24:33
Pysam version used: 0.22.0
Script time --> START: 21/06/2024 00:25:26
Table saved on /lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_data