# Track File Construction

## Setup

In [1]:
import re

import pandas as pd

In [2]:
def dedupe_tracks(dataset_tracks):
    """Make the ``description`` column of ``dataset_tracks`` unique, in place.

    Every description that occurs more than once gets a running suffix
    (``_1``, ``_2``, ...) appended in row order; descriptions that are
    already unique are left untouched. A frame without duplicates is a
    no-op, so the call can be repeated on an already-deduplicated frame.

    Args:
        dataset_tracks: DataFrame with a ``description`` column of strings.
            It is modified in place.

    Returns:
        None.

    Raises:
        AssertionError: if the descriptions are still not unique afterwards,
            e.g. when a suffixed name collides with an existing description
            or when missing descriptions are duplicated.
    """
    descriptions = dataset_tracks["description"]
    # Missing descriptions cannot be suffixed; they are excluded here and are
    # caught by the uniqueness check at the end instead.
    is_dupe = descriptions.duplicated(keep=False) & descriptions.notna()
    dupes = descriptions[is_dupe]
    if not dupes.empty:
        # 1-based occurrence number of each row within its group of equal
        # descriptions, computed in one pass instead of rescanning the frame
        # once per duplicated description.
        occurrence = dupes.groupby(dupes.to_numpy()).cumcount() + 1
        # Cell-wise .at writes keep the in-place mutation the callers rely on.
        for row_label, desc, nth in zip(dupes.index, dupes, occurrence):
            dataset_tracks.at[row_label, "description"] = f"{desc}_{nth}"
    assert not dataset_tracks["description"].duplicated(keep=False).any(), (
        "descriptions are still not unique after suffixing"
    )

### Enformer and Basenji2 Data

In [35]:
# Load the tab-separated human target table of the Basenji2 dataset straight from GitHub.
# Cite: Kelley et al Cross-species regulatory sequence activity prediction. PLoS Comput. Biol. 16, e1008050 (2020).
targets_txt = 'https://raw.githubusercontent.com/calico/basenji/master/manuscripts/cross2020/targets_human.txt'
target_df = pd.read_table(targets_txt)
target_df

Unnamed: 0,index,genome,identifier,file,clip,scale,sum_stat,description
0,0,0,ENCFF833POA,/home/drk/tillage/datasets/human/dnase/encode/...,32,2,mean,DNASE:cerebellum male adult (27 years) and mal...
1,1,0,ENCFF110QGM,/home/drk/tillage/datasets/human/dnase/encode/...,32,2,mean,DNASE:frontal cortex male adult (27 years) and...
2,2,0,ENCFF880MKD,/home/drk/tillage/datasets/human/dnase/encode/...,32,2,mean,DNASE:chorion
3,3,0,ENCFF463ZLQ,/home/drk/tillage/datasets/human/dnase/encode/...,32,2,mean,DNASE:Ishikawa treated with 0.02% dimethyl sul...
4,4,0,ENCFF890OGQ,/home/drk/tillage/datasets/human/dnase/encode/...,32,2,mean,DNASE:GM03348
...,...,...,...,...,...,...,...,...
5308,5308,0,CNhs14239,/home/drk/tillage/datasets/human/cage/fantom/C...,384,1,sum,CAGE:epithelioid sarcoma cell line:HS-ES-2R
5309,5309,0,CNhs14240,/home/drk/tillage/datasets/human/cage/fantom/C...,384,1,sum,CAGE:squamous cell lung carcinoma cell line:RE...
5310,5310,0,CNhs14241,/home/drk/tillage/datasets/human/cage/fantom/C...,384,1,sum,CAGE:gastric cancer cell line:GSS
5311,5311,0,CNhs14244,/home/drk/tillage/datasets/human/cage/fantom/C...,384,1,sum,CAGE:carcinoid cell line:NCI-H727


### Basenji1 Data

In [18]:
# The Basenji1 target list is tab-separated without a header row, so the
# column names are supplied here; reset_index() then exposes the row
# position as an "index" column.
b1_target_df = pd.read_table(
    "Models/Basenji/basenji1_targets.txt",
    header=None,
    names=["id", "path", "description"],
).reset_index()
b1_target_df

Unnamed: 0,index,id,path,description
0,0,ENCSR000EQH_1_1,../../data/human/dnase/encode_fastq/covse/ENCS...,DNASE:T-helper 2 cell female adult (26 years)
1,1,ENCSR000EKJ_1_1,../../data/human/dnase/encode_fastq/covse/ENCS...,DNASE:induced pluripotent stem cell male adult...
2,2,ENCSR000EOM_2_1,../../data/human/dnase/encode_fastq/covse/ENCS...,DNASE:glomerular endothelial cell
3,3,ENCSR257BGZ_2_2,../../data/human/dnase/encode_fastq/covpe/ENCS...,DNASE:ACHN
4,4,ENCSR937UWI_2_2,../../data/human/dnase/encode_fastq/covpe/ENCS...,DNASE:hematopoietic multipotent progenitor cel...
...,...,...,...,...
4224,4224,CNhs13818,../../data/human/cage/fantom/coverage/CNhs1381...,"CAGE:Olfactory epithelial cells, donor3"
4225,4225,CNhs13819,../../data/human/cage/fantom/coverage/CNhs1381...,"CAGE:Olfactory epithelial cells, donor4"
4226,4226,CNhs11250,../../data/human/cage/fantom/coverage/CNhs1125...,CAGE:chronic myelogenous leukemia cell line:K562
4227,4227,CNhs10843,../../data/human/cage/fantom/coverage/CNhs1084...,"CAGE:Urothelial cells, donor0"


## Weiss Tracks

Tracks for `weiss_ingenome` and `weiss_constructs`.

In [3]:
# Tracks whose description mentions "osteoblast" or "neuron" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
weiss_tracks = target_df.loc[
    target_df["description"].str.contains("osteoblast", case=False)
    | target_df["description"].str.contains("neuron", case=False),
    ["description", "index"],
].copy()
weiss_tracks

Unnamed: 0,description,index
43,DNASE:osteoblast,43
485,DNASE:bipolar neuron originated from GM23338 t...,485
763,CHIP:H2AFZ:osteoblast,763
764,CHIP:H3K27ac:osteoblast,764
765,CHIP:H3K36me3:osteoblast,765
766,CHIP:H3K4me1:osteoblast,766
767,CHIP:H3K4me2:osteoblast,767
768,CHIP:H3K9me3:osteoblast,768
785,CHIP:H3K27me3:osteoblast,785
786,CHIP:H4K20me1:osteoblast,786


In [4]:
# keep=False flags every member of a group of identical descriptions,
# not only the repeats after the first occurrence.
weiss_dupes = weiss_tracks["description"].duplicated(keep=False)
weiss_tracks.loc[weiss_dupes]

Unnamed: 0,description,index
1644,CHIP:CTCF:bipolar neuron originated from GM233...,1644
3358,CHIP:CTCF:bipolar neuron originated from GM233...,3358


In [5]:
# Row 1644 is the first of the two rows flagged as duplicates above; tag it "_1".
weiss_tracks.at[1644, "description"] += "_1"

In [6]:
# Row 3358 is the second of the two rows flagged as duplicates above; tag it "_2".
weiss_tracks.at[3358, "description"] += "_2"

In [7]:
# print() shows the complete, untruncated description of row 1644.
print(weiss_tracks.at[1644, "description"])

CHIP:CTCF:bipolar neuron originated from GM23338 treated with 0.5 ug/mL doxycycline hyclate for 4 days_1


In [8]:
# print() shows the complete, untruncated description of row 3358.
print(weiss_tracks.at[3358, "description"])

CHIP:CTCF:bipolar neuron originated from GM23338 treated with 0.5 ug/mL doxycycline hyclate for 4 days_2


In [9]:
# Sanity check: number of rows that still share a description with another row.
weiss_tracks["description"].duplicated(keep=False).sum()

0

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

## K562 Tracks

Tracks for `bergmann_exp` and `bergmann_promoteronly`.

In [10]:
# Tracks whose description mentions "k562" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
k562_tracks = target_df.loc[
    target_df["description"].str.contains("k562", case=False),
    ["description", "index"],
].copy()
k562_tracks

Unnamed: 0,description,index
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
121,DNASE:K562,121
122,DNASE:K562,122
...,...,...
4503,CHIP:NFRKB:K562,4503
4508,CHIP:XRCC3:K562,4508
4510,CHIP:NRF1:K562,4510
4828,CAGE:chronic myelogenous leukemia cell line:K562,4828


In [11]:
# Number of rows whose description also occurs on at least one other row
# (keep=False counts every member of a duplicate group).
k562_tracks["description"].duplicated(keep=False).sum()

178

In [12]:
# Boolean mask of all rows with a repeated description, and the distinct
# descriptions those rows carry.
k562_dupes = k562_tracks["description"].duplicated(keep=False)
k562_dupe_descs = k562_tracks.loc[k562_dupes, "description"].unique()

In [13]:
# Append _1, _2, ... to repeated descriptions, in place, via the shared helper
# instead of a copy of its loop; the helper also asserts that the
# descriptions are unique before it returns.
dedupe_tracks(k562_tracks)

In [14]:
# Sanity check: number of rows that still share a description with another row.
k562_tracks["description"].duplicated(keep=False).sum()

0

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

## Kircher Tracks

Tracks for `kircher_ingenome`.

### Enformer/Basenji2

In [4]:
# Tracks whose description mentions "cage" or "dnase" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
kircher_tracks = target_df.loc[
    target_df["description"].str.contains("cage", case=False)
    | target_df["description"].str.contains("dnase", case=False),
    ["description", "index"],
].copy()
kircher_tracks

Unnamed: 0,description,index
0,DNASE:cerebellum male adult (27 years) and mal...,0
1,DNASE:frontal cortex male adult (27 years) and...,1
2,DNASE:chorion,2
3,DNASE:Ishikawa treated with 0.02% dimethyl sul...,3
4,DNASE:GM03348,4
...,...,...
5308,CAGE:epithelioid sarcoma cell line:HS-ES-2R,5308
5309,CAGE:squamous cell lung carcinoma cell line:RE...,5309
5310,CAGE:gastric cancer cell line:GSS,5310
5311,CAGE:carcinoid cell line:NCI-H727,5311


In [5]:
# Number of rows whose description also occurs on at least one other row
# (keep=False counts every member of a duplicate group).
kircher_tracks["description"].duplicated(keep=False).sum()

128

In [6]:
# Boolean mask of all rows with a repeated description, and the distinct
# descriptions those rows carry.
kircher_dupes = kircher_tracks["description"].duplicated(keep=False)
kircher_dupe_descs = kircher_tracks.loc[kircher_dupes, "description"].unique()

In [7]:
# Append _1, _2, ... to repeated descriptions, in place, via the shared helper
# instead of a copy of its loop; the helper also asserts that the
# descriptions are unique before it returns.
dedupe_tracks(kircher_tracks)

In [8]:
# Sanity check: number of rows that still share a description with another row.
kircher_tracks["description"].duplicated(keep=False).sum()

0

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [9]:
# Write the track list as tab-separated rows without a header row or row labels.
kircher_tracks.to_csv(
    "Data/Kircher_saturation_mutagenesis/kircher_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

### Basenji1

In [32]:
# Basenji1 tracks whose description mentions "cage" or "dnase" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
b1_kircher_tracks = b1_target_df.loc[
    b1_target_df["description"].str.contains("cage", case=False)
    | b1_target_df["description"].str.contains("dnase", case=False),
    ["description", "index"],
].copy()
b1_kircher_tracks

Unnamed: 0,description,index
0,DNASE:T-helper 2 cell female adult (26 years),0
1,DNASE:induced pluripotent stem cell male adult...,1
2,DNASE:glomerular endothelial cell,2
3,DNASE:ACHN,3
4,DNASE:hematopoietic multipotent progenitor cel...,4
...,...,...
4224,"CAGE:Olfactory epithelial cells, donor3",4224
4225,"CAGE:Olfactory epithelial cells, donor4",4225
4226,CAGE:chronic myelogenous leukemia cell line:K562,4226
4227,"CAGE:Urothelial cells, donor0",4227


In [33]:
# Suffix repeated descriptions in place; the helper asserts that the
# descriptions are unique before it returns.
dedupe_tracks(b1_kircher_tracks)

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [34]:
# Write the track list as tab-separated rows without a header row or row labels.
b1_kircher_tracks.to_csv(
    "Data/Kircher_saturation_mutagenesis/basenji1_kircher_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

## Arensbergen Tracks

Tracks for `arensbergen_ingenome`.

### Enformer/Basenji2

In [17]:
# Tracks whose description mentions "k562" or "hepg2" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
arensbergen_tracks = target_df.loc[
    target_df["description"].str.contains("k562", case=False)
    | target_df["description"].str.contains("hepg2", case=False),
    ["description", "index"],
].copy()
arensbergen_tracks

Unnamed: 0,description,index
27,DNASE:HepG2,27
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
91,DNASE:HepG2,91
...,...,...
4508,CHIP:XRCC3:K562,4508
4510,CHIP:NRF1:K562,4510
4828,CAGE:chronic myelogenous leukemia cell line:K562,4828
5109,CAGE:hepatocellular carcinoma cell line: HepG2...,5109


In [20]:
# Flag every row whose description occurs more than once (keep=False marks
# all members of a duplicate group), then show how many rows are affected.
arensbergen_dupes = arensbergen_tracks["description"].duplicated(keep=False)
arensbergen_dupes.sum()

225

In [21]:
# Distinct descriptions carried by the rows flagged as duplicates.
arensbergen_dupe_descs = arensbergen_tracks.loc[arensbergen_dupes, "description"].unique()

In [23]:
# Append _1, _2, ... to repeated descriptions, in place, via the shared helper
# instead of a copy of its loop; the helper also asserts that the
# descriptions are unique before it returns.
dedupe_tracks(arensbergen_tracks)

In [24]:
# Sanity check: number of rows that still share a description with another row.
arensbergen_tracks["description"].duplicated(keep=False).sum()

0

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

### Basenji1

In [38]:
# Basenji1 tracks whose description mentions "k562" or "hepg2" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
b1_arensbergen_tracks = b1_target_df.loc[
    b1_target_df["description"].str.contains("k562", case=False)
    | b1_target_df["description"].str.contains("hepg2", case=False),
    ["description", "index"],
].copy()
b1_arensbergen_tracks

Unnamed: 0,description,index
28,DNASE:K562 G1 phase,28
38,DNASE:K562,38
55,DNASE:K562,55
146,DNASE:K562 treated with sodium butyrate,146
177,DNASE:K562,177
...,...,...
3532,CAGE:hepatocellular carcinoma cell line: HepG2...,3532
3586,CAGE:chronic myelogenous leukemia cell line:K5...,3586
3587,CAGE:chronic myelogenous leukemia cell line:K5...,3587
3588,CAGE:chronic myelogenous leukemia cell line:K5...,3588


In [39]:
# Suffix repeated descriptions in place; the helper asserts that the
# descriptions are unique before it returns.
dedupe_tracks(b1_arensbergen_tracks)

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [40]:
# Write the track list as tab-separated rows without a header row or row labels.
b1_arensbergen_tracks.to_csv(
    "Data/Arensbergen_sure_mpra/basenji1_arensbergen_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

## Arensbergen Plasmid Tracks

Tracks for `arensbergen_plasmid`.

In [6]:
# Tracks that mention "k562" or "hepg2" AND "cage" or "dnase" (all case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
arensplasmid_tracks = target_df.loc[
    (
        target_df["description"].str.contains("k562", case=False)
        | target_df["description"].str.contains("hepg2", case=False)
    )
    & (
        target_df["description"].str.contains("cage", case=False)
        | target_df["description"].str.contains("dnase", case=False)
    ),
    ["description", "index"],
].copy()
arensplasmid_tracks

Unnamed: 0,description,index
27,DNASE:HepG2,27
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
91,DNASE:HepG2,91
121,DNASE:K562,121
122,DNASE:K562,122
123,DNASE:K562,123
234,DNASE:HepG2,234
625,DNASE:K562,625


In [7]:
# Suffix repeated descriptions in place (the helper asserts that the
# descriptions are unique before it returns), then display the result.
dedupe_tracks(arensplasmid_tracks)
arensplasmid_tracks

Unnamed: 0,description,index
27,DNASE:HepG2_1,27
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
91,DNASE:HepG2_2,91
121,DNASE:K562_1,121
122,DNASE:K562_2,122
123,DNASE:K562_3,123
234,DNASE:HepG2_3,234
625,DNASE:K562_4,625


<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [8]:
# Write the track list as tab-separated rows without a header row or row labels.
arensplasmid_tracks.to_csv(
    "Data/Arensbergen_sure_mpra/arensbergen_plasmid_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

## Cohen Tracks

Tracks for `cohen_tripseq`.

### Enformer/Basenji2

In [11]:
# Tracks that mention "k562" AND "cage" or "dnase" (all case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
cohen_tracks = target_df.loc[
    target_df["description"].str.contains("k562", case=False)
    & (
        target_df["description"].str.contains("cage", case=False)
        | target_df["description"].str.contains("dnase", case=False)
    ),
    ["description", "index"],
].copy()
cohen_tracks

Unnamed: 0,description,index
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
121,DNASE:K562,121
122,DNASE:K562,122
123,DNASE:K562,123
625,DNASE:K562,625
4828,CAGE:chronic myelogenous leukemia cell line:K562,4828
5111,CAGE:chronic myelogenous leukemia cell line:K5...,5111


In [12]:
# Suffix repeated descriptions in place (the helper asserts that the
# descriptions are unique before it returns), then display the result.
dedupe_tracks(cohen_tracks)
cohen_tracks

Unnamed: 0,description,index
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
121,DNASE:K562_1,121
122,DNASE:K562_2,122
123,DNASE:K562_3,123
625,DNASE:K562_4,625
4828,CAGE:chronic myelogenous leukemia cell line:K562,4828
5111,CAGE:chronic myelogenous leukemia cell line:K5...,5111


<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [None]:
# Write the track list as tab-separated rows without a header row or row labels.
cohen_tracks.to_csv(
    "Data/Cohen_genomic_environments/cohen_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

### Basenji1

In [41]:
# Basenji1 tracks that mention "k562" AND "cage" or "dnase" (all case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
b1_cohen_tracks = b1_target_df.loc[
    b1_target_df["description"].str.contains("k562", case=False)
    & (
        b1_target_df["description"].str.contains("cage", case=False)
        | b1_target_df["description"].str.contains("dnase", case=False)
    ),
    ["description", "index"],
].copy()
b1_cohen_tracks

Unnamed: 0,description,index
28,DNASE:K562 G1 phase,28
38,DNASE:K562,38
55,DNASE:K562,55
146,DNASE:K562 treated with sodium butyrate,146
177,DNASE:K562,177
212,DNASE:K562 treated with vorinostat,212
225,DNASE:K562 treated with vorinostat,225
231,DNASE:K562 treated with dimethyl sulfoxide,231
249,DNASE:K562 G1 phase,249
279,DNASE:K562 G1 phase,279


In [42]:
# Suffix repeated descriptions in place; the helper asserts that the
# descriptions are unique before it returns.
dedupe_tracks(b1_cohen_tracks)

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [43]:
# Write the track list as tab-separated rows without a header row or row labels.
b1_cohen_tracks.to_csv(
    "Data/Cohen_genomic_environments/basenji1_cohen_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

## Abramov Tracks

In [42]:
# One entry per line, with trailing whitespace removed. Blank lines are
# skipped: an empty entry would later become an empty regex alternative that
# matches every description.
with open("Data/Abramov_ASB/tf_list.txt") as f:
    tf_list = [line.rstrip() for line in f if line.strip()]

In [43]:
# The list must not contain the same entry twice.
assert len(tf_list) == len(set(tf_list))

Join the list entries into a single regex alternation, because `Series.str.contains` takes one pattern rather than a list of strings:

In [44]:
# Escape each entry so it is matched literally rather than interpreted as
# regex syntax, and drop empty entries: an empty alternative would make the
# pattern match every description.
tf_regex = "|".join(re.escape(tf) for tf in tf_list if tf)

In [45]:
# Tracks whose description matches one of the listed names (case-sensitive)
# and mentions "CHIP" (case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
abramov_tracks = target_df.loc[
    target_df["description"].str.contains(tf_regex, case=True)
    & target_df["description"].str.contains("CHIP", case=False),
    ["description", "index"],
].copy()
abramov_tracks

Unnamed: 0,description,index
684,CHIP:CTCF:MCF-7,684
685,CHIP:TAF1:MCF-7,685
687,CHIP:CTCF:GM12878,687
697,CHIP:CTCF:K562,697
704,CHIP:CTCF:endothelial cell of umbilical vein m...,704
...,...,...
4662,CHIP:CLOCK:CLOCK_6AN_ChIPSeq / U2OS cells stab...,4662
4667,CHIP:abcam:ChIP_hBMAL1-peak-DMOG CHIP-Seq / U2...,4667
4668,CHIP:abcam:ChIP_hBMAL1-trough-DMOG CHIP-Seq / ...,4668
4669,CHIP:abcam:ChIP_hBMAL1-peak-DMSO CHIP-Seq / U2...,4669


In [46]:
# Flag every row whose description occurs more than once (keep=False marks
# all members of a duplicate group), then show how many rows are affected.
abramov_dupes = abramov_tracks["description"].duplicated(keep=False)
abramov_dupes.sum()

238

In [47]:
# Distinct descriptions carried by the rows flagged as duplicates.
abramov_dupe_descs = abramov_tracks.loc[abramov_dupes, "description"].unique()

In [48]:
# Append _1, _2, ... to repeated descriptions, in place, via the shared helper
# instead of a copy of its loop; the helper also asserts that the
# descriptions are unique before it returns.
dedupe_tracks(abramov_tracks)

In [49]:
# Sanity check: number of rows that still share a description with another row.
abramov_tracks["description"].duplicated(keep=False).sum()

0

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

## Fulco Preprocessing Tracks

In [9]:
# Tracks that mention "K562" AND "CAGE" or "DNASE" (all case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
fulcopp_tracks = target_df.loc[
    target_df["description"].str.contains("K562", case=False)
    & (
        target_df["description"].str.contains("CAGE", case=False)
        | target_df["description"].str.contains("DNASE", case=False)
    ),
    ["description", "index"],
].copy()
fulcopp_tracks

Unnamed: 0,description,index
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
121,DNASE:K562,121
122,DNASE:K562,122
123,DNASE:K562,123
625,DNASE:K562,625
4828,CAGE:chronic myelogenous leukemia cell line:K562,4828
5111,CAGE:chronic myelogenous leukemia cell line:K5...,5111


In [10]:
# Flag every row whose description occurs more than once (keep=False marks
# all members of a duplicate group), then show how many rows are affected.
fulcopp_dupes = fulcopp_tracks["description"].duplicated(keep=False)
fulcopp_dupes.sum()

4

In [11]:
# Distinct descriptions carried by the rows flagged as duplicates.
fulcopp_dupe_descs = fulcopp_tracks.loc[fulcopp_dupes, "description"].unique()

In [12]:
# Append _1, _2, ... to repeated descriptions, in place, via the shared helper
# instead of a copy of its loop; the helper also asserts that the
# descriptions are unique before it returns.
dedupe_tracks(fulcopp_tracks)

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [14]:
# Write the track list as tab-separated rows without a header row or row labels.
fulcopp_tracks.to_csv(
    "Data/Fulco_CRISPRi/preprocessing_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

## Sahu Tracks

In [4]:
# Keep (a) tracks that mention "CAGE" or "DNASE" together with "K562" or
# "colon", plus (b) every track that mentions "CHIP" (all case-insensitive).
# Rows and columns are selected in a single .loc call and copied, because the
# descriptions of this frame are edited in place further down.
sahu_tracks = target_df.loc[
    (
        (
            target_df["description"].str.contains("CAGE", case=False)
            | target_df["description"].str.contains("DNASE", case=False)
        )
        & (
            target_df["description"].str.contains("K562", case=False)
            | target_df["description"].str.contains("colon", case=False)
        )
    )
    | target_df["description"].str.contains("CHIP", case=False),
    ["description", "index"],
].copy()
sahu_tracks

Unnamed: 0,description,index
33,DNASE:K562 treated with 1 uM vorinostat for 72...,33
34,DNASE:K562 G2 phase,34
35,DNASE:K562 G1 phase,35
121,DNASE:K562,121
122,DNASE:K562,122
...,...,...
4828,CAGE:chronic myelogenous leukemia cell line:K562,4828
4856,CAGE:colon carcinoma cell line:CACO-2,4856
4966,"CAGE:colon, fetal,",4966
4978,"CAGE:colon, adult,",4978


In [5]:
# Flag every row whose description occurs more than once (keep=False marks
# all members of a duplicate group), then show how many rows are affected.
sahu_dupes = sahu_tracks["description"].duplicated(keep=False)
sahu_dupes.sum()

566

In [6]:
# Distinct descriptions carried by the rows flagged as duplicates.
sahu_dupe_descs = sahu_tracks.loc[sahu_dupes, "description"].unique()

In [7]:
# Append _1, _2, ... to repeated descriptions, in place, via the shared helper
# instead of a copy of its loop; the helper also asserts that the
# descriptions are unique before it returns.
dedupe_tracks(sahu_tracks)

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [8]:
# Write the track list as tab-separated rows without a header row or row labels.
sahu_tracks.to_csv(
    "Data/Sahu_enhancers/sahu_tracks.yaml",
    sep="\t",
    header=False,
    index=False,
)

## All Tracks (avsec_enhancercentered)

### Enformer

In [39]:
# Every track, reduced to its description and index columns. The explicit
# copy decouples this frame from target_df, because its descriptions are
# edited in place further down.
alltracks = target_df[["description", "index"]].copy()
alltracks

Unnamed: 0,description,index
0,DNASE:cerebellum male adult (27 years) and mal...,0
1,DNASE:frontal cortex male adult (27 years) and...,1
2,DNASE:chorion,2
3,DNASE:Ishikawa treated with 0.02% dimethyl sul...,3
4,DNASE:GM03348,4
...,...,...
5308,CAGE:epithelioid sarcoma cell line:HS-ES-2R,5308
5309,CAGE:squamous cell lung carcinoma cell line:RE...,5309
5310,CAGE:gastric cancer cell line:GSS,5310
5311,CAGE:carcinoid cell line:NCI-H727,5311


In [40]:
# Suffix repeated descriptions in place; the helper asserts that the
# descriptions are unique before it returns.
dedupe_tracks(alltracks)

<div class="alert alert-block alert-warning">
<b>Note:</b> Only activate following cell if you want to overwrite the old tracks!
</div>

In [42]:
# Write the track list as tab-separated rows without a header row or row labels.
alltracks.to_csv(
    "Data/Tracks/all_tracks_deduped.yaml",
    sep="\t",
    header=False,
    index=False,
)