In [1]:
from crr_labels import fantom, roadmap, roadmap_available_cell_lines, fantom_available_cell_lines
import os
from tqdm.auto import tqdm
import pandas as pd
from tabulate import tabulate

In [11]:
# Cell lines requested from both fantom() and roadmap() in the preprocessing loop.
common_cell_lines = [
    "GM12878",
    "HelaS3",
    "HepG2",
    "K562",
    "A549",
    "H1",
    "H9",
]

# Extra cell lines requested only from fantom().
fantom_cell_lines = [
    "MCF7",
    "HEK293",
    "Caco2",
    "HL60",
    "PC3",
    "JURKAT",
]

# Extra cell lines requested only from roadmap().
roadmap_cell_lines = [
    "DND41",
    "HUES48",
    "HUES6",
    "HUES64",
    "IMR90",
]

In [12]:
# Window sizes iterated by the preprocessing loop (used in the output paths
# and passed as the second argument to fantom() / roadmap()).
windows = (
    200,
    300,
    500,
    1000,
    2000,
)

# Values passed as `states=` to roadmap(); one output directory per value.
states = (
    15,
    18,
)

# Values passed as `center_enhancers=` to fantom(); one enhancer file per value.
centers = (
    "peak",
    "center",
)

In [13]:
# reStructuredText anonymous hyperlink; `{url}` is filled with a path relative
# to the repository root via str.format.
url_pattern = (
    "`Download <"
    "https://raw.githubusercontent.com/LucaCappelletti94/crr_labels/master/"
    "{url}"
    ">`__"
)

In [14]:
# Build the preprocessed FANTOM / Roadmap CSV files (one set per window size)
# and collect, for every window, a record of download links that is later
# rendered as a table.
#
# NOTE: earlier versions of this cell wrote the *enhancers* frame into every
# promoters.csv. Files generated by those runs are still on disk and will be
# treated as valid by the existence checks below; delete the `preprocessed/`
# directory once to regenerate correct promoter files.
fantom_preprocessed = []
roadmap_preprocessed = []
for window in tqdm(windows, desc="Window", dynamic_ncols=True):
    # ---- FANTOM -----------------------------------------------------------
    fantom_dir = f"preprocessed/fantom/window_size/{window}"
    fantom_promoter_path = f"{fantom_dir}/promoters.csv"
    for center_enhancers in tqdm(centers, desc="Fantom", leave=False, dynamic_ncols=True):
        fantom_enhancer_path = f"{fantom_dir}/enhancers_{center_enhancers}.csv"
        # Skip only when both outputs are present, so an interrupted run
        # (enhancers written, promoters missing) gets repaired.
        if os.path.exists(fantom_enhancer_path) and os.path.exists(fantom_promoter_path):
            continue
        enhancers, promoters = fantom(
            common_cell_lines + fantom_cell_lines,
            window,
            center_enhancers=center_enhancers,
        )
        os.makedirs(fantom_dir, exist_ok=True)
        enhancers.to_csv(fantom_enhancer_path, index=None)
        # Was `enhancers.to_csv(promoter_path, ...)`: promoters.csv received
        # the enhancer rows. A single promoters file is shared by both
        # centering modes, as in the original layout.
        # NOTE(review): presumably promoters do not depend on center_enhancers — confirm.
        promoters.to_csv(fantom_promoter_path, index=None)
    fantom_preprocessed.append({
        "Nucleotides window": window,
        "Genome": "hg19",
        # Each column links to its own file. Previously both columns reused
        # the loop's leftover path and pointed at the same CSV.
        "Region-centered enhancers": url_pattern.format(url=f"{fantom_dir}/enhancers_center.csv"),
        "Peak-centered enhancers": url_pattern.format(url=f"{fantom_dir}/enhancers_peak.csv"),
        "Promoters": url_pattern.format(url=fantom_promoter_path),
    })

    # ---- Roadmap ----------------------------------------------------------
    # One record per window; each state contributes its own pair of columns.
    new_roadmap = {}
    for state in tqdm(states, desc="Roadmap", leave=False, dynamic_ncols=True):
        roadmap_dir = f"preprocessed/roadmap/window_size/{window}/state/{state}"
        roadmap_enhancer_path = f"{roadmap_dir}/enhancers.csv"
        roadmap_promoter_path = f"{roadmap_dir}/promoters.csv"
        # Record the links before the cache check so they are present even
        # when the files already exist.
        new_roadmap.update({
            "Nucleotides window": window,
            "Genome": "hg19",
            f"{state}-states model enhancers": url_pattern.format(url=roadmap_enhancer_path),
            f"{state}-states model promoters": url_pattern.format(url=roadmap_promoter_path),
        })
        # Check the files, not the directory: the directory is created before
        # the CSVs are written, so its existence does not prove completion.
        if os.path.exists(roadmap_enhancer_path) and os.path.exists(roadmap_promoter_path):
            continue
        enhancers, promoters = roadmap(
            common_cell_lines + roadmap_cell_lines, window, states=state
        )
        os.makedirs(roadmap_dir, exist_ok=True)
        enhancers.to_csv(roadmap_enhancer_path, index=None)
        # Was `enhancers.to_csv(promoter_path, ...)`: wrong frame written.
        promoters.to_csv(roadmap_promoter_path, index=None)
    roadmap_preprocessed.append(new_roadmap)
# Render the collected download links as reStructuredText grid tables.
# The explicit column lists fix the column order of each table.
roadmap_columns = [
    "Nucleotides window",
    "Genome",
    "15-states model enhancers",
    "15-states model promoters",
    "18-states model enhancers",
    "18-states model promoters",
]
df = pd.DataFrame(roadmap_preprocessed)
df = df[roadmap_columns]
roadmap_table = tabulate(df.values, headers=df.columns, tablefmt="grid")
with open("roadmap.rst", "w") as rst_file:
    rst_file.write(roadmap_table)

fantom_columns = [
    "Nucleotides window",
    "Genome",
    "Region-centered enhancers",
    "Peak-centered enhancers",
    "Promoters",
]
df = pd.DataFrame(fantom_preprocessed)
df = df[fantom_columns]
fantom_table = tabulate(df.values, headers=df.columns, tablefmt="grid")
with open("fantom.rst", "w") as rst_file:
    rst_file.write(fantom_table)

HBox(children=(IntProgress(value=0, description='Window', layout=Layout(flex='2'), max=5, style=ProgressStyle(…

HBox(children=(IntProgress(value=0, description='Fantom', layout=Layout(flex='2'), max=2, style=ProgressStyle(…

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


HBox(children=(IntProgress(value=0, description='Roadmap', layout=Layout(flex='2'), max=2, style=ProgressStyle…

HBox(children=(IntProgress(value=0, description='Fantom', layout=Layout(flex='2'), max=2, style=ProgressStyle(…

HBox(children=(IntProgress(value=0, description='Roadmap', layout=Layout(flex='2'), max=2, style=ProgressStyle…

HBox(children=(IntProgress(value=0, description='Fantom', layout=Layout(flex='2'), max=2, style=ProgressStyle(…

HBox(children=(IntProgress(value=0, description='Roadmap', layout=Layout(flex='2'), max=2, style=ProgressStyle…

HBox(children=(IntProgress(value=0, description='Fantom', layout=Layout(flex='2'), max=2, style=ProgressStyle(…

HBox(children=(IntProgress(value=0, description='Roadmap', layout=Layout(flex='2'), max=2, style=ProgressStyle…

HBox(children=(IntProgress(value=0, description='Fantom', layout=Layout(flex='2'), max=2, style=ProgressStyle(…

HBox(children=(IntProgress(value=0, description='Roadmap', layout=Layout(flex='2'), max=2, style=ProgressStyle…