# Environment

In [1]:
import higlass
from higlass.client import View, Track, CombinedTrack
from cooler import Cooler
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
import clodius
from clodius.tiles.utils import tiles_wrapper_2d
from clodius.tiles.bed2ddb import tileset_info, get_2D_tiles

import os
import os.path as path
import pandas as pd
import numpy as np
import itertools
import negspy.coordinates as nc
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import scipy.stats as stats
from tqdm import tqdm
import pickle


def bed2ddb(filepath, uuid=None, **kwargs):
    """Wrap a bed2ddb file in a higlass ``Tileset``.

    Args:
        filepath: Path to the bed2ddb file; handed unchanged to clodius'
            ``tileset_info`` and ``get_2D_tiles``.
        uuid: Optional identifier forwarded to ``Tileset``.
        **kwargs: Any further keyword arguments are forwarded to ``Tileset``.

    Returns:
        A ``Tileset`` whose info and tile callbacks both read ``filepath``.
    """

    def _info():
        return tileset_info(filepath)

    def _single_tile(z, x, y):
        # The lookup result is indexed by the (x, y) position of the
        # requested tile.
        return get_2D_tiles(filepath, z, x, y)[(x, y)]

    def _tiles(tids):
        return tiles_wrapper_2d(tids, _single_tile)

    return Tileset(uuid=uuid, tileset_info=_info, tiles=_tiles, **kwargs)


# Load annotations

In [2]:
# Tilesets shared by every view in this notebook.
genes = beddb("../2019-10-24_higlass/Data/hg38/gene-annotations-hg38.beddb")
chrom_sizes = chromsizes("../2019-10-24_higlass/hg38.chrom.sizes")

# Font size and track dimensions used by the annotation tracks below.
label_font_size = 18
chr_label_size = 30
annots_size = 150

# Track-type prefix for each edge of a view; insertion order fixes the order
# of the keys in the dictionaries built from it.
_edge_orientation = {
    "top": "horizontal",
    "bottom": "horizontal",
    "left": "vertical",
    "right": "vertical",
}

# One chromosome-label track per view edge, keyed by position.
chrom_labels = {
    edge: Track(
        track_type=orientation + "-chromosome-labels",
        tileset=chrom_sizes,
        position=edge,
        # Both dimensions are passed for every edge, whatever its orientation.
        height=chr_label_size,
        width=chr_label_size,
        options=dict(fontSize=label_font_size, showMousePosition=True),
    )
    for edge, orientation in _edge_orientation.items()
}

# One gene-annotation track per view edge, keyed by position.
gene_annots = {
    edge: Track(
        track_type=orientation + "-gene-annotations",
        tileset=genes,
        position=edge,
        height=annots_size,
        width=annots_size,
        options=dict(fontSize=label_font_size, showMousePosition=True),
    )
    for edge, orientation in _edge_orientation.items()
}

# Chromosome info for hg38, used to convert chromosome positions into
# genome-wide positions when setting view domains.
hg38 = nc.get_chrominfo("hg38")

# Data

## Contact matrices

In [3]:
# --- Sample sheets ----------------------------------------------------------
tumour_metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"),
    sep="\t",
    header=0,
)
# Copy the filtered rows so that adding the SampleID column below writes to an
# independent frame instead of a slice of the raw table (this is what pandas'
# SettingWithCopyWarning complains about).
tumour_metadata = tumour_metadata.loc[tumour_metadata.Include == "Yes", :].copy()
tumour_metadata["SampleID"] = ["PCa" + str(i) for i in tumour_metadata["Sample ID"]]

tumour_samples = tumour_metadata["SampleID"].tolist()
t2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "Yes", "SampleID"].tolist()
nont2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "No", "SampleID"].tolist()

benign_metadata = pd.read_csv(
    path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX_HiC", "config.tsv"),
    sep="\t",
    header=0,
)
benign_metadata = benign_metadata.loc[benign_metadata.Include == "Yes", :]
benign_samples = benign_metadata["Sample"].tolist()

all_samples = tumour_samples + benign_samples
metadata = pd.read_csv(
    path.join("..", "2020-01-15_TAD-aggregation", "config.tsv"),
    sep="\t",
    index_col=False,
    header=0,
)

# --- Contact matrices -------------------------------------------------------
# Built from `all_samples` so the file list cannot drift from the keys it is
# zipped with below.
cooler_files = [
    path.join("..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts", s + ".mcool")
    for s in all_samples
]
lowc_tilesets = {s: cooler(f) for s, f in zip(all_samples, cooler_files)}

# Resolutions listed from coarsest to finest, so a resolution's list index can
# be used as its zoom level.
resolutions = [
    1000, 2000, 3000, 4000, 5000,
    10000, 20000, 30000, 40000, 50000,
    100000, 200000, 300000, 400000, 500000,
    1000000, 2000000, 3000000, 4000000, 5000000
][::-1]

min_resolution = 40000
heatmap_size = 500
colour_range = [
    "rgba(65, 105, 225, 1.0)",
    "rgba(255, 250, 250, 1.0)",
    # "rgba(240, 128, 128, 1.0)",
    "rgba(255, 25, 25, 1.0)"
]

# Display label per sample, taken from the first matching row of `metadata`.
# Looking the labels up once (rather than once per track) also lets a sample
# that is missing from the config fail with a message that names it.
sample_labels = metadata.drop_duplicates(subset="SampleID").set_index("SampleID")["Label"]
missing_labels = [s for s in all_samples if s not in sample_labels.index]
if missing_labels:
    raise KeyError(
        "Samples without a Label in the TAD-aggregation config: {}".format(missing_labels)
    )

# Same value for every track, so compute it once.
max_zoom = str(resolutions.index(min_resolution))

# lowc_heatmaps[sample][position] -> heatmap Track for that sample/position.
# The centre track has no orientation prefix (track type is plain "heatmap").
lowc_heatmaps = {
    s: {
        position: Track(
            track_type=prefix + "heatmap",
            position=position,
            tileset=lowc_tilesets[s],
            filetype="cooler",
            height=heatmap_size,
            options={
                "maxZoom": max_zoom,
                "colorbarPosition": "topRight",
                "showMousePosition": True,
                "name": sample_labels[s],
                "colorRange": colour_range,
            },
        ) for position, prefix in zip(
            ["top", "bottom", "left", "right", "center"],
            ["horizontal-", "horizontal-", "vertical-", "vertical-", ""]
        )
    } for s in all_samples
}

## Loops

In [10]:
TRACK_DIR = "Tracks"
LOOP_DIR = "Loops"

# Loop call table (tab-separated), keyed by loop set.
loops = {
    "all": pd.read_csv(path.join(LOOP_DIR, "merged-loops.sample-counts.tsv"), sep="\t"),
}

# 2D tilesets for each loop set.
loop_tilesets_2D = {
    "all": bed2ddb(
        path.join(TRACK_DIR, "merged-loops.sample-counts.bed2ddb"),
        name="All Loops"
    ),
    "tumour": bed2ddb(
        path.join(TRACK_DIR, "tumour-specific-loops.bed2ddb"),
        name="Tumour-specific Loops"
    ),
    "t2e": bed2ddb(
        path.join("..", "2020-10-23_t2e-loops", "Tracks", "loops.T2E-specific.bed2ddb"),
        name="T2E Loops"
    ),
}

# Fill colour per loop set. "benign" has a colour but no tileset above.
loop_colours = {
    "tumour": "Orange",
    "benign": "Blue",
    "all": "Black",
    "t2e": "Green",
}


def _loop_tracks(tileset, colour, size=500):
    """Build the top, bottom and centre rectangle-domain tracks for one loop set.

    Args:
        tileset: The bed2ddb ``Tileset`` to draw.
        colour: Value for the tracks' ``fillColor`` option.
        size: Width and height of the centre (2D) track.

    Returns:
        Dict with keys ``"top"``, ``"bottom"`` and ``"centre"``. Note the
        British spelling of the last key; the track itself is placed at
        position ``"center"``.
    """
    # Read the display name once instead of once per track.
    name = tileset.tileset_info()["name"]

    tracks = {
        edge: Track(
            track_type="horizontal-2d-rectangle-domains",
            position=edge,
            tileset=tileset,
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": name,
                "fillColor": colour,
            },
        )
        for edge in ("top", "bottom")
    }
    tracks["centre"] = Track(
        track_type="2d-rectangle-domains",
        position="center",
        tileset=tileset,
        filetype="bed2ddb",
        width=size,
        height=size,
        options={
            "showMousePosition": True,
            "name": name,
            "flipDiagonal": False,
            "fillColor": colour,
        },
    )
    return tracks


# loop_tracks_2D[loop_type][position] -> Track, for every tileset defined above
# ("all", "tumour", "t2e").
loop_tracks_2D = {
    loop_type: _loop_tracks(tileset, loop_colours[loop_type])
    for loop_type, tileset in loop_tilesets_2D.items()
}

## H3K27ac

In [11]:
line_height = 150

# View edges, in the order the per-edge track types are listed below.
_EDGES = ("top", "bottom", "left", "right")


def _sample_field(sample, column):
    """Return the first value of ``column`` among ``metadata`` rows for ``sample``."""
    return metadata.loc[metadata["SampleID"] == sample, column].values[0]


def _sample_tracks(sample, tileset, track_types, filetype, name_suffix, colour_option, **track_kwargs):
    """Build one track per view edge for a single sample.

    Args:
        sample: Sample ID used to look up ``Label`` and ``Sample_Colour`` in
            ``metadata``.
        tileset: Tileset drawn by every returned track.
        track_types: Track type for each edge, aligned with ``_EDGES``.
        filetype: ``filetype`` passed to every ``Track``.
        name_suffix: Text appended to the sample label to form the track name.
        colour_option: Name of the track option that receives the sample colour.
        **track_kwargs: Extra keyword arguments for ``Track`` (e.g. height/width).

    Returns:
        Dict mapping edge name to ``Track``.
    """
    # Label and colour do not depend on the edge, so look them up once per
    # sample rather than once per track.
    name = _sample_field(sample, "Label") + name_suffix
    colour = _sample_field(sample, "Sample_Colour")
    return {
        edge: Track(
            track_type=track_type,
            position=edge,
            tileset=tileset,
            filetype=filetype,
            options={
                "showMousePosition": True,
                "name": name,
                colour_option: colour,
            },
            **track_kwargs
        )
        for edge, track_type in zip(_EDGES, track_types)
    }


# H3K27ac signal (bigWig) for the tumour samples.
chip_files = {s: path.join("..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks", s + "_FE.sorted.filtered.bw") for s in tumour_samples}
chip_tilesets = {s: bigwig(f) for s, f in chip_files.items()}

# chip[sample][edge] -> bar track of the signal.
chip = {
    s: _sample_tracks(
        s, ts,
        track_types=["horizontal-bar", "horizontal-bar", "vertical-bar", "vertical-bar"],
        filetype="vector",
        name_suffix=" H3K27ac",
        colour_option="barFillColor",
        height=line_height,
        width=line_height,
    ) for s, ts in chip_tilesets.items()
}

# chip_line[sample][edge] -> line track of the same signal.
chip_line = {
    s: _sample_tracks(
        s, ts,
        track_types=["horizontal-line", "horizontal-line", "vertical-line", "vertical-line"],
        filetype="vector",
        name_suffix=" H3K27ac",
        colour_option="lineStrokeColor",
        height=line_height,
        width=line_height,
    ) for s, ts in chip_tilesets.items()
}

# H3K27ac peak calls (beddb) for the tumour samples.
peak_files = {s: path.join("..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks", s + "_peaks.filtered.beddb") for s in tumour_samples}
peak_tilesets = {s: beddb(f) for s, f in peak_files.items()}

# peaks[sample][edge] -> bedlike track; no explicit height or width is set.
peaks = {
    s: _sample_tracks(
        s, ts,
        track_types=["bedlike", "bedlike", "vertical-bedlike", "vertical-bedlike"],
        filetype="beddb",
        name_suffix=" H3K27ac Peaks",
        colour_option="fillColor",
    ) for s, ts in peak_tilesets.items()
}


# Figures

## FOXA1

In [12]:
def _foxa1_view(sample):
    """Build the FOXA1-locus view for one sample's contact matrix.

    The heatmap comes from ``sample``; the H3K27ac peak tracks are always
    those of PCa13848, and the "all" loop set is overlaid on the heatmap.
    """
    contact_map = CombinedTrack([
        lowc_heatmaps[sample]["center"],
        loop_tracks_2D["all"]["centre"],
    ])
    # chr14 window around 37.6 Mb (+/- 400 kb), in genome-wide coordinates.
    # Alternative kept for reference (SOX9): chr17, 704e5 - 3e6 to 736e5 + 3e6.
    x_start = nc.chr_pos_to_genome_pos("chr14", 376e5 - 4e5, hg38)
    x_end = nc.chr_pos_to_genome_pos("chr14", 376e5 + 4e5, hg38)
    return View(
        tracks=[
            gene_annots["top"],
            chrom_labels["top"],
            gene_annots["left"],
            chrom_labels["left"],
            peaks["PCa13848"]["top"],
            peaks["PCa13848"]["left"],
            contact_map,
        ],
        initialXDomain=[x_start, x_end],
    )


# One view per sample: tumour first, then benign.
test_views = [
    _foxa1_view(sample)
    for sample in ("PCa13848", "Benign-Prostate-3320942")
]

# Location and zoom are synchronised across the two views; value-scale
# synchronisation of the heatmaps is not enabled.
d, s, v = higlass.display(
    views=test_views,
    location_syncs=[test_views],
    zoom_syncs=[test_views],
)
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'UfOydjlSTM6z6d6iPNXpXQ', 'tracks': {'top': [{'ty…

## T2E Locus

In [13]:
def _t2e_view(sample):
    """Build the chr21 view (38.5-42 Mb) for one sample's contact matrix.

    The sample's heatmap is overlaid with the "t2e" loop set.
    """
    contact_map = CombinedTrack([
        lowc_heatmaps[sample]["center"],
        loop_tracks_2D["t2e"]["centre"],
    ])
    x_start = nc.chr_pos_to_genome_pos("chr21", 38500000, hg38)
    x_end = nc.chr_pos_to_genome_pos("chr21", 42000000, hg38)
    return View(
        tracks=[
            gene_annots["top"],
            chrom_labels["top"],
            gene_annots["left"],
            chrom_labels["left"],
            contact_map,
        ],
        initialXDomain=[x_start, x_end],
    )


# One view per sample.
test_views = [_t2e_view(sample) for sample in ("PCa13266", "PCa13848")]

# Only the location is synchronised here; zoom and value-scale
# synchronisation are not enabled.
d, s, v = higlass.display(
    views=test_views,
    location_syncs=[test_views],
)
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'bl9S9fYwR-2LYO3EZcjrxw', 'tracks': {'top': [{'ty…