# Environment

In [1]:
import higlass
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
import clodius
import os
import os.path as path
import pandas as pd
import numpy as np
import itertools
import negspy.coordinates as nc

def bed2ddb(filepath, uuid=None, **kwargs):
    """Wrap a bed2ddb file in a higlass ``Tileset``.

    Parameters
    ----------
    filepath
        Path to the bed2ddb file; handed to the clodius readers each time a
        callback is invoked.
    uuid
        Optional identifier forwarded to ``Tileset``.
    **kwargs
        Forwarded unchanged to ``Tileset`` (callers in this notebook pass
        ``name=``).

    Returns
    -------
    Tileset
        Tileset whose ``tileset_info`` and ``tiles`` callbacks read ``filepath``.
    """
    from clodius.tiles.utils import tiles_wrapper_2d
    from clodius.tiles.bed2ddb import tileset_info, get_2D_tiles

    def _info():
        # Use the imported `tileset_info`. The previous code called
        # `get_2d_tileset_info`, a name that was never imported, so the
        # callback raised NameError as soon as it was invoked.
        return tileset_info(filepath)

    def _single_tile(z, x, y):
        # get_2D_tiles returns a mapping keyed by (x, y); pick the requested tile.
        return get_2D_tiles(filepath, z, x, y)[(x, y)]

    return Tileset(
        uuid=uuid,
        tileset_info=_info,
        tiles=lambda tids: tiles_wrapper_2d(tids, _single_tile),
        **kwargs
    )

## Load annotations

In [2]:
# Tilesets backing the annotation tracks.
genes = beddb("Data/hg38/gene-annotations-hg38.beddb")
chrom_sizes = chromsizes("hg38.chrom.sizes")

# Shared sizing for labels and annotation tracks.
label_font_size = 18
chr_label_size = 30
annots_size = 150


def _annotation_tracks(type_suffix, tileset, size):
    """Return one annotation Track per edge position (top/bottom/left/right).

    The track type is the orientation prefix for the position followed by
    ``type_suffix``; ``size`` is used for both height and width.
    """
    orientation_of = {
        "top": "horizontal",
        "bottom": "horizontal",
        "left": "vertical",
        "right": "vertical",
    }
    tracks = {}
    for position, orientation in orientation_of.items():
        tracks[position] = Track(
            track_type=orientation + type_suffix,
            tileset=tileset,
            position=position,
            height=size,
            width=size,
            options={
                "fontSize": label_font_size,
                "showMousePosition": True,
            },
        )
    return tracks


chrom_labels = _annotation_tracks("-chromosome-labels", chrom_sizes, chr_label_size)
gene_annots = _annotation_tracks("-gene-annotations", genes, annots_size)

# Chromosome info used to convert chromosome positions to genome positions.
hg38 = nc.get_chrominfo("hg38")

# Data
## Contact matrices

In [3]:
# Tumour samples: keep only the rows flagged Include == "Yes".
tumour_metadata = pd.read_csv(path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"), sep="\t", header=0)
# .copy() makes the filtered frame independent of the one returned by read_csv,
# so adding the SampleID column below never writes into a slice
# (avoids pandas' SettingWithCopyWarning and its ambiguous semantics).
tumour_metadata = tumour_metadata.loc[tumour_metadata.Include == "Yes", :].copy()
tumour_metadata["SampleID"] = ["PCa" + str(i) for i in tumour_metadata["Sample ID"]]

# Sample ID lists: all tumours, then split by the "T2E Status" column.
tumour_samples = tumour_metadata["SampleID"].tolist()
t2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "Yes", "SampleID"].tolist()
nont2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "No", "SampleID"].tolist()

# Benign samples: same Include filter, IDs come from the "Sample" column.
benign_metadata = pd.read_csv(path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX_HiC", "config.tsv"), sep="\t", header=0)
benign_metadata = benign_metadata.loc[benign_metadata.Include == "Yes", :]
benign_samples = benign_metadata["Sample"].tolist()

# Cell lines: every row is used, IDs come from the "Run_Accession" column.
cell_line_metadata = pd.read_csv(path.join("..", "..", "Data", "External", "Rhie_2019", "config.tsv"), sep="\t", header=0)
cell_line_samples = cell_line_metadata["Run_Accession"].tolist()

# Order matters: later code zips this list against the cooler file list.
all_samples = tumour_samples + benign_samples + cell_line_samples
# Per-sample display metadata (read below for the "Label" and "Sample_Colour" columns).
metadata = pd.read_csv(
    path.join("..", "2020-01-15_TAD-aggregation", "config.tsv"),
    sep="\t",
    index_col=False,
    header=0,
)

# One .mcool per sample, in the same order as all_samples:
# tumour + benign files live in the LowC directory, cell lines in Rhie_2019.
cooler_files = [
    path.join(contacts_dir, sample + ".mcool")
    for contacts_dir, group in (
        (
            path.join("..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts"),
            tumour_samples + benign_samples,
        ),
        (
            path.join("..", "..", "Data", "External", "Rhie_2019", "Contacts"),
            cell_line_samples,
        ),
    )
    for sample in group
]

# Sample ID -> cooler tileset.
lowc_tilesets = dict(zip(all_samples, map(cooler, cooler_files)))

# Resolutions 1-5 x 10^3 ... 1-5 x 10^6, ordered from largest to smallest;
# an entry's index in this list is used as the heatmap "maxZoom" value.
resolutions = sorted(
    (multiple * 10 ** exponent for exponent in range(3, 7) for multiple in range(1, 6)),
    reverse=True,
)

min_resolution = 40000

# Three-stop colour scale for the contact heatmaps.
colour_range = [
    "rgba(65, 105, 225, 1.0)",
    "rgba(255, 250, 250, 1.0)",
    # "rgba(240, 128, 128, 1.0)",
    "rgba(255, 25, 25, 1.0)",
]
def _heatmap_tracks(sample):
    """Return the heatmap Track for each position (edges and center) of one sample."""
    label = metadata.loc[metadata["SampleID"] == sample, "Label"].values[0]
    # (position, track-type prefix, height)
    layout = (
        ("top", "horizontal-", 250),
        ("bottom", "horizontal-", 250),
        ("left", "vertical-", 250),
        ("right", "vertical-", 250),
        ("center", "", 500),
    )
    tracks = {}
    for position, prefix, track_height in layout:
        tracks[position] = Track(
            track_type=prefix + "heatmap",
            position=position,
            tileset=lowc_tilesets[sample],
            filetype="cooler",
            height=track_height,
            options={
                # index of min_resolution in the descending resolutions list
                "maxZoom": str(resolutions.index(min_resolution)),
                "colorbarPosition": "topRight",
                "showMousePosition": True,
                "name": label,
                "colorRange": colour_range,
            },
        )
    return tracks


# Sample ID -> position -> heatmap Track.
lowc_heatmaps = {sample: _heatmap_tracks(sample) for sample in all_samples}

## Compartments

In [4]:
# Directory holding the per-sample compartment bigWig files. The default is the
# original absolute path; set the COMPARTMENT_DIR environment variable to run
# the notebook against a different location without editing this cell.
COMPARTMENT_DIR = os.environ.get(
    "COMPARTMENT_DIR",
    "/d/Cloud/OneDrive - University of Toronto/Documents/Active Projects/Davos/Cooler",
)

cmpt_files = {s: path.join(COMPARTMENT_DIR, s + ".compartments.cis.bw") for s in tumour_samples}
cmpt_tilesets = {s: bigwig(f) for s, f in cmpt_files.items()}


def _compartment_tracks(sample, tileset):
    """Return the compartment line Track for each edge position of one sample."""
    # Look the sample's row up once instead of once per position and option.
    row = metadata["SampleID"] == sample
    label = metadata.loc[row, "Label"].values[0]
    colour = metadata.loc[row, "Sample_Colour"].values[0]
    return {
        p: Track(
            track_type=t + "-line",
            position=p,
            tileset=tileset,
            filetype="vector",
            height=100,
            options={
                "showMousePosition": True,
                "name": label + " Compartment",
                "lineStrokeColor": colour,
            },
        ) for p, t in zip(["top", "bottom", "left", "right"], ["horizontal", "horizontal", "vertical", "vertical"])
    }


# Sample ID -> position -> compartment Track.
compartments = {s: _compartment_tracks(s, ts) for s, ts in cmpt_tilesets.items()}

# One view stacking the "top" compartment track of every tumour sample
# over chr17:49964536-50380091.
views = [
    View(
        tracks=[compartments[sample]["top"] for sample in tumour_samples],
        initialXDomain=[
            nc.chr_pos_to_genome_pos("chr17", position, hg38)
            for position in (49964536, 50380091)
        ],
    )
]

# All compartment tracks share one value scale.
d, s, v = higlass.display(
    views=views,
    zoom_syncs=[views],
    location_syncs=[views],
    value_scale_syncs=[
        [(views[0], compartments[sample]["top"]) for sample in tumour_samples],
    ],
)

## TADs

In [5]:
# Window sizes 3..20 inclusive.
windows = [w for w in range(3, 21)]


def _tad_label(sample, window):
    """Tileset name for one sample at one window size."""
    return sample + " TADs (w=" + str(window) + ")"


# Sample ID -> window -> bed2ddb path (tumour samples only).
tad_2D_files = {
    sample: {
        window: path.join(
            "..", "2020-08-29_TADs-downsampled", "Aggregated-TADs", "separated-TADs",
            sample + ".40000bp.w_" + str(window) + ".domains.bed2ddb",
        )
        for window in windows
    }
    for sample in tumour_samples
}
# Sample ID -> aggregated-domains bed2ddb path (tumour samples only).
htad_files = {
    sample: path.join(
        "..", "2020-08-29_TADs-downsampled", "Aggregated-TADs",
        sample + ".40000bp.aggregated-domains.bed2ddb",
    )
    for sample in tumour_samples
}

# Sample ID -> window -> beddb tileset (tumour and benign samples).
tad_1D_tilesets = {
    sample: {
        window: beddb(
            path.join(
                "..", "2020-08-29_TADs-downsampled", "Tracks",
                sample + ".300000000.res_40000bp.window_" + str(window) + ".domains.beddb",
            ),
            name=_tad_label(sample, window),
        )
        for window in windows
    }
    for sample in tumour_samples + benign_samples
}
tad_2D_tilesets = {
    sample: {window: bed2ddb(files[window], name=_tad_label(sample, window)) for window in windows}
    for sample, files in tad_2D_files.items()
}
htad_tilesets = {
    sample: bed2ddb(filepath, name=sample + " TADs")
    for sample, filepath in htad_files.items()
}

def _tad_tracks(sample, window):
    """Return the bedlike TAD Track for each edge position of one sample/window."""
    prefix_of = {"top": "", "bottom": "", "left": "vertical-", "right": "vertical-"}
    return {
        position: Track(
            track_type=prefix + "bedlike",
            position=position,
            tileset=tad_1D_tilesets[sample][window],
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": "{} (w = {})".format(sample, window),
            },
        )
        for position, prefix in prefix_of.items()
    }


# Sample ID -> window -> position -> Track (tumour and benign samples).
tads = {
    sample: {window: _tad_tracks(sample, window) for window in windows}
    for sample in tumour_samples + benign_samples
}

def _tad_polygon_tracks(sample, window):
    """Return the top, bottom and centre rectangle-domain Tracks for one sample/window."""
    # (dictionary key, HiGlass position, track type)
    placements = (
        ("top", "top", "horizontal-2d-rectangle-domains"),
        ("bottom", "bottom", "horizontal-2d-rectangle-domains"),
        ("centre", "center", "2d-rectangle-domains"),
    )
    tracks = {}
    for key, position, track_type in placements:
        tracks[key] = Track(
            track_type=track_type,
            position=position,
            tileset=tad_2D_tilesets[sample][window],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": "{} (w = {})".format(sample, window),
            },
        )
    return tracks


# Sample ID -> window -> {"top", "bottom", "centre"} -> Track (tumour samples only).
tad_polygons = {
    sample: {window: _tad_polygon_tracks(sample, window) for window in windows}
    for sample in tumour_samples
}

# Sample ID -> {"top", "bottom", "centre"} -> Track over the aggregated TAD tileset.
# The top/bottom entries use the horizontal track type, matching tad_polygons
# and loop_tracks_2D; they previously used the centre-only "2d-rectangle-domains"
# type at edge positions.
hierarchical_tads = {
    s: {
       "top": Track(
            track_type="horizontal-2d-rectangle-domains",
            position="top",
            tileset=htad_tilesets[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s,
            },
        ),
        "bottom": Track(
            track_type="horizontal-2d-rectangle-domains",
            position="bottom",
            tileset=htad_tilesets[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s,
            },
        ),
        # Note the key is spelled "centre" while the HiGlass position is "center".
        "centre": Track(
            track_type="2d-rectangle-domains",
            position="center",
            tileset=htad_tilesets[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s,
            }
        ),
    } for s in tumour_samples
}

## H3K27ac ChIP-seq

In [6]:
line_height = 150

# Sample ID -> H3K27ac signal bigWig path / tileset (tumour samples only).
chip_files = {
    sample: path.join(
        "..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks",
        sample + "_FE.sorted.filtered.bw",
    )
    for sample in tumour_samples
}
chip_tilesets = {sample: bigwig(filepath) for sample, filepath in chip_files.items()}


def _chip_signal_tracks(sample, tileset, type_suffix, colour_option):
    """Return the H3K27ac signal Track for each edge position of one sample.

    ``type_suffix`` selects the rendering ("-bar" or "-line") and
    ``colour_option`` names the option key that receives the sample colour.
    """
    is_sample = metadata["SampleID"] == sample
    title = metadata.loc[is_sample, "Label"].values[0] + " H3K27ac"
    colour = metadata.loc[is_sample, "Sample_Colour"].values[0]
    orientation_of = {
        "top": "horizontal",
        "bottom": "horizontal",
        "left": "vertical",
        "right": "vertical",
    }
    return {
        position: Track(
            track_type=orientation + type_suffix,
            position=position,
            tileset=tileset,
            filetype="vector",
            height=line_height,
            width=line_height,
            options={
                "showMousePosition": True,
                "name": title,
                colour_option: colour,
            },
        )
        for position, orientation in orientation_of.items()
    }


# Bar rendering of the signal.
chip = {
    sample: _chip_signal_tracks(sample, tileset, "-bar", "barFillColor")
    for sample, tileset in chip_tilesets.items()
}

# Line rendering of the same tilesets.
chip_line = {
    sample: _chip_signal_tracks(sample, tileset, "-line", "lineStrokeColor")
    for sample, tileset in chip_tilesets.items()
}

def _h3k27ac_region_tracks(sample, tileset, title_suffix):
    """Return the bedlike Track for each edge position, named and coloured from metadata."""
    is_sample = metadata["SampleID"] == sample
    title = metadata.loc[is_sample, "Label"].values[0] + title_suffix
    colour = metadata.loc[is_sample, "Sample_Colour"].values[0]
    prefix_of = {"top": "", "bottom": "", "left": "vertical-", "right": "vertical-"}
    return {
        position: Track(
            track_type=prefix + "bedlike",
            position=position,
            tileset=tileset,
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": title,
                "fillColor": colour,
            },
        )
        for position, prefix in prefix_of.items()
    }


# Filtered H3K27ac peaks (tumour samples only).
peak_files = {
    sample: path.join(
        "..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks",
        sample + "_peaks.filtered.beddb",
    )
    for sample in tumour_samples
}
peak_tilesets = {sample: beddb(filepath) for sample, filepath in peak_files.items()}
peaks = {
    sample: _h3k27ac_region_tracks(sample, tileset, " H3K27ac Peaks")
    for sample, tileset in peak_tilesets.items()
}

# COREs (tumour samples only).
core_files = {
    sample: path.join(
        "..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "COREs",
        sample + ".cores.beddb",
    )
    for sample in tumour_samples
}
core_tilesets = {sample: beddb(filepath) for sample, filepath in core_files.items()}
cores = {
    sample: _h3k27ac_region_tracks(sample, tileset, " COREs")
    for sample, tileset in core_tilesets.items()
}


## Load breakpoints

In [7]:
# Root of the breakpoint results; the track files below are read from here too.
BREAK_DIR = path.join("..", "2020-02-19_chromoplexy")

# Paired breakpoint table (tab-separated, first row is the header).
breaks = pd.read_csv(
    path.join(BREAK_DIR, "Graphs", "sv-breakpoints.paired.tsv"),
    index_col=False,
    header=0,
    sep="\t",
)

# Sample ID -> breakpoint bed2ddb path / tileset (tumour samples only).
break_files_2D = {s: path.join(BREAK_DIR, "Tracks", s + ".breaks.bed2ddb") for s in tumour_samples}
break_tilesets_2D = {s: bed2ddb(v, name=s + " Breaks") for s, v in break_files_2D.items()}

# Sample ID -> {"top", "bottom", "centre"} -> Track.
break_tracks_2D = {
    s: {
       "top": Track(
            track_type="horizontal-2d-rectangle-domains",
            position="top",
            tileset=break_tilesets_2D[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s + " Breakpoints",
            },
        ),
        # Bottom is a horizontal edge, so it uses the horizontal track type like
        # "top" (and like tad_polygons / loop_tracks_2D); it previously used
        # "vertical-2d-rectangle-domains" at position="bottom".
        "bottom": Track(
            track_type="horizontal-2d-rectangle-domains",
            position="bottom",
            tileset=break_tilesets_2D[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s + " Breakpoints",
            },
        ),
        # Note the key is spelled "centre" while the HiGlass position is "center".
        "centre": Track(
            track_type="2d-rectangle-domains",
            position="center",
            tileset=break_tilesets_2D[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s + " Breakpoint Pairs",
                "flipDiagonal": "copy",
            }
        ),
    } for s in break_tilesets_2D.keys()
}

def _sample_label(sample):
    """Return the "Label" value of the metadata row for ``sample``."""
    return metadata.loc[metadata["SampleID"] == sample, "Label"].values[0]


# Sample ID -> breakpoint beddb path (tumour samples only).
break_files_1D = {
    sample: path.join(BREAK_DIR, "Tracks", sample + ".breaks.beddb")
    for sample in tumour_samples
}
break_tilesets_1D = {
    sample: beddb(filepath, name=_sample_label(sample) + " Breaks")
    for sample, filepath in break_files_1D.items()
}

# Sample ID -> position -> bedlike Track.
break_tracks_1D = {
    sample: {
        position: Track(
            track_type=prefix + "bedlike",
            position=position,
            tileset=break_tilesets_1D[sample],
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": _sample_label(sample),
            },
        )
        for position, prefix in (
            ("top", ""),
            ("bottom", ""),
            ("left", "vertical-"),
            ("right", "vertical-"),
        )
    }
    for sample in tumour_samples
}

# Tab-separated result tables from the chromoplexy "Graphs" directory.
disruption_tests = pd.read_csv(
    path.join("..", "2020-02-19_chromoplexy", "Graphs", "sv-disruption-tests.tsv"),
    sep="\t",
)
bp_singletons = pd.read_csv(
    path.join("..", "2020-02-19_chromoplexy", "Graphs", "sv-breakpoints.tsv"),
    sep="\t",
)
bp_pairs = pd.read_csv(
    path.join("..", "2020-02-19_chromoplexy", "Graphs", "sv-breakpoints.paired.tsv"),
    sep="\t",
)

# Disruption tests from the separate sv-disruption-TADs directory.
disruption_tads = pd.read_csv(
    path.join("..", "2020-02-19_sv-disruption-TADs", "sv-disruption-tests.TADs.tsv"),
    sep="\t",
)

## Loops

In [8]:
# Root of the loop-calling results.
LOOP_DIR = path.join("..", "2020-10-06_loops")

# Sample ID -> track file path (tumour samples only): anchors as beddb, loops as bed2ddb.
loop_files_1D = {
    sample: path.join(LOOP_DIR, "Tracks", sample + ".anchors.beddb")
    for sample in tumour_samples
}
loop_files_2D = {
    sample: path.join(LOOP_DIR, "Tracks", sample + ".loops.bed2ddb")
    for sample in tumour_samples
}

# Sample ID -> tileset; both kinds are named "<sample> Loops".
loop_tilesets_1D = {
    sample: beddb(filepath, name=sample + " Loops")
    for sample, filepath in loop_files_1D.items()
}
loop_tilesets_2D = {
    sample: bed2ddb(filepath, name=sample + " Loops")
    for sample, filepath in loop_files_2D.items()
}
# Sample ID -> {"top", "bottom"} -> bedlike Track over the loop anchor tileset.
loop_tracks_1D = {
    sample: {
        position: Track(
            track_type="bedlike",
            position=position,
            tileset=tileset,
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": sample + " Loops",
            },
        )
        for position in ("top", "bottom")
    }
    for sample, tileset in loop_tilesets_1D.items()
}

def _loop_tracks_2d(sample):
    """Return the top, bottom and centre rectangle-domain loop Tracks for one sample."""
    # (dictionary key, HiGlass position, track type)
    placements = (
        ("top", "top", "horizontal-2d-rectangle-domains"),
        ("bottom", "bottom", "horizontal-2d-rectangle-domains"),
        ("centre", "center", "2d-rectangle-domains"),
    )
    tracks = {}
    for key, position, track_type in placements:
        options = {
            "showMousePosition": True,
            "name": sample + " Loops",
        }
        if key == "centre":
            # Only the centre track carries the flipDiagonal option.
            options["flipDiagonal"] = "copy"
        tracks[key] = Track(
            track_type=track_type,
            position=position,
            tileset=loop_tilesets_2D[sample],
            filetype="bed2ddb",
            options=options,
        )
    return tracks


# Sample ID -> {"top", "bottom", "centre"} -> Track.
loop_tracks_2D = {sample: _loop_tracks_2d(sample) for sample in loop_tilesets_2D.keys()}

# Paper figures

## Important prostate cancer regions

In [31]:
# Annotation tracks followed by every sample's top heatmap, samples in reverse order.
sample_tracks = [
    chrom_labels["top"],
    gene_annots["top"],
    *(lowc_heatmaps[sample]["top"] for sample in reversed(all_samples)),
]
# Same layout restricted to the tumour samples, in their original order.
tumour_tracks = [
    chrom_labels["top"],
    gene_annots["top"],
    *(lowc_heatmaps[sample]["top"] for sample in tumour_samples),
]

### FOXA1

In [15]:
# chr14:36000000-39000000, all samples.
foxa1_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr14", position, hg38)
        for position in (36000000, 39000000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    [foxa1_view],
    value_scale_syncs=[
        [(foxa1_view, lowc_heatmaps[sample]["top"]) for sample in all_samples],
    ],
    server_port=8193,
)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'E0YQXZX_SW2JXpeu4C3XkQ', 'tracks': {'top': [{'ty…

### _TMPRSS2 - ERG_ fusion

In [11]:
# chr21:35000000-43000000, tumour samples only.
t2e_view = View(
    tracks=tumour_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr21", position, hg38)
        for position in (35000000, 43000000)
    ],
)

# Every tumour heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    views=[t2e_view],
    value_scale_syncs=[
        [(t2e_view, lowc_heatmaps[sample]["top"]) for sample in tumour_samples],
    ],
    #zoom_syncs=[
    #    [(t2e_view, lowc_heatmaps[s]["center"]) for s in tumour_samples],
    #],
    #location_syncs=[
    #    [(t2e_view, lowc_heatmaps[s]["center"]) for s in tumour_samples],
    #],
    server_port=8192,
)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'e9Lfnuv7SNSoaRv4Uz6Ulw', 'tracks': {'top': [{'ty…

### AR

In [23]:
# chrX:65800000-68700000, all samples.
ar_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chrX", position, hg38)
        for position in (65800000, 68700000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    [ar_view],
    value_scale_syncs=[
        [(ar_view, lowc_heatmaps[sample]["top"]) for sample in all_samples],
    ],
    server_port=8192,
)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'JdyFubg-R2-66OQ1TFUxow', 'tracks': {'top': [{'ty…

### MYC

In [24]:
# chr8:126000000-130000000, all samples.
myc_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr8", position, hg38)
        for position in (126000000, 130000000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    [myc_view],
    value_scale_syncs=[
        [(myc_view, lowc_heatmaps[sample]["top"]) for sample in all_samples],
    ],
    server_port=8192,
)
display


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'SQ_PXsdOTaau3ltyrKoLlQ', 'tracks': {'top': [{'ty…

### HOXB13

In [25]:
# chr17:46160000-51320000, all samples.
hoxb13_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr17", position, hg38)
        for position in (46160000, 51320000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    [hoxb13_view],
    value_scale_syncs=[
        [(hoxb13_view, lowc_heatmaps[sample]["top"]) for sample in all_samples],
    ],
    server_port=8192,
)
display


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'XORqkA-4RxuSUVqkhmpf2A', 'tracks': {'top': [{'ty…

### KLK Cluster

In [12]:
# chr19:49000000-53000000, all samples.
klk_view = View(
    tracks=sample_tracks,
    #tracks = [chrom_labels["top"], gene_annots["top"]],
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr19", position, hg38)
        for position in (49000000, 53000000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    [klk_view],
    value_scale_syncs=[
        [(klk_view, lowc_heatmaps[sample]["top"]) for sample in all_samples],
    ],
    server_port=8192,
)
display


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'coYkGqDnSiu23Z7eqM5Geg', 'tracks': {'top': [{'ty…

In [13]:
# chr12:70480000-70570000 extended by 5000000 bp on each side, all samples.
# The flank is an integer (not 5e6) so the coordinates handed to negspy stay
# integers, as in every other view in this notebook.
myc_insertion_view = View(
    tracks=[chrom_labels["top"], gene_annots["top"]] + [lowc_heatmaps[s]["top"] for s in all_samples],
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr12", 70480000 - 5000000, hg38),
        nc.chr_pos_to_genome_pos("chr12", 70570000 + 5000000, hg38)
    ],
)

display, server, viewconf = higlass.display([
    myc_insertion_view
])
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'OUV4RS-cR2GIFhDvdO4Z0w', 'tracks': {'top': [{'ty…

## 2D Loci

In [12]:
# Top annotation tracks: chromosome labels, then gene annotations.
annot_tracks = [edge_tracks["top"] for edge_tracks in (chrom_labels, gene_annots)]

# One combined centre track per tumour sample: contact heatmap with the 2D loops.
# (lowc_heatmaps is keyed "center", loop_tracks_2D is keyed "centre".)
tumour_tracks = []
tumour_tracks.extend(
    CombinedTrack([lowc_heatmaps[sample]["center"], loop_tracks_2D[sample]["centre"]])
    for sample in tumour_samples
)

### FOXA1

In [15]:
def _foxa1_x_domain():
    """Return a fresh [start, end] genome-position list for chr14:36000000-39000000."""
    return [
        nc.chr_pos_to_genome_pos("chr14", position, hg38)
        for position in (36000000, 39000000)
    ]


# First view: annotation tracks only.
foxa1_views = [View(tracks=annot_tracks, initialXDomain=_foxa1_x_domain())]
# Then one view per tumour sample: left-hand annotations plus the combined track.
foxa1_views.extend(
    View(
        tracks=[chrom_labels["left"], gene_annots["left"], combined],
        initialXDomain=_foxa1_x_domain(),
    )
    for combined in tumour_tracks
)

display, server, viewconf = higlass.display(
    foxa1_views,
    server_port=8193,
)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'OKfZi5VPQD6eeOXEkWCmJg', 'tracks': {'top': [{'ty…