In [1]:
import higlass
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
import clodius
import os
import os.path as path
import pandas as pd
import numpy as np
import itertools
import negspy.coordinates as nc

# Load annotations

In [2]:
# Tilesets backing the annotation tracks.
genes = beddb("../2019-10-24_higlass/Data/hg38/gene-annotations-hg38.beddb")
chrom_sizes = chromsizes("../2019-10-24_higlass/hg38.chrom.sizes")

# Font size used by both kinds of annotation track.
label_font_size = 18
# Each of these is passed as both the height and the width of its tracks.
chr_label_size = 30
annots_size = 150

# Track-type orientation prefix for each position around a view.
annotation_orientations = {
    "top": "horizontal",
    "bottom": "horizontal",
    "left": "vertical",
    "right": "vertical",
}

# Chromosome-label tracks, keyed by position.
chrom_labels = {
    position: Track(
        track_type=orientation + "-chromosome-labels",
        tileset=chrom_sizes,
        position=position,
        height=chr_label_size,
        width=chr_label_size,
        options=dict(fontSize=label_font_size, showMousePosition=True),
    )
    for position, orientation in annotation_orientations.items()
}

# Gene-annotation tracks, keyed by position.
gene_annots = {
    position: Track(
        track_type=orientation + "-gene-annotations",
        tileset=genes,
        position=position,
        height=annots_size,
        width=annots_size,
        options=dict(fontSize=label_font_size, showMousePosition=True),
    )
    for position, orientation in annotation_orientations.items()
}

# Chromosome info used later to convert (chromosome, position) pairs into
# genome-wide coordinates.
hg38 = nc.get_chrominfo("hg38")

# Data
## Contact matrices

In [3]:
# --- Tumour samples ---------------------------------------------------------
tumour_metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"),
    sep="\t",
    header=0,
)
# Take an explicit copy of the filtered rows: a column is added on the next
# line, and assigning into a `.loc` slice triggers pandas'
# SettingWithCopyWarning.
tumour_metadata = tumour_metadata.loc[tumour_metadata.Include == "Yes", :].copy()
tumour_metadata["SampleID"] = ["PCa" + str(i) for i in tumour_metadata["Sample ID"]]

tumour_samples = tumour_metadata["SampleID"].tolist()
t2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "Yes", "SampleID"].tolist()
nont2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "No", "SampleID"].tolist()

# --- Benign samples ---------------------------------------------------------
benign_metadata = pd.read_csv(
    path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX_HiC", "config.tsv"),
    sep="\t",
    header=0,
)
benign_metadata = benign_metadata.loc[benign_metadata.Include == "Yes", :]
benign_samples = benign_metadata["Sample"].tolist()

# --- Cell lines -------------------------------------------------------------
cell_line_metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "Rhie_2019", "config.tsv"),
    sep="\t",
    header=0,
)
cell_line_samples = cell_line_metadata["Run_Accession"].tolist()

all_samples = tumour_samples + benign_samples + cell_line_samples

# Per-sample table that the track definitions read `Label` and
# `Sample_Colour` from.
metadata = pd.read_csv(
    path.join("..", "2020-01-15_TAD-aggregation", "config.tsv"),
    sep="\t",
    index_col=False,
    header=0,
)

# Track definitions look each sample up with `.values[0]`, which raises an
# uninformative IndexError when a sample has no row; fail here with the names.
samples_without_metadata = sorted(set(all_samples) - set(metadata["SampleID"]))
assert not samples_without_metadata, (
    "Samples missing from the TAD-aggregation config: {}".format(samples_without_metadata)
)

# Contact matrices, listed in the same order as `all_samples`.
cooler_files = (
    [path.join("..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts", s + ".mcool") for s in tumour_samples + benign_samples]
    + [path.join("..", "..", "Data", "External", "Rhie_2019", "Contacts", s + ".mcool") for s in cell_line_samples]
)
lowc_tilesets = {s: cooler(f) for s, f in zip(all_samples, cooler_files)}

# Bin sizes (1-5 x 10^3 ... 10^6) ordered from largest to smallest. The index
# of a bin size in this list is what gets passed to the heatmaps as "maxZoom".
resolutions = [
    multiple * 10 ** exponent
    for exponent in (6, 5, 4, 3)
    for multiple in (5, 4, 3, 2, 1)
]

min_resolution = 40000
heatmap_size = 250
# Three-stop colour scale for the heatmaps.
colour_range = [
    "rgba(65, 105, 225, 1.0)",
    "rgba(255, 250, 250, 1.0)",
    "rgba(255, 25, 25, 1.0)",
]

# "maxZoom" is the same for every heatmap, so compute it once.
max_zoom_level = str(resolutions.index(min_resolution))

# Track-type prefix for each heatmap position; the central 2D heatmap takes
# no prefix.
heatmap_type_prefixes = {
    "top": "horizontal-",
    "bottom": "horizontal-",
    "left": "vertical-",
    "right": "vertical-",
    "center": "",
}

# lowc_heatmaps[sample][position] -> heatmap Track for that sample.
lowc_heatmaps = {
    sample: {
        position: Track(
            track_type=prefix + "heatmap",
            position=position,
            tileset=lowc_tilesets[sample],
            filetype="cooler",
            height=heatmap_size,
            options={
                "maxZoom": max_zoom_level,
                "colorbarPosition": "topRight",
                "showMousePosition": True,
                "name": metadata.loc[metadata["SampleID"] == sample, "Label"].values[0],
                "colorRange": colour_range,
            },
        )
        for position, prefix in heatmap_type_prefixes.items()
    }
    for sample in all_samples
}

## H3K27ac ChIP-seq

In [4]:
line_height = 150

# Directory holding the H3K27ac signal, peak and CORE files.
H3K27AC_DIR = path.join("..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks")

# Track-type prefix per position. Bar/line tracks are prefixed in every
# position; bedlike tracks are only prefixed in the vertical positions.
_SIGNAL_TYPE_PREFIXES = {"top": "horizontal-", "bottom": "horizontal-", "left": "vertical-", "right": "vertical-"}
_BEDLIKE_TYPE_PREFIXES = {"top": "", "bottom": "", "left": "vertical-", "right": "vertical-"}


def _sample_metadata_value(sample, column):
    """Return `column` from the first `metadata` row whose SampleID equals `sample`.

    Raises IndexError when no row matches.
    """
    return metadata.loc[metadata["SampleID"] == sample, column].values[0]


def _tracks_by_sample(tilesets, type_prefixes, type_suffix, filetype, name_suffix, colour_option, size=None):
    """Build one Track per sample and position.

    Parameters
    ----------
    tilesets : dict
        Sample ID -> tileset to display.
    type_prefixes : dict
        Position -> prefix prepended to `type_suffix` to form the track type.
    type_suffix : str
        Track type without its orientation prefix (e.g. "bar", "bedlike").
    filetype : str
        Passed through to `Track(filetype=...)`.
    name_suffix : str
        Appended to the sample's `Label` to form the track name.
    colour_option : str
        Name of the track option that receives the sample's `Sample_Colour`.
    size : int, optional
        When given, passed as both `height` and `width`; when omitted,
        neither is set.

    Returns
    -------
    dict
        tracks[sample][position] -> Track
    """
    size_kwargs = {} if size is None else {"height": size, "width": size}
    tracks = {}
    for sample, tileset in tilesets.items():
        # Look the label and colour up once per sample rather than once per position.
        name = _sample_metadata_value(sample, "Label") + name_suffix
        colour = _sample_metadata_value(sample, "Sample_Colour")
        tracks[sample] = {
            position: Track(
                track_type=prefix + type_suffix,
                position=position,
                tileset=tileset,
                filetype=filetype,
                options={
                    "showMousePosition": True,
                    "name": name,
                    colour_option: colour,
                },
                **size_kwargs
            )
            for position, prefix in type_prefixes.items()
        }
    return tracks


# H3K27ac signal, drawn as bars and as lines from the same tilesets.
chip_files = {s: path.join(H3K27AC_DIR, "Peaks", s + "_FE.sorted.filtered.bw") for s in tumour_samples}
chip_tilesets = {s: bigwig(f) for s, f in chip_files.items()}
chip = _tracks_by_sample(
    chip_tilesets, _SIGNAL_TYPE_PREFIXES, "bar", "vector", " H3K27ac", "barFillColor", size=line_height
)
chip_line = _tracks_by_sample(
    chip_tilesets, _SIGNAL_TYPE_PREFIXES, "line", "vector", " H3K27ac", "lineStrokeColor", size=line_height
)

# H3K27ac peaks.
peak_files = {s: path.join(H3K27AC_DIR, "Peaks", s + "_peaks.filtered.beddb") for s in tumour_samples}
peak_tilesets = {s: beddb(f) for s, f in peak_files.items()}
peaks = _tracks_by_sample(
    peak_tilesets, _BEDLIKE_TYPE_PREFIXES, "bedlike", "beddb", " H3K27ac Peaks", "fillColor"
)

# COREs.
core_files = {s: path.join(H3K27AC_DIR, "COREs", s + ".cores.beddb") for s in tumour_samples}
core_tilesets = {s: beddb(f) for s, f in core_files.items()}
cores = _tracks_by_sample(
    core_tilesets, _BEDLIKE_TYPE_PREFIXES, "bedlike", "beddb", " COREs", "fillColor"
)


## Load breakpoints

In [5]:
BREAK_DIR = path.join("..", "2020-02-19_chromoplexy")
break_graphs_dir = path.join(BREAK_DIR, "Graphs")

# Paired structural-variant breakpoints.
breaks = pd.read_csv(
    path.join(break_graphs_dir, "sv-breakpoints.paired.tsv"),
    sep="\t",
    header=0,
    index_col=False,
)

# One breakpoint beddb per tumour sample.
break_files_1D = {
    sample: path.join(BREAK_DIR, "Tracks", sample + ".breaks.beddb")
    for sample in tumour_samples
}
break_tilesets_1D = {
    sample: beddb(
        beddb_file,
        name=metadata.loc[metadata["SampleID"] == sample, "Label"].values[0] + " Breaks",
    )
    for sample, beddb_file in break_files_1D.items()
}

# Track-type prefix for each position of a bedlike track.
break_type_prefixes = {"top": "", "bottom": "", "left": "vertical-", "right": "vertical-"}

# break_tracks_1D[sample][position] -> bedlike Track of that sample's breakpoints.
# The track name is the sample label alone; the " Breaks" suffix is only on the tileset.
break_tracks_1D = {
    sample: {
        position: Track(
            track_type=prefix + "bedlike",
            position=position,
            tileset=break_tilesets_1D[sample],
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": metadata.loc[metadata["SampleID"] == sample, "Label"].values[0],
            },
        )
        for position, prefix in break_type_prefixes.items()
    }
    for sample in tumour_samples
}

# Supporting tables.
disruption_tests = pd.read_csv(path.join(break_graphs_dir, "sv-disruption-tests.tsv"), sep="\t")
bp_singletons = pd.read_csv(path.join(break_graphs_dir, "sv-breakpoints.tsv"), sep="\t")
# NOTE(review): same file as `breaks` above, read here with pandas' default
# `index_col`; confirm whether both copies are needed.
bp_pairs = pd.read_csv(path.join(break_graphs_dir, "sv-breakpoints.paired.tsv"), sep="\t")
disruption_tads = pd.read_csv(
    path.join("..", "2020-02-19_sv-disruption-TADs", "sv-disruption-tests.TADs.tsv"),
    sep="\t",
)

# Structural variants and novel COREs

## ZNF516 & PMEPA1

### Acetylation

In [6]:
mut_s = "PCa13848"
zoom_offset = 1e5  # not used in this cell

# The two loci to show, as (chromosome, start, end).
loci = {
    "x": ("chr18", 76200000, 76600000),
    "y": ("chr20", 57600000, 57800000),
}
# Same loci as [start, end] in genome-wide coordinates.
pos = {
    axis: [nc.chr_pos_to_genome_pos(chrom, bp, hg38) for bp in (start, end)]
    for axis, (chrom, start, end) in loci.items()
}

# Every tumour sample except the one carrying the rearrangement.
other_samples = [s for s in tumour_samples if s != mut_s]

comb_tracks = {
    # H3K27ac signal of all other samples, overlaid in a single track.
    "chip": CombinedTrack([chip[s]["top"] for s in other_samples]),
}

# views: one per locus, each with the same stack of top tracks
v = [
    View(
        tracks=(
            [
                chrom_labels["top"],
                gene_annots["top"],
                break_tracks_1D[mut_s]["top"],
                chip[mut_s]["top"],
                cores[mut_s]["top"],
                peaks[mut_s]["top"],
                comb_tracks["chip"],
            ]
            + [peaks[s]["top"] for s in other_samples]
            + [cores[s]["top"] for s in other_samples]
        ),
        initialXDomain=domain,
    )
    for domain in pos.values()
]

# Zoom is synced across the views; location is not.
display, server, viewconf = higlass.display(
    views=v,
    zoom_syncs=[v],
    # Within each view, put every sample's H3K27ac track in one value-scale group.
    value_scale_syncs=[
        [(view, chip[s]["top"]) for s in tumour_samples]
        for view in (v[0], v[1])
    ],
    server_port=8192,
)

display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'GgV3b8uEQYSV9msPg0CGxA', 'tracks': {'top': [{'ty…

### Rearrangement

In [12]:
mut_s = "PCa13848"
nonmut_s = "PCa40507"

# [start, end] of each axis in genome-wide coordinates.
pos = {
    "x": [
        nc.chr_pos_to_genome_pos("chr18", 76200000, hg38),
        nc.chr_pos_to_genome_pos("chr18", 76600000, hg38),
    ],
    "y": [
        nc.chr_pos_to_genome_pos("chr20", 57600000, hg38),
        nc.chr_pos_to_genome_pos("chr20", 57800000, hg38),
    ]
}


def _rearrangement_view(sample, x_domain, y_domain, show_breaks=False):
    """Build a 2D view of one sample: annotation and H3K27ac tracks on the top
    and left edges, with the contact heatmap in the centre.

    Parameters
    ----------
    sample : str
        Sample ID whose tracks are shown.
    x_domain, y_domain : list
        [start, end] genome-wide coordinates for the initial X and Y domains.
    show_breaks : bool
        When True, append the sample's breakpoint track to each edge.

    Each track is listed once per edge. The original first view listed
    `chrom_labels["left"]` twice, which put the same Track object into the
    view twice and made it differ from the second view.
    """
    tracks = []
    for side in ("top", "left"):
        tracks += [
            chrom_labels[side],
            gene_annots[side],
            chip[sample][side],
            peaks[sample][side],
            cores[sample][side],
        ]
        if show_breaks:
            tracks.append(break_tracks_1D[sample][side])
    tracks.append(lowc_heatmaps[sample]["center"])
    return View(tracks=tracks, initialXDomain=x_domain, initialYDomain=y_domain)


# views: rearranged sample (with its breakpoints) next to a non-rearranged one
v = [
    _rearrangement_view(mut_s, pos["x"], pos["y"], show_breaks=True),
    _rearrangement_view(nonmut_s, pos["x"], pos["y"]),
]

display, server, viewconf = higlass.display(
    views=v,
    zoom_syncs=[v],
    location_syncs=[v],
    # Only tracks that are actually in the views are synced. The previous
    # group for the "bottom" heatmaps referred to tracks that neither view
    # contains, so it has been removed.
    value_scale_syncs=[
        [(v[0], chip[mut_s]["top"]), (v[1], chip[nonmut_s]["top"])],
        [(v[0], chip[mut_s]["left"]), (v[1], chip[nonmut_s]["left"])],
        [(v[0], chip[mut_s]["top"]), (v[0], chip[mut_s]["left"])],
        [(v[0], lowc_heatmaps[mut_s]["center"]), (v[1], lowc_heatmaps[nonmut_s]["center"])],
    ],
    server_port=8192
)

display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'MUQkIluOStypHGjrtOoWZQ', 'tracks': {'top': [{'ty…

## COL1A1 & RNF144A

### Rearrangement

In [7]:
mut_s = "PCa40507"
nonmut_s = "PCa13266"
# Padding (bp) added on each side of the loci. Kept as an integer so the
# genome-wide coordinates below stay integers (1e5 made them floats).
zoom_offset = 100000

# [start, end] in genome-wide coordinates: one X locus, two alternative Y loci.
pos = {
    "x": [
        nc.chr_pos_to_genome_pos("chr17", 50100000 - zoom_offset, hg38),
        nc.chr_pos_to_genome_pos("chr17", 50400000 + zoom_offset, hg38),
    ],
    "y": [
        [
            nc.chr_pos_to_genome_pos("chr17", 11300000 - zoom_offset, hg38),
            nc.chr_pos_to_genome_pos("chr17", 11800000 + zoom_offset, hg38),
        ],
        [
            nc.chr_pos_to_genome_pos("chr2", 6726000 - zoom_offset, hg38),
            nc.chr_pos_to_genome_pos("chr2", 7064000 + zoom_offset, hg38),
        ]
    ]
}


def _contact_view(sample, x_domain, y_domain):
    """Build a 2D view of one sample: annotation and H3K27ac tracks on the top
    and left edges, with the contact heatmap in the centre.

    Parameters
    ----------
    sample : str
        Sample ID whose tracks are shown.
    x_domain, y_domain : list
        [start, end] genome-wide coordinates for the initial X and Y domains.

    Each track is listed once per edge. The original views listed
    `chrom_labels["left"]` twice, which put the same Track object into each
    view twice.
    """
    tracks = []
    for side in ("top", "left"):
        tracks += [
            chrom_labels[side],
            gene_annots[side],
            chip[sample][side],
            peaks[sample][side],
            cores[sample][side],
        ]
    tracks.append(lowc_heatmaps[sample]["center"])
    return View(tracks=tracks, initialXDomain=x_domain, initialYDomain=y_domain)


# views, in order: (mut, y[0]), (nonmut, y[0]), (mut, y[1]), (nonmut, y[1])
v = [
    _contact_view(sample, pos["x"], y_domain)
    for y_domain in pos["y"]
    for sample in (mut_s, nonmut_s)
]

display, server, viewconf = higlass.display(
    views=v,
    zoom_syncs=[v],
    # Location is synced within each mut/nonmut pair, not across the two Y loci.
    location_syncs=[
        v[0:2],
        v[2:4]
    ],
    # Only tracks that are actually in the views are synced. The previous
    # groups for the "bottom" heatmaps referred to tracks that no view
    # contains, so they have been removed.
    value_scale_syncs=[
        [(v[0], chip[mut_s]["top"]), (v[1], chip[nonmut_s]["top"])],
        [(v[0], chip[mut_s]["left"]), (v[1], chip[nonmut_s]["left"])],
        [(v[2], chip[mut_s]["top"]), (v[3], chip[nonmut_s]["top"])],
        [(v[2], chip[mut_s]["left"]), (v[3], chip[nonmut_s]["left"])],
        [(v[0], chip[mut_s]["top"]), (v[0], chip[mut_s]["left"])],
        [(v[2], chip[mut_s]["top"]), (v[2], chip[mut_s]["left"])],
        [(v[0], lowc_heatmaps[mut_s]["center"]), (v[1], lowc_heatmaps[nonmut_s]["center"])],
        [(v[2], lowc_heatmaps[mut_s]["center"]), (v[3], lowc_heatmaps[nonmut_s]["center"])],
    ],
    server_port=8192
)

display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'HYaM6TUdSy6RTeCnidmuHg', 'tracks': {'top': [{'ty…