# Environment

In [1]:
import higlass
print(higlass.__version__)
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
from higlass.viewer import save_b64_image_to_png
from cooler import Cooler

import clodius
import os
import os.path as path
import pandas as pd
import numpy as np
import scipy.stats as stats
import itertools
import negspy.coordinates as nc

import matplotlib.pyplot as plt

def bed2ddb(filepath, uuid=None, **kwargs):
    """Wrap a bed2ddb file at ``filepath`` in a ``Tileset``.

    Parameters
    ----------
    filepath
        Path handed unchanged to the clodius bed2ddb tile functions.
    uuid
        Forwarded to ``Tileset`` as ``uuid``.
    **kwargs
        Any further keyword arguments are forwarded to ``Tileset``.

    Returns
    -------
    Tileset
        A tileset whose ``tileset_info`` callback calls
        ``get_2d_tileset_info(filepath)`` and whose ``tiles`` callback
        resolves tile ids through ``tiles_wrapper_2d``.
    """
    from clodius.tiles.bed2ddb import get_2d_tileset_info, get_2D_tiles
    from clodius.tiles.utils import tiles_wrapper_2d

    def _info():
        return get_2d_tileset_info(filepath)

    def _single_tile(z, x, y):
        # get_2D_tiles returns a mapping; only the (x, y) entry is wanted.
        return get_2D_tiles(filepath, z, x, y)[(x, y)]

    def _tiles(tids):
        return tiles_wrapper_2d(tids, _single_tile)

    return Tileset(uuid=uuid, tileset_info=_info, tiles=_tiles, **kwargs)


0.4.5


# Annotations

In [2]:
# Annotation inputs and the sizing constants shared by the tracks below.
annot_dir = path.join("..", "results", "2019-10-24_higlass")
genes = beddb(path.join(annot_dir, "Data", "hg38", "gene-annotations-hg38.beddb"))
# NOTE(review): unlike `genes`, this path is not built from `annot_dir`, so it
# resolves relative to the working directory - confirm this is intended.
chrom_sizes = chromsizes(path.join("2019-10-24_higlass", "hg38.chrom.sizes"))
label_font_size = 18
chr_label_size = 30
annots_size = 150

# Track position -> orientation prefix used to build the track type name.
_POSITION_ORIENTATIONS = {
    "top": "horizontal",
    "bottom": "horizontal",
    "left": "vertical",
    "right": "vertical",
}


def _tracks_by_position(type_suffix, tileset, size):
    """Build one ``Track`` per position for a single annotation type.

    Parameters
    ----------
    type_suffix
        Appended to the orientation prefix to form ``track_type``
        (e.g. ``"-chromosome-labels"`` gives ``"horizontal-chromosome-labels"``).
    tileset
        Tileset passed to every track.
    size
        Used for both ``height`` and ``width`` of every track.

    Returns
    -------
    dict
        Maps ``"top"``, ``"bottom"``, ``"left"``, ``"right"`` (in that order)
        to the corresponding ``Track``.
    """
    return {
        position: Track(
            track_type=orientation + type_suffix,
            tileset=tileset,
            position=position,
            height=size,
            width=size,
            # A fresh options dict per track, so tracks never share one.
            options={
                "fontSize": label_font_size,
                "showMousePosition": True,
            },
        )
        for position, orientation in _POSITION_ORIENTATIONS.items()
    }


chrom_labels = _tracks_by_position("-chromosome-labels", chrom_sizes, chr_label_size)
chrom_grid = _tracks_by_position("-chromosome-grid", chrom_sizes, chr_label_size)
gene_annots = _tracks_by_position("-gene-annotations", genes, annots_size)

hg38 = nc.get_chrominfo("hg38")

# Data

## Metadata

In [3]:
# Sample sheet, restricted to the rows marked for inclusion.
meta = pd.read_csv("config.tsv", sep="\t")
meta = meta[meta["Include"] == "Yes"]

# Row masks reused by several of the sample groups below.
_is_primary = meta["Source"] == "Primary"
_is_cell_line = meta["Source"] == "Cell Line"


def _sample_ids(mask):
    """Return the Sample_ID values of the `meta` rows selected by `mask`."""
    return meta.loc[mask, "Sample_ID"].tolist()


# Named groups of sample IDs used throughout the notebook.
SAMPLES = {
    "all": meta["Sample_ID"].tolist(),
    "tumour": _sample_ids(_is_primary & (meta["Type"] == "Malignant")),
    "benign": _sample_ids(_is_primary & (meta["Type"] == "Benign")),
    "primary": _sample_ids(_is_primary),
    "clines": _sample_ids(_is_cell_line),
}
meta

Unnamed: 0,Sample_ID,Label,Include,Source,Type,Sample_Colour,Type_Colour,Tissue,T2E
0,PCa13266,CPCG0268,Yes,Primary,Malignant,#7F3C8D,#1F77B4,Prostate,No
1,PCa13848,CPCG0366,Yes,Primary,Malignant,#11A579,#1F77B4,Prostate,Yes
2,PCa14121,CPCG0255,Yes,Primary,Malignant,#3969AC,#1F77B4,Prostate,No
3,PCa19121,CPCG0258,Yes,Primary,Malignant,#F2B701,#1F77B4,Prostate,Yes
4,PCa3023,CPCG0324,Yes,Primary,Malignant,#E73F74,#1F77B4,Prostate,Yes
5,PCa33173,CPCG0246,Yes,Primary,Malignant,#80BA5A,#1F77B4,Prostate,No
6,PCa40507,CPCG0342,Yes,Primary,Malignant,#E68310,#1F77B4,Prostate,Yes
7,PCa51852,CPCG0336,Yes,Primary,Malignant,#008695,#1F77B4,Prostate,Yes
8,PCa53687,CPCG0339,Yes,Primary,Malignant,#CF1C90,#1F77B4,Prostate,No
9,PCa56413,CPCG0331,Yes,Primary,Malignant,#F97B72,#1F77B4,Prostate,Yes


## Contact matrices

In [4]:
# Bin sizes in bp, coarsest first: 1-5 x 10^6 down to 1-5 x 10^3.
resolutions = sorted(
    (multiple * 10 ** power for power in range(3, 7) for multiple in range(1, 6)),
    reverse=True,
)
min_resolution = 10000
# Three colour stops passed to the heatmap "colorRange" option.
colour_range = [
    "rgba(65, 105, 225, 1.0)",
    "rgba(255, 250, 250, 1.0)",
    "rgba(255, 25, 25, 1.0)",
]

# NOTE(review): the two directories start at different depths ("..", vs
# "..", "..") although both descend into "Data" - confirm both are correct.
mtx_dir = {
    "primary": path.join("..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts"),
    "clines": path.join("..", "..", "Data", "External", "Rhie_2019", "Contacts"),
}

# One .mcool path per sample, primary samples first, then cell lines.
mtx_files = {}
for group in ("primary", "clines"):
    for s in SAMPLES[group]:
        mtx_files[s] = path.join(mtx_dir[group], s + ".mcool")

mtx_tilesets = {sample: cooler(mcool) for sample, mcool in mtx_files.items()}

# (position key, track-type prefix, height) for each heatmap variant.
# The prefix is currently empty everywhere; alternatives tried previously were
# "horizontal-"/"vertical-" and "linear-" for the four side positions.
_heatmap_layout = [
    ("top", "", 120),
    ("right", "", 120),
    ("bottom", "", 120),
    ("left", "", 120),
    ("center", "", 480),
]
# Position of `min_resolution` within `resolutions`, as a string.
_max_zoom = str(resolutions.index(min_resolution))


def _heatmap_track(sample, prefix, height):
    """Build the contact-matrix heatmap ``Track`` for one sample.

    NOTE(review): unlike the other ``Track(...)`` calls in this notebook,
    ``position`` is not passed here; the key in ``contacts`` is the only
    record of the intended position - confirm this is intended.
    """
    return Track(
        track_type=prefix + "heatmap",
        datatype="matrix",
        filetype="cooler",
        tileset=mtx_tilesets[sample],
        height=height,
        options={
            "maxZoom": _max_zoom,
            "colorbarPosition": "topRight",
            "showMousePosition": True,
            "name": meta.loc[meta["Sample_ID"] == sample, "Label"].values[0],
            "colorRange": colour_range,
        },
    )


# contacts[sample][position] -> heatmap Track
contacts = {
    sample: {
        position: _heatmap_track(sample, prefix, height)
        for position, prefix, height in _heatmap_layout
    }
    for sample in mtx_files
}

## Structural variants

In [5]:
# Inputs: paired breakpoint table plus per-sample 1D/2D breakpoint track files.
sv_dir = path.join("..", "results", "2020-02-19_chromoplexy")
sv_track_dir = path.join(sv_dir, "Tracks")
breaks = pd.read_csv(
    path.join(sv_dir, "Graphs", "sv-breakpoints.paired.tsv"),
    sep="\t",
    header=0,
    index_col=False,
)


def _meta_value(sample, column):
    """Return the first `column` value among `meta` rows whose Sample_ID is `sample`."""
    return meta.loc[meta["Sample_ID"] == sample, column].values[0]


# File suffix and tileset constructor for each track dimensionality.
_sv_suffixes = {"1D": ".breaks.beddb", "2D": ".breaks.bed2ddb"}
_sv_loaders = {"1D": beddb, "2D": bed2ddb}

# sv_files[dim][sample] -> track file path (tumour samples only)
sv_files = {
    dim: {
        sample: path.join(sv_track_dir, sample + suffix)
        for sample in SAMPLES["tumour"]
    }
    for dim, suffix in _sv_suffixes.items()
}

# sv_tilesets[dim][sample] -> tileset named "<Label> Breaks"
sv_tilesets = {
    dim: {
        sample: loader(track_file, name=_meta_value(sample, "Label") + " Breaks")
        for sample, track_file in sv_files[dim].items()
    }
    for dim, loader in _sv_loaders.items()
}


def _sv_track_1d(sample, position, prefix):
    """Build the bedlike breakpoint ``Track`` for `sample` at `position`."""
    colour = _meta_value(sample, "Sample_Colour")
    return Track(
        track_type=prefix + "bedlike",
        datatype="bedlike",
        filetype="beddb",
        position=position,
        tileset=sv_tilesets["1D"][sample],
        options={
            "showMousePosition": True,
            "name": _meta_value(sample, "Label"),
            "labelPosition": "topLeft",
            "labelColor": colour,
            "fillColor": colour,
        },
    )


def _sv_track_2d(sample, position, prefix):
    """Build the 2D rectangle-domain breakpoint ``Track`` for `sample` at `position`."""
    colour = _meta_value(sample, "Sample_Colour")
    return Track(
        track_type=prefix + "2d-rectangle-domains",
        datatype="2d-rectangle-domains",
        filetype="bed2ddb",
        position=position,
        tileset=sv_tilesets["2D"][sample],
        height=50,
        width=50,
        options={
            "showMousePosition": True,
            "name": _meta_value(sample, "Label"),
            "labelPosition": "topLeft",
            "labelColor": colour,
            "rectangleDomainFillColor": colour,
        },
    )


# (position, track-type prefix) pairs for each dimensionality.
_sv_layout_1d = [
    ("top", ""),
    ("bottom", ""),
    ("left", "vertical-"),
    ("right", "vertical-"),
]
# later versions of HiGlass switch "horizontal"/"vertical" to "linear"
_sv_layout_2d = [
    ("top", "horizontal-"),
    ("right", "vertical-"),
    ("bottom", "horizontal-"),
    ("left", "vertical-"),
    ("center", ""),
]

# svs[dim][sample][position] -> Track
svs = {
    "1D": {
        sample: {
            position: _sv_track_1d(sample, position, prefix)
            for position, prefix in _sv_layout_1d
        }
        for sample in SAMPLES["tumour"]
    },
    "2D": {
        sample: {
            position: _sv_track_2d(sample, position, prefix)
            for position, prefix in _sv_layout_2d
        }
        for sample in SAMPLES["tumour"]
    },
}
breaks

Unnamed: 0,chr_x,start_x,end_x,chr_y,start_y,end_y,breakpoint_ID_x,breakpoint_ID_y,component_ID_x,component_ID_y,test_ID_x,test_ID_y,SampleID,sv_type
0,chr3,73800000,75000000,chr4,179100000,179600000,1,0,0,0,1,0,PCa13266,BND
1,chr3,76010000,76130000,chr4,181680000,181710000,3,2,1,1,3,2,PCa13266,BND
2,chr6,84300000,84600000,chr6,127700000,128300000,4,5,2,2,4,5,PCa13266,DUP
3,chr1,75460000,75560000,chr1,79350000,79420000,6,7,0,0,6,7,PCa13848,DUP
4,chr1,151000000,151300000,chr13,99400000,100600000,29,28,11,11,31,28,PCa13848,BND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,chr12,108100000,108600000,chr7,113700000,116100000,308,309,1,1,308,309,PCa58215,BND
190,chr12,128100000,130500000,chr7,106500000,108000000,315,311,2,2,311,312,PCa58215,BND
191,chr12,128100000,130500000,chr7,138200000,139200000,315,310,2,2,311,310,PCa58215,BND
192,chr13,30100000,30600000,chr13,73700000,75300000,313,314,3,3,314,315,PCa58215,DEL


# Visualization

This section shows the contact matrix around a single structural variant, i.e. one pair of breakpoints (one row of the `breaks` table).
Start by specifying the positional row index in the `breaks` table that you want to show.

In [33]:
# Positional row of `breaks` to visualise (it is used with `breaks.iloc`).
# Row 19 is the chr18-chr20 BND in sample PCa13848.
row_idx = 19

The next cell looks up the sample and linked breakpoints (same component) for that row, then computes the genomic window (± `offset` bp around each breakpoint) to display.

In [34]:
# Padding in basepairs added on each side of both breakpoints.
# It is a float, so the start/end values computed below are floats as well.
offset = 1e5

# The selected row of the breakpoint table, fetched once by position.
sv_row = breaks.iloc[row_idx]
mut_sample = sv_row["SampleID"]

# Show every breakpoint pair of this sample that shares the row's component.
component_id = sv_row["component_ID_x"]
print(component_id)
same_sample = breaks["SampleID"] == mut_sample
in_component = (
    (breaks["component_ID_x"] == component_id)
    | (breaks["component_ID_y"] == component_id)
)
print(breaks.loc[same_sample & in_component, :])


# Per-axis genomic window: chromosome plus padded start/end.
coords = {
    axis: {
        "chr": sv_row["chr_" + axis],
        "start": sv_row["start_" + axis] - offset,
        "end": sv_row["end_" + axis] + offset,
    }
    for axis in ("x", "y")
}

# Per-axis [start, end] converted with the hg38 chromosome info.
domains = {
    axis: [
        nc.chr_pos_to_genome_pos(window["chr"], window[edge], hg38)
        for edge in ("start", "end")
    ]
    for axis, window in coords.items()
}
print(mut_sample)
print(coords)

17
    chr_x   start_x     end_x  chr_y   start_y     end_y  breakpoint_ID_x  \
19  chr18  76340000  76460000  chr20  57700000  58220000               40   

    breakpoint_ID_y  component_ID_x  component_ID_y  test_ID_x  test_ID_y  \
19               41              17              17         42         43   

    SampleID sv_type  
19  PCa13848     BND  
PCa13848
{'x': {'chr': 'chr18', 'start': 76240000.0, 'end': 76560000.0}, 'y': {'chr': 'chr20', 'start': 57600000.0, 'end': 58320000.0}}


The cell below will show everything together in a combined plot.

In [35]:
# Tracks for the selected sample: chromosome labels and grid on the top and
# left, the contact heatmap, and the 1D/2D breakpoint tracks.
mut_tracks = [
    chrom_labels["top"],
    chrom_grid["top"],
    chrom_labels["left"],
    chrom_grid["left"],
    contacts[mut_sample]["center"],
    svs["1D"][mut_sample]["top"],
    svs["1D"][mut_sample]["left"],
    svs["2D"][mut_sample]["left"],
]

# A single view, opened on the window computed for the selected breakpoints.
sv_view = View(
    tracks=mut_tracks,
    initialXDomain=domains["x"],
    initialYDomain=domains["y"],
)
views = [sv_view]

# `higlass.display` returns three values; only the first is shown below.
d, s, v = higlass.display(
    views=views,
    zoom_syncs=[views],
    location_syncs=[views],
)
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'Oi58btfoR1Ky9FXIGJcniw', 'tracks': {'top': [{'ty…