# Environment

In [25]:
import higlass
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb
import os
import os.path as path
import pandas as pd
import negspy.coordinates as nc
import numpy as np

# Data
## Load annotations

In [26]:
# load annotations
# Each annotation is a dict keyed by the view edge ("top" / "left") it is drawn on.
# All tilesets are fetched from the http://higlass.io/api/v1 server.

# Chromosome name labels
chrom_labels = {
    edge: Track(
        track_type=kind,
        server="http://higlass.io/api/v1",
        tileset_uuid="NyITQvZsS_mOFNlz5C2LJg",
        position=edge,
    )
    for edge, kind in (
        ("top", "horizontal-chromosome-labels"),
        ("left", "vertical-chromosome-labels"),
    )
}

# Gene annotations
gene_annots = {
    edge: Track(
        track_type=kind,
        server="http://higlass.io/api/v1",
        tileset_uuid="P0PLbQMwTYGy-5uPIQid7A",
        position=edge,
    )
    for edge, kind in (
        ("top", "horizontal-gene-annotations"),
        ("left", "vertical-gene-annotations"),
    )
}

# CTCF motifs; the same "bedlike" track type is used on both edges
ctcf_motifs = {
    edge: Track(
        track_type="bedlike",
        server="http://higlass.io/api/v1",
        tileset_uuid="EkPGY0iFQx6Nq6vdF8CpWA",
        position=edge,
    )
    for edge in ("top", "left")
}

# Chromosome info for hg38; passed to nc.chr_pos_to_genome_pos when setting view domains
hg38 = nc.get_chrominfo("hg38")

## Load contact matrices

In [27]:
# Sample sheet: one row per sample, with the identifier in the "Sample ID" column
metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"),
    sep="\t",
    header=0,
)
samples = ["PCa" + str(sample_id) for sample_id in metadata["Sample ID"].tolist()]

# One multi-resolution cooler file (and one local tileset) per sample
cooler_files = [
    path.join(
        "..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts",
        sample + ".mcool",
    )
    for sample in samples
]
lowc_tilesets = {sample: cooler(mcool) for sample, mcool in zip(samples, cooler_files)}

# Bin sizes (bp), coarsest first. The position of a bin size in this list is what
# gets passed to the heatmap tracks as "maxZoom".
# NOTE(review): presumably mirrors the zoom levels stored in the .mcool files - confirm.
resolutions = [
    5000000, 4000000, 3000000, 2000000, 1000000,
    500000, 400000, 300000, 200000, 100000,
    50000, 40000, 30000, 20000, 10000,
    5000, 4000, 3000, 2000, 1000,
]


def _lowc_heatmap_tracks(tileset):
    """Build the heatmap tracks for one contact-matrix tileset.

    Returns a dict keyed by "top", "bottom", "left", "right" and "centre".
    Every track caps "maxZoom" at the list index of the 10000 bp resolution;
    the two horizontal tracks additionally place their colour bar top-right.
    """
    def heatmap_options(**extra):
        # Build a new dict on every call so that no two tracks share an options object
        opts = {"maxZoom": str(resolutions.index(10000))}
        opts.update(extra)
        return opts

    tracks = {}
    for edge in ("top", "bottom"):
        tracks[edge] = Track(
            track_type='horizontal-heatmap',
            position=edge,
            tileset=tileset,
            filetype="cooler",
            height=150,
            options=heatmap_options(colorbarPosition="topRight"),
        )
    for edge in ("left", "right"):
        tracks[edge] = Track(
            track_type='vertical-heatmap',
            position=edge,
            tileset=tileset,
            filetype="cooler",
            width=150,
            options=heatmap_options(),
        )
    # The dict key is spelled "centre" while the HiGlass position is 'center'
    tracks["centre"] = Track(
        track_type='heatmap',
        position='center',
        tileset=tileset,
        filetype="cooler",
        height=500,
        options=heatmap_options(),
    )
    return tracks


# sample -> {edge -> heatmap Track}
lowc_heatmaps = {sample: _lowc_heatmap_tracks(lowc_tilesets[sample]) for sample in samples}

## Load TADs

In [55]:
# Window sizes 3..30 (inclusive); each one has its own TAD call file per sample
windows = list(range(3, 31))

# sample -> window -> path of the per-window TAD calls
tad_files = {
    sample: {
        win: path.join(
            "..", "2020-01-15_TAD-aggregation", "resolved-TADs", "separated-TADs",
            "{}.40000bp.w_{}.domains.beddb".format(sample, win),
        )
        for win in windows
    }
    for sample in samples
}

# sample -> path of the aggregated domains file (no tileset is built from these in this cell)
htad_files = {
    sample: path.join(
        "..", "2020-01-15_TAD-aggregation", "resolved-TADs",
        "{}.40000bp.aggregated-domains.bed2ddb".format(sample),
    )
    for sample in samples
}

# sample -> window -> beddb tileset, named after the sample and window size
tad_tilesets = {
    sample: {
        win: beddb(per_window[win], name="{} TADs (w={})".format(sample, win))
        for win in windows
    }
    for sample, per_window in tad_files.items()
}

# sample -> window -> edge -> bedlike Track; the same tileset backs all four edges
tads = {
    sample: {
        win: {
            edge: Track(
                track_type="bedlike",
                position=edge,
                tileset=tad_tilesets[sample][win],
                filetype="beddb",
            )
            for edge in ("top", "bottom", "left", "right")
        }
        for win in windows
    }
    for sample in samples
}

## Load breakpoints

In [61]:
# Directory holding one "<sample>.breaks.sorted.bedpe" file per sample
BREAK_DIR = path.join("..", "2019-07-24_breakfinder", "Breakpoints", "Default")


def _read_breaks(sample):
    """Read one sample's breakpoint BEDPE file into a DataFrame.

    The file is read as tab-separated without a header row, so the column
    names are supplied explicitly.
    """
    bedpe_columns = [
        "chr_x", "start_x", "end_x",
        "chr_y", "start_y", "end_y",
        "name", "logodds", "strand_x", "strand_y", "resolution",
    ]
    bedpe_file = path.join(BREAK_DIR, sample + ".breaks.sorted.bedpe")
    return pd.read_csv(
        bedpe_file,
        sep="\t",
        header=None,
        index_col=False,
        names=bedpe_columns,
    )


# sample -> DataFrame of breakpoint calls
breaks = {sample: _read_breaks(sample) for sample in samples}

# Analysis
## Plot SVs

In [82]:
def _sv_tracks(sample):
    """Return a new list of the tracks shown around a breakpoint for `sample`.

    Chromosome labels and gene annotations go on the top and left edges,
    followed by the sample's top, centre and left contact heatmaps.
    """
    heatmaps = lowc_heatmaps[sample]
    return [
        chrom_labels["top"], gene_annots["top"],
        chrom_labels["left"], gene_annots["left"],
        heatmaps["top"],
        heatmaps["centre"],
        heatmaps["left"],
    ]


def _padded_domain(chrom, start, end, pad):
    """Return [start - pad, end + pad] on `chrom`, converted with
    nc.chr_pos_to_genome_pos against the hg38 chromosome info."""
    lower = nc.chr_pos_to_genome_pos(chrom, int(start) - pad, hg38)
    upper = nc.chr_pos_to_genome_pos(chrom, int(end) + pad, hg38)
    return [lower, upper]


# Padding (bp) added on both sides of each breakpoint interval
zoom_offset = 1e6
# Pick one sample and one of its breakpoint calls by row position
s = samples[1]
i = 13
b = breaks[s].iloc[i, :]
print(s)
print(b)

# v: x axis spans the first breakend, y axis spans the second
v = View(
    tracks=_sv_tracks(s),
    initialXDomain=_padded_domain(b.chr_x, b.start_x, b.end_x, zoom_offset),
    initialYDomain=_padded_domain(b.chr_y, b.start_y, b.end_y, zoom_offset),
)
# u: x axis on the second breakend only (no explicit y domain)
u = View(
    tracks=_sv_tracks(s),
    initialXDomain=_padded_domain(b.chr_y, b.start_y, b.end_y, zoom_offset),
)
# w: x axis on the first breakend only (no explicit y domain)
w = View(
    tracks=_sv_tracks(s),
    initialXDomain=_padded_domain(b.chr_x, b.start_x, b.end_x, zoom_offset),
)

# Only `v` is rendered here; `u` and `w` are built but not displayed by this cell.
# NOTE(review): binding the result to `display` shadows IPython's built-in display()
# for the rest of the session - consider a different name if nothing else depends on it.
display, server, viewconf = higlass.display([v])
display

PCa13848
chr_x             chr10
start_x       106100000
end_x         107800000
chr_y             chr20
start_y        30900000
end_y          31500000
name                  .
logodds         180.229
strand_x              -
strand_y              +
resolution        100kb
Name: 13, dtype: object


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'NjSprAMKQS-RQBRtTfynFw', 'tracks': {'top': [{'ty…