# Environment

In [34]:
import higlass
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
import os
import os.path as path
import pandas as pd
import negspy.coordinates as nc
import numpy as np

def bed2ddb(filepath, uuid=None, **kwargs):
    """Wrap a bed2ddb file in a higlass ``Tileset``.

    Parameters
    ----------
    filepath
        Path of the bed2ddb file passed to the clodius bed2ddb readers.
    uuid
        Optional identifier forwarded to ``Tileset``.
    **kwargs
        Forwarded to ``Tileset`` unchanged.

    Returns
    -------
    Tileset
        Tileset whose info and tile callbacks read from ``filepath``.
    """
    from clodius.tiles.utils import tiles_wrapper_2d
    from clodius.tiles.bed2ddb import get_2d_tileset_info, get_2D_tiles

    def read_info():
        return get_2d_tileset_info(filepath)

    def read_single_tile(z, x, y):
        # The clodius result is indexed by the (x, y) pair of the requested tile.
        return get_2D_tiles(filepath, z, x, y)[(x, y)]

    def read_tiles(tids):
        return tiles_wrapper_2d(tids, read_single_tile)

    return Tileset(
        uuid=uuid,
        tileset_info=read_info,
        tiles=read_tiles,
        **kwargs
    )


# Data
## Load annotations

In [35]:
# Local tilesets backing the gene-annotation and chromosome-label tracks.
genes = beddb("Data/hg38/gene-annotations-hg38.beddb")
chrom_sizes = chromsizes("hg38.chrom.sizes")

# Chromosome labels, keyed by the side of the view they are placed on.
chrom_labels = {
    side: Track(
        track_type=label_type,
        tileset=chrom_sizes,
        position=side,
        options={
            "fontSize": 16,
            "showMousePosition": True,
        },
    )
    for side, label_type in (
        ("top", "horizontal-chromosome-labels"),
        ("left", "vertical-chromosome-labels"),
    )
}

# Gene annotations; only the top track is given an explicit height.
gene_annots = {
    side: Track(
        track_type=annot_type,
        tileset=genes,
        position=side,
        **extent,
        options={
            "fontSize": 16,
            "showMousePosition": True,
        },
    )
    for side, annot_type, extent in (
        ("top", "horizontal-gene-annotations", {"height": 150}),
        ("left", "vertical-gene-annotations", {}),
    )
}

# CTCF motif tracks served remotely from higlass.io rather than from a local file.
ctcf_motifs = {
    side: Track(
        track_type="bedlike",
        server="//higlass.io/api/v1",
        tileset_uuid="EkPGY0iFQx6Nq6vdF8CpWA",
        position=side,
        options={
            "fontSize": 12,
            "showMousePosition": True,
        },
    )
    for side in ("top", "left")
}

# Chromosome info used later by nc.chr_pos_to_genome_pos.
hg38 = nc.get_chrominfo("hg38")

## Load contact matrices

In [36]:
# Tumour samples: IDs from the sample sheet, each prefixed with "PCa".
metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"),
    sep="\t",
    header=0,
)
tumour_samples = ["PCa" + str(sample_id) for sample_id in metadata["Sample ID"].tolist()]

# Benign samples: every row of the sequencing-run config whose name does not start with "PCa".
benign_metadata = pd.read_csv(
    path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX", "config.tsv"),
    sep="\t",
    header=0,
).loc[lambda frame: ~frame["Sample"].str.startswith("PCa"), :]
benign_samples = benign_metadata["Sample"].tolist()

all_samples = [*tumour_samples, *benign_samples]

# One .mcool path per sample, in the same order as all_samples (tumour first, then benign).
cooler_files = [
    path.join("..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts", name + ".mcool")
    for name in tumour_samples
]
cooler_files.extend(
    path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX", "Contacts", name + ".mcool")
    for name in benign_samples
)
lowc_tilesets = {name: cooler(mcool) for name, mcool in zip(all_samples, cooler_files)}

# Resolutions from coarsest (5 Mbp) to finest (1 kbp): 1-5 times each power of ten.
resolutions = [
    multiple * 10 ** exponent
    for exponent in range(6, 2, -1)
    for multiple in range(5, 0, -1)
]

min_resolution = 5000

heatmap_size = 150

# For every sample, one heatmap track per placement. "maxZoom" is the index of
# min_resolution in the coarse-to-fine resolutions list, passed as a string.
lowc_heatmaps = {
    sample: {
        key: Track(
            track_type=heatmap_type,
            position=placement,
            tileset=lowc_tilesets[sample],
            filetype="cooler",
            **extent,
            options={
                "maxZoom": str(resolutions.index(min_resolution)),
                "colorbarPosition": "topRight",
                "showMousePosition": True,
                "name": sample,
            },
        )
        for key, heatmap_type, placement, extent in (
            ("top", "horizontal-heatmap", "top", {"height": heatmap_size}),
            ("bottom", "horizontal-heatmap", "bottom", {"height": heatmap_size}),
            ("left", "vertical-heatmap", "left", {"width": heatmap_size}),
            ("right", "vertical-heatmap", "right", {"width": heatmap_size}),
            # The central 2D heatmap is twice as tall as the flanking 1D tracks.
            ("centre", "heatmap", "center", {"height": 2 * heatmap_size}),
        )
    }
    for sample in all_samples
}

## Load TADs

In [58]:
# Window sizes w = 3..30 for which separated TAD calls are loaded.
windows = list(range(3, 31))

# Per-sample, per-window paths to the 1D (beddb) and 2D (bed2ddb) TAD calls.
tad_1D_files = {
    sample: {
        win: path.join(
            "..", "2020-01-15_TAD-aggregation", "resolved-TADs", "separated-TADs",
            sample + ".40000bp.w_" + str(win) + ".domains.beddb",
        )
        for win in windows
    }
    for sample in tumour_samples
}
tad_2D_files = {
    sample: {
        win: path.join(
            "..", "2020-01-15_TAD-aggregation", "resolved-TADs", "separated-TADs",
            sample + ".40000bp.w_" + str(win) + ".domains.bed2ddb",
        )
        for win in windows
    }
    for sample in tumour_samples
}
# Per-sample path to the aggregated-domains file.
htad_files = {
    sample: path.join(
        "..", "2020-01-15_TAD-aggregation", "resolved-TADs",
        sample + ".40000bp.aggregated-domains.bed2ddb",
    )
    for sample in tumour_samples
}

# Tilesets mirroring the file dictionaries above, each labelled with sample (and window).
tad_1D_tilesets = {
    sample: {
        win: beddb(by_window[win], name=sample + " TADs (w=" + str(win) +")")
        for win in windows
    }
    for sample, by_window in tad_1D_files.items()
}
tad_2D_tilesets = {
    sample: {
        win: bed2ddb(by_window[win], name=sample + " TADs (w=" + str(win) +")")
        for win in windows
    }
    for sample, by_window in tad_2D_files.items()
}
htad_tilesets = {
    sample: bed2ddb(htad_path, name=sample + " TADs")
    for sample, htad_path in htad_files.items()
}

# 1D TAD tracks: tads[sample][window][side], one "bedlike" track per side of the view.
tads = {
    sample: {
        win: {
            side: Track(
                track_type="bedlike",
                position=side,
                tileset=tad_1D_tilesets[sample][win],
                filetype="beddb",
                options={
                    "showMousePosition": True,
                    "name": "{} (w = {})".format(sample, win),
                },
            )
            for side in ("top", "bottom", "left", "right")
        }
        for win in windows
    }
    for sample in tumour_samples
}

# 2D TAD tracks: tad_polygons[sample][window][key], with horizontal variants for
# the top/bottom placements and the full 2D variant for the centre.
tad_polygons = {
    sample: {
        win: {
            key: Track(
                track_type=domain_type,
                position=placement,
                tileset=tad_2D_tilesets[sample][win],
                filetype="bed2ddb",
                options={
                    "showMousePosition": True,
                    "name": "{} (w = {})".format(sample, win),
                },
            )
            for key, domain_type, placement in (
                ("top", "horizontal-2d-rectangle-domains", "top"),
                ("bottom", "horizontal-2d-rectangle-domains", "bottom"),
                ("centre", "2d-rectangle-domains", "center"),
            )
        }
        for win in windows
    }
    for sample in tumour_samples
}

# Aggregated (hierarchical) TAD tracks: hierarchical_tads[sample][key].
#
# FIX: the "bottom" entry used the "vertical-2d-rectangle-domains" track type even
# though it is placed at position "bottom". The per-window TAD polygon tracks in
# this notebook use the horizontal variant for the bottom placement, so the same
# is done here to keep track type and position in agreement.
hierarchical_tads = {
    s: {
        key: Track(
            track_type=domain_type,
            position=placement,
            tileset=htad_tilesets[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s,
            },
        )
        for key, domain_type, placement in (
            ("top", "horizontal-2d-rectangle-domains", "top"),
            ("bottom", "horizontal-2d-rectangle-domains", "bottom"),
            ("centre", "2d-rectangle-domains", "center"),
        )
    } for s in tumour_samples
}

## H3K27ac ChIP-seq

In [72]:
# Per-sample H3K27ac pileup bigWig files and the tilesets built from them.
chip_files = {
    sample: path.join(
        "..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks",
        sample + "_treat_pileup.filtered.bw",
    )
    for sample in tumour_samples
}
chip_tilesets = {sample: bigwig(bw_path) for sample, bw_path in chip_files.items()}

# Line tracks of the pileup signal: chip[sample][side].
chip = {
    sample: {
        side: Track(
            track_type=line_type,
            position=side,
            tileset=signal,
            filetype="vector",
            options={
                "showMousePosition": True,
                "name": sample,
            },
        )
        for side, line_type in (
            ("top", "horizontal-line"),
            ("bottom", "horizontal-line"),
            ("left", "vertical-line"),
            ("right", "vertical-line"),
        )
    }
    for sample, signal in chip_tilesets.items()
}

# Per-sample filtered peak files (beddb) and the tilesets built from them.
peak_files = {
    sample: path.join(
        "..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks",
        sample + "_peaks.filtered.beddb",
    )
    for sample in tumour_samples
}
peak_tilesets = {sample: beddb(peak_path) for sample, peak_path in peak_files.items()}

# Interval tracks of the peaks: peaks[sample][side].
peaks = {
    sample: {
        side: Track(
            track_type="bedlike",
            position=side,
            tileset=intervals,
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": sample,
            },
        )
        for side in ("top", "bottom", "left", "right")
    }
    for sample, intervals in peak_tilesets.items()
}

## Load breakpoints

In [73]:
BREAK_DIR = path.join("..", "2019-07-24_breakfinder", "Breakpoints", "Default")

# Column names applied to the header-less, manually resolved breakpoint tables.
BREAK_COLUMNS = [
    "chr_x", "start_x", "end_x",
    "chr_y", "start_y", "end_y",
    "name", "logodds", "strand_x", "strand_y",
    "resolution", "Type", "Notes",
]

# One breakpoint table per tumour sample.
breaks = {
    sample: pd.read_csv(
        path.join(BREAK_DIR, sample + ".breaks.sorted.manually-resolved.tsv"),
        sep="\t",
        header=None,
        index_col=False,
        names=BREAK_COLUMNS,
    )
    for sample in tumour_samples
}

# Per-sample bed2ddb files of breakpoints and the tilesets built from them.
break_files = {s: path.join(BREAK_DIR, s + ".breaks.bed2ddb") for s in tumour_samples}
break_tilesets = {s: bed2ddb(v, name=s + " Breaks") for s, v in break_files.items()}

# Breakpoint tracks: break_tracks[sample][key].
#
# FIX: the "bottom" entry used the "vertical-2d-rectangle-domains" track type even
# though it is placed at position "bottom". The per-window TAD polygon tracks in
# this notebook use the horizontal variant for the bottom placement, so the same
# is done here to keep track type and position in agreement.
break_tracks = {
    s: {
        key: Track(
            track_type=domain_type,
            position=placement,
            tileset=break_tilesets[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s,
            },
        )
        for key, domain_type, placement in (
            ("top", "horizontal-2d-rectangle-domains", "top"),
            ("bottom", "horizontal-2d-rectangle-domains", "bottom"),
            ("centre", "2d-rectangle-domains", "center"),
        )
    } for s in tumour_samples
}

# Analysis
## Plot SVs

In [81]:
# Padding (bp) added on each side of the breakpoint intervals for the initial view.
zoom_offset = 1e8

# FIX: this cell indexed `samples`, which no cell in this notebook defines, so it
# raised NameError on a fresh kernel. The breakpoint, ChIP-seq and peak tracks
# used below are keyed by tumour sample, so `tumour_samples` is indexed instead.
# NOTE(review): confirm tumour_samples[1] is the sample originally intended.
s = tumour_samples[1]
i = 5  # 1-based row of the breakpoint to plot within this sample's table
b = breaks[s].iloc[i - 1, :]
print(s)
print(b)

# Padded breakpoint intervals on each axis, converted from (chromosome, position)
# with negspy's chr_pos_to_genome_pos and the hg38 chromosome info.
x_domain = [
    nc.chr_pos_to_genome_pos(b.chr_x, int(b.start_x) - zoom_offset, hg38),
    nc.chr_pos_to_genome_pos(b.chr_x, int(b.end_x) + zoom_offset, hg38),
]
y_domain = [
    nc.chr_pos_to_genome_pos(b.chr_y, int(b.start_y) - zoom_offset, hg38),
    nc.chr_pos_to_genome_pos(b.chr_y, int(b.end_y) + zoom_offset, hg38),
]

# Main view: contact heatmap with the breakpoint overlay, flanked by H3K27ac
# signal and peaks, with x = first breakpoint locus and y = second locus.
v = View(
    tracks=[
        chrom_labels["top"], gene_annots["top"],
        chrom_labels["left"], gene_annots["left"],
        lowc_heatmaps[s]["top"],
        chip[s]["top"], peaks[s]["top"],
        CombinedTrack([lowc_heatmaps[s]["centre"], break_tracks[s]["centre"]]),
        lowc_heatmaps[s]["left"],
        chip[s]["left"], peaks[s]["left"],
    ],
    initialXDomain=list(x_domain),
    initialYDomain=list(y_domain),
)
# Views of each breakpoint locus on its own. Only `v` is displayed below;
# `y` and `x` are kept so they remain available to be passed to higlass.display.
y = View(
    tracks=[
        chrom_labels["top"], gene_annots["top"],
        chrom_labels["left"], gene_annots["left"],
        lowc_heatmaps[s]["top"],
        lowc_heatmaps[s]["centre"],
        lowc_heatmaps[s]["left"],
    ],
    initialXDomain=list(y_domain),
)
x = View(
    tracks=[
        chrom_labels["top"], gene_annots["top"],
        chrom_labels["left"], gene_annots["left"],
        lowc_heatmaps[s]["top"],
        lowc_heatmaps[s]["centre"],
        lowc_heatmaps[s]["left"],
    ],
    initialXDomain=list(x_domain),
)
display, server, viewconf = higlass.display([v])
display

PCa13848
chr_x                                                      chr2
start_x                                               226000000
end_x                                                 227300000
chr_y                                                      chr6
start_y                                               165300000
end_y                                                 166400000
name                                                          .
logodds                                                 2865.34
strand_x                                                      -
strand_y                                                      -
resolution                                                100kb
Type                                                        BND
Notes         Related to many other nearby aberrations on ch...
Name: 4, dtype: object


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'TiK7Wlm7QDy0HIiXU03W-g', 'tracks': {'top': [{'ty…