# Environment

In [1]:
import higlass
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
import clodius
import os
import os.path as path
import pandas as pd
import numpy as np
import itertools
import negspy.coordinates as nc

## Load annotations

In [2]:
# Tilesets backing the annotation tracks: gene models and chromosome sizes.
genes = beddb("../2019-10-24_higlass/Data/hg38/gene-annotations-hg38.beddb")
chrom_sizes = chromsizes("../2019-10-24_higlass/hg38.chrom.sizes")

# Shared sizing used by the annotation tracks below.
label_font_size = 18
chr_label_size = 30
annots_size = 150

# Chromosome-label tracks, keyed by the view edge they are placed on.
# The track type prefix depends on the edge: top/bottom use "horizontal",
# left/right use "vertical".
chrom_labels = {
    side: Track(
        track_type=f"{orientation}-chromosome-labels",
        tileset=chrom_sizes,
        position=side,
        height=chr_label_size,
        width=chr_label_size,
        options={"fontSize": label_font_size, "showMousePosition": True},
    )
    for side, orientation in {
        "top": "horizontal",
        "bottom": "horizontal",
        "left": "vertical",
        "right": "vertical",
    }.items()
}

# Gene-annotation tracks, built the same way and keyed by view edge.
gene_annots = {
    side: Track(
        track_type=f"{orientation}-gene-annotations",
        tileset=genes,
        position=side,
        height=annots_size,
        width=annots_size,
        options={"fontSize": label_font_size, "showMousePosition": True},
    )
    for side, orientation in {
        "top": "horizontal",
        "bottom": "horizontal",
        "left": "vertical",
        "right": "vertical",
    }.items()
}

# Chromosome info for hg38; passed to nc.chr_pos_to_genome_pos in later cells.
hg38 = nc.get_chrominfo("hg38")

# Data
## Contact matrices

In [16]:
# --- Primary tumour samples ---------------------------------------------
# Keep only the rows flagged Include == "Yes". The explicit .copy() makes the
# filtered frame independent of the frame it was sliced from, so adding the
# SampleID column below does not trigger pandas' SettingWithCopyWarning.
tumour_metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"),
    sep="\t",
    header=0,
)
tumour_metadata = tumour_metadata.loc[tumour_metadata.Include == "Yes", :].copy()
# Sample IDs are the "Sample ID" column values prefixed with "PCa".
tumour_metadata["SampleID"] = ["PCa" + str(i) for i in tumour_metadata["Sample ID"]]

tumour_samples = tumour_metadata["SampleID"].tolist()
# Tumour samples split by the "T2E Status" column.
t2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "Yes", "SampleID"].tolist()
nont2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "No", "SampleID"].tolist()

# --- Benign samples -----------------------------------------------------
benign_metadata = pd.read_csv(
    path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX_HiC", "config.tsv"),
    sep="\t",
    header=0,
)
benign_metadata = benign_metadata.loc[benign_metadata.Include == "Yes", :]
benign_samples = benign_metadata["Sample"].tolist()

# --- Cell lines (Rhie_2019 directory), identified by run accession -------
cell_line_metadata = pd.read_csv(
    path.join("..", "..", "Data", "External", "Rhie_2019", "config.tsv"),
    sep="\t",
    header=0,
)
cell_line_samples = cell_line_metadata["Run_Accession"].tolist()

# --- Combined sample list and per-sample contact matrices ----------------
all_samples = tumour_samples + benign_samples + cell_line_samples
# Aggregated sample sheet; later cells look up each sample's "Label" here.
metadata = pd.read_csv(
    path.join("..", "2020-01-15_TAD-aggregation", "config.tsv"),
    sep="\t",
    index_col=False,
    header=0,
)
# The file list is built in the same order as all_samples (tumour, benign,
# cell line) so that the zip below pairs each sample with its own .mcool file.
cooler_files = (
    [path.join("..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts", s + ".mcool") for s in tumour_samples + benign_samples]
    + [path.join("..", "..", "Data", "External", "Rhie_2019", "Contacts", s + ".mcool") for s in cell_line_samples]
)
lowc_tilesets = {s: cooler(f) for s, f in zip(all_samples, cooler_files)}

# Bin sizes listed from coarsest to finest; a resolution's position in this
# list is used as its zoom index below.
resolutions = [
    5000000, 4000000, 3000000, 2000000, 1000000,
    500000, 400000, 300000, 200000, 100000,
    50000, 40000, 30000, 20000, 10000,
    5000, 4000, 3000, 2000, 1000,
]

min_resolution = 40000
heatmap_size = 500
# Three-stop colour scale passed to every heatmap as "colorRange".
# An alternative final stop that was tried: "rgba(240, 128, 128, 1.0)".
colour_range = [
    "rgba(65, 105, 225, 1.0)",
    "rgba(255, 250, 250, 1.0)",
    "rgba(255, 25, 25, 1.0)",
]

# lowc_heatmaps[sample][position] -> heatmap Track for that sample, where
# position is one of "top", "bottom", "left", "right" or "center".
# The track type is "horizontal-heatmap", "vertical-heatmap" or plain "heatmap"
# depending on the position.
lowc_heatmaps = {
    sample: {
        side: Track(
            track_type=f"{prefix}heatmap",
            position=side,
            tileset=lowc_tilesets[sample],
            filetype="cooler",
            height=heatmap_size,
            options={
                # Index of min_resolution in the coarsest-first list, as a string.
                "maxZoom": str(resolutions.index(min_resolution)),
                "colorbarPosition": "topRight",
                "showMousePosition": True,
                # Display name is the sample's "Label" from the metadata sheet.
                "name": metadata.loc[metadata["SampleID"] == sample, "Label"].values[0],
                "colorRange": colour_range,
            },
        )
        for side, prefix in {
            "top": "horizontal-",
            "bottom": "horizontal-",
            "left": "vertical-",
            "right": "vertical-",
            "center": "",
        }.items()
    }
    for sample in all_samples
}

metadata

Unnamed: 0,SampleID,Include,Source,Type,Label,Sample_Colour,Type_Colour,Contact_File,Tissue
0,PCa13266,Yes,Primary,Malignant,CPCG0268,#7F3C8D,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
1,PCa13848,Yes,Primary,Malignant,CPCG0366,#11A579,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
2,PCa14121,Yes,Primary,Malignant,CPCG0255,#3969AC,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
3,PCa19121,Yes,Primary,Malignant,CPCG0258,#F2B701,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
4,PCa3023,Yes,Primary,Malignant,CPCG0324,#E73F74,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
5,PCa33173,Yes,Primary,Malignant,CPCG0246,#80BA5A,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
6,PCa40507,Yes,Primary,Malignant,CPCG0342,#E68310,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
7,PCa51852,Yes,Primary,Malignant,CPCG0336,#008695,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
8,PCa53687,Yes,Primary,Malignant,CPCG0339,#CF1C90,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate
9,PCa56413,Yes,Primary,Malignant,CPCG0331,#F97B72,#1F77B4,../../Data/Processed/2019-06-18_PCa-LowC-seque...,Prostate


## Load breakpoints

In [4]:
# Directory holding the structural-variant breakpoint results.
BREAK_DIR = path.join("..", "2020-02-19_chromoplexy")
breaks = pd.read_csv(
    path.join(BREAK_DIR, "Graphs", "sv-breakpoints.paired.tsv"),
    sep="\t",
    header=0,
    index_col=False,
)

# One beddb breakpoint file per tumour sample.
break_files_1D = {
    sample: path.join(BREAK_DIR, "Tracks", sample + ".breaks.beddb")
    for sample in tumour_samples
}
# Tilesets are named "<Label> Breaks", with Label taken from the metadata sheet.
break_tilesets_1D = {
    sample: beddb(
        beddb_file,
        name=metadata.loc[metadata["SampleID"] == sample, "Label"].values[0] + " Breaks",
    )
    for sample, beddb_file in break_files_1D.items()
}
# break_tracks_1D[sample][position] -> bedlike Track for that sample.
# Top/bottom positions use "bedlike"; left/right use "vertical-bedlike".
break_tracks_1D = {
    sample: {
        side: Track(
            track_type=f"{prefix}bedlike",
            position=side,
            tileset=break_tilesets_1D[sample],
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": metadata.loc[metadata["SampleID"] == sample, "Label"].values[0],
            },
        )
        for side, prefix in {
            "top": "",
            "bottom": "",
            "left": "vertical-",
            "right": "vertical-",
        }.items()
    }
    for sample in tumour_samples
}

# Supporting tables. bp_pairs reads the same file as `breaks` above, but with
# pandas' default index handling.
disruption_tests = pd.read_csv(path.join(BREAK_DIR, "Graphs", "sv-disruption-tests.tsv"), sep="\t")
bp_singletons = pd.read_csv(path.join(BREAK_DIR, "Graphs", "sv-breakpoints.tsv"), sep="\t")
bp_pairs = pd.read_csv(path.join(BREAK_DIR, "Graphs", "sv-breakpoints.paired.tsv"), sep="\t")
disruption_tads = pd.read_csv(
    path.join("..", "2020-02-19_sv-disruption-TADs", "sv-disruption-tests.TADs.tsv"),
    sep="\t",
)

# T2E locus

In [19]:
# Genome-wide coordinates of chr21:38,000,000-42,000,000, used as the initial
# x-domain of every view in this cell.
pos = [
    nc.chr_pos_to_genome_pos("chr21", coord, hg38)
    for coord in (38000000, 42000000)
]

# First view: chromosome labels and gene annotations.
# Remaining views: one horizontal heatmap per tumour sample, in
# tumour_samples order (so v[i + 1] belongs to tumour_samples[i]).
v = [View(tracks=[chrom_labels["top"], gene_annots["top"]], initialXDomain=pos)]
v.extend(
    View(tracks=[lowc_heatmaps[sample]["top"]], initialXDomain=pos)
    for sample in tumour_samples
)

# NOTE(review): location_syncs receives (view, track) pairs, the same shape
# used for value_scale_syncs; confirm against the higlass-python API that this
# is the intended form for location syncing.
d, s, c = higlass.display(
    views=v,
    location_syncs=[
        (view, lowc_heatmaps[sample]["top"])
        for view, sample in zip(v[1:], tumour_samples)
    ],
    # A single sync group containing every sample's heatmap.
    value_scale_syncs=[
        [
            (view, lowc_heatmaps[sample]["top"])
            for view, sample in zip(v[1:], tumour_samples)
        ]
    ],
)
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'cufDjHsZQBSzDoxHLJd8Ww', 'tracks': {'top': [{'ty…

# Complex structural variants on chr3

In [20]:
# Sample whose contact matrix is shown in this cell.
sv_sample_id = "PCa56413"
# Genome-wide coordinates of chr3:0-177,400,000, used as the initial x-domain.
pos = [
    nc.chr_pos_to_genome_pos("chr3", coord, hg38)
    for coord in (0, 177400000)
]

# Single 2D view: chromosome labels along the top and left edges with the
# sample's heatmap in the centre. The gene-annotation tracks
# (gene_annots["top"], gene_annots["left"]) are deliberately left out here.
v = View(
    tracks=[
        chrom_labels["top"],
        chrom_labels["left"],
        lowc_heatmaps[sv_sample_id]["center"],
    ],
    initialXDomain=pos,
)

d, s, c = higlass.display(views=[v])
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'a-HssGF4R_ma2Hi9VF6tLw', 'tracks': {'top': [{'ty…