# Environment

In [1]:
import higlass
from higlass.client import View, Track, CombinedTrack
from higlass.tilesets import cooler, beddb, chromsizes, bigwig, Tileset
import clodius
import os
import os.path as path
import pandas as pd
import numpy as np
import itertools
import negspy.coordinates as nc

## Load annotations

In [2]:
# Local tilesets that back the annotation tracks.
genes = beddb("Data/hg38/gene-annotations-hg38.beddb")
chrom_sizes = chromsizes("hg38.chrom.sizes")
label_font_size = 18

# Chromosome-label tracks, one for each side of a view.
chrom_labels = {
    side: Track(
        track_type=orientation + "-chromosome-labels",
        tileset=chrom_sizes,
        position=side,
        options={"fontSize": label_font_size, "showMousePosition": True},
    )
    for side, orientation in [
        ("top", "horizontal"),
        ("bottom", "horizontal"),
        ("left", "vertical"),
        ("right", "vertical"),
    ]
}

# Gene-annotation tracks; only the horizontal ones are given an explicit height.
gene_annots = {
    side: Track(
        track_type=orientation + "-gene-annotations",
        tileset=genes,
        position=side,
        options={"fontSize": label_font_size, "showMousePosition": True},
        **size
    )
    for side, orientation, size in [
        ("top", "horizontal", {"height": 150}),
        ("bottom", "horizontal", {"height": 150}),
        ("left", "vertical", {}),
        ("right", "vertical", {}),
    ]
}

# CTCF motif tracks referenced by UUID on the higlass.io server.
ctcf_motifs = {
    side: Track(
        track_type="bedlike",
        server="//higlass.io/api/v1",
        tileset_uuid="EkPGY0iFQx6Nq6vdF8CpWA",
        position=side,
        options={"fontSize": label_font_size, "showMousePosition": True},
    )
    for side in ("top", "left")
}

# Chromosome info used to convert (chromosome, position) pairs into genome-wide
# coordinates with `nc.chr_pos_to_genome_pos`.
hg38 = nc.get_chrominfo("hg38")

# Data
## Contact matrices

In [3]:
# --- Sample metadata ---------------------------------------------------------
# Primary tumours: keep only the samples flagged for inclusion. The `.copy()`
# makes the filtered frame independent of the frame it was sliced from, so
# adding the `SampleID` column below does not raise SettingWithCopyWarning.
tumour_metadata = pd.read_csv(path.join("..", "..", "Data", "External", "LowC_Samples_Data_Available.tsv"), sep="\t", header=0)
tumour_metadata = tumour_metadata.loc[tumour_metadata.Include == "Yes", :].copy()
tumour_metadata["SampleID"] = ["PCa" + str(i) for i in tumour_metadata["Sample ID"]]

tumour_samples = tumour_metadata["SampleID"].tolist()
t2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "Yes", "SampleID"].tolist()
nont2e_samples = tumour_metadata.loc[tumour_metadata["T2E Status"] == "No", "SampleID"].tolist()

# Benign samples: same inclusion filter (no columns are added afterwards).
benign_metadata = pd.read_csv(path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX", "config.tsv"), sep="\t", header=0)
benign_metadata = benign_metadata.loc[benign_metadata.Include == "Yes", :]
benign_samples = benign_metadata["Sample"].tolist()

# Cell lines: every row of the config is used.
cell_line_metadata = pd.read_csv(path.join("..", "..", "Data", "External", "Rhie_2019", "config.tsv"), sep="\t", header=0)
cell_line_samples = cell_line_metadata["Run_Accession"].tolist()

# Combined sample list and the label shown for each sample in track names.
all_samples = tumour_samples + benign_samples + cell_line_samples
metadata = pd.DataFrame({
    "Sample": all_samples,
    "Label": tumour_metadata["Patient ID"].tolist() + benign_samples + cell_line_metadata["Cell_Line"].tolist()
})

# --- Contact matrices --------------------------------------------------------
# One .mcool file per sample, listed in the same order as `all_samples`.
cooler_files = (
    [path.join("..", "..", "Data", "Processed", "2019-06-18_PCa-LowC-sequencing", "Contacts", s + ".mcool") for s in tumour_samples]
    + [path.join("..", "..", "Data", "Raw", "191220_A00827_0104_AHMW25DMXX", "Contacts", s + ".mcool") for s in benign_samples]
    + [path.join("..", "..", "Data", "External", "Rhie_2019", "Contacts", s + ".mcool") for s in cell_line_samples]
)
lowc_tilesets = {s: cooler(f) for s, f in zip(all_samples, cooler_files)}

# Resolutions listed from coarsest to finest; the position of `min_resolution`
# in this list is passed to the heatmap tracks as their `maxZoom` option.
resolutions = [
    1000, 2000, 3000, 4000, 5000,
    10000, 20000, 30000, 40000, 50000,
    100000, 200000, 300000, 400000, 500000,
    1000000, 2000000, 3000000, 4000000, 5000000
][::-1]

min_resolution = 10000
heatmap_size = 150
colour_range = [
    "rgba(65, 105, 225, 1.0)",
    "rgba(255, 250, 250, 1.0)",
    "rgba(240, 128, 128, 1.0)",
    "rgba(255, 99, 71, 1.0)"
]
max_zoom = str(resolutions.index(min_resolution))


def _lowc_heatmap_track(sample, track_type, position, **size):
    """Build one contact-matrix heatmap track for `sample`.

    Parameters
    ----------
    sample : str
        Key into `lowc_tilesets`; also used to look up the display label.
    track_type : str
        HiGlass track type, e.g. "horizontal-heatmap".
    position : str
        Position of the track inside a view.
    **size
        `height=` or `width=` keyword forwarded to `Track`.
    """
    return Track(
        track_type=track_type,
        position=position,
        tileset=lowc_tilesets[sample],
        filetype="cooler",
        options={
            "maxZoom": max_zoom,
            "colorbarPosition": "topRight",
            "showMousePosition": True,
            "name": metadata.loc[metadata["Sample"] == sample, "Label"].values[0],
            "colorRange": colour_range,
        },
        **size
    )


# Heatmap tracks for every sample and every position. Horizontal tracks are
# sized by height, vertical tracks by width, and the centre heatmap is twice
# as tall as the side tracks.
lowc_heatmaps = {
    s: {
        "top": _lowc_heatmap_track(s, "horizontal-heatmap", "top", height=heatmap_size),
        "bottom": _lowc_heatmap_track(s, "horizontal-heatmap", "bottom", height=heatmap_size),
        "left": _lowc_heatmap_track(s, "vertical-heatmap", "left", width=heatmap_size),
        "right": _lowc_heatmap_track(s, "vertical-heatmap", "right", width=heatmap_size),
        "centre": _lowc_heatmap_track(s, "heatmap", "center", height=2 * heatmap_size),
    } for s in all_samples
}

## TADs

In [25]:
# Window sizes for which per-window TAD call files exist.
windows = list(range(3, 31))

# Per-sample, per-window TAD calls as 1D (beddb) and 2D (bed2ddb) files.
tad_1D_files = {
    s: {
        w: path.join(
            "..", "2020-01-15_TAD-aggregation", "resolved-TADs", "separated-TADs",
            "{}.40000bp.w_{}.domains.beddb".format(s, w),
        )
        for w in windows
    }
    for s in tumour_samples
}
tad_2D_files = {
    s: {
        w: path.join(
            "..", "2020-01-15_TAD-aggregation", "resolved-TADs", "separated-TADs",
            "{}.40000bp.w_{}.domains.bed2ddb".format(s, w),
        )
        for w in windows
    }
    for s in tumour_samples
}
# Per-sample TADs aggregated over all windows.
htad_files = {
    s: path.join(
        "..", "2020-01-15_TAD-aggregation", "resolved-TADs",
        "{}.40000bp.aggregated-domains.bed2ddb".format(s),
    )
    for s in tumour_samples
}

def bed2ddb(filepath, uuid=None, **kwargs):
    """Wrap a local bed2ddb file in a higlass `Tileset`.

    Parameters
    ----------
    filepath : str
        Path of the bed2ddb file handed to the clodius readers.
    uuid : str, optional
        Forwarded to `Tileset` as its `uuid`.
    **kwargs
        Any further keyword arguments are forwarded to `Tileset` unchanged.

    Returns
    -------
    Tileset
        Tileset whose info and tile callbacks read from `filepath`.
    """
    from clodius.tiles.utils import tiles_wrapper_2d
    from clodius.tiles.bed2ddb import get_2d_tileset_info, get_2D_tiles

    def read_info():
        return get_2d_tileset_info(filepath)

    def read_tile(z, x, y):
        # get_2D_tiles returns a mapping; keep only the entry for (x, y).
        return get_2D_tiles(filepath, z, x, y)[(x, y)]

    def read_tiles(tids):
        return tiles_wrapper_2d(tids, read_tile)

    return Tileset(
        uuid=uuid,
        tileset_info=read_info,
        tiles=read_tiles,
        **kwargs
    )

# Tilesets for the per-window TAD calls (1D and 2D) and the aggregated TADs.
tad_1D_tilesets = {
    s: {
        w: beddb(by_window[w], name="{} TADs (w={})".format(s, w))
        for w in windows
    }
    for s, by_window in tad_1D_files.items()
}
tad_2D_tilesets = {
    s: {
        w: bed2ddb(by_window[w], name="{} TADs (w={})".format(s, w))
        for w in windows
    }
    for s, by_window in tad_2D_files.items()
}
htad_tilesets = {
    s: bed2ddb(htad_file, name="{} TADs".format(s))
    for s, htad_file in htad_files.items()
}

# 1D TAD tracks indexed as tads[sample][window][position].
tads = {
    s: {
        w: {
            position: Track(
                track_type=prefix + "bedlike",
                position=position,
                tileset=tad_1D_tilesets[s][w],
                filetype="beddb",
                options={
                    "showMousePosition": True,
                    "name": "{} (w = {})".format(s, w),
                },
            )
            for position, prefix in [
                ("top", ""),
                ("bottom", ""),
                ("left", "vertical-"),
                ("right", "vertical-"),
            ]
        }
        for w in windows
    }
    for s in tumour_samples
}

# 2D TAD tracks indexed as tad_polygons[sample][window][key]. The "centre"
# key maps to the HiGlass position "center".
tad_polygons = {
    s: {
        w: {
            key: Track(
                track_type=track_type,
                position=position,
                tileset=tad_2D_tilesets[s][w],
                filetype="bed2ddb",
                options={
                    "showMousePosition": True,
                    "name": "{} (w = {})".format(s, w),
                },
            )
            for key, position, track_type in [
                ("top", "top", "horizontal-2d-rectangle-domains"),
                ("bottom", "bottom", "horizontal-2d-rectangle-domains"),
                ("centre", "center", "2d-rectangle-domains"),
            ]
        }
        for w in windows
    }
    for s in tumour_samples
}

# Aggregated (hierarchical) TAD tracks indexed as hierarchical_tads[sample][key].
# The "top" and "bottom" entries use the horizontal variant of the
# rectangle-domains track, matching `tad_polygons`; the plain
# "2d-rectangle-domains" type is kept for the centre position only.
hierarchical_tads = {
    s: {
        key: Track(
            track_type=track_type,
            position=position,
            tileset=htad_tilesets[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s,
            },
        )
        for key, position, track_type in [
            ("top", "top", "horizontal-2d-rectangle-domains"),
            ("bottom", "bottom", "horizontal-2d-rectangle-domains"),
            ("centre", "center", "2d-rectangle-domains"),
        ]
    }
    for s in tumour_samples
}

## H3K27ac ChIP-seq

In [22]:
# Thickness of each H3K27ac signal track: the height of horizontal tracks and
# the width of vertical ones.
line_height = 50
chip_files = {s: path.join("..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks", s + "_treat_pileup.filtered.bw") for s in tumour_samples}
chip_tilesets = {s: bigwig(f) for s, f in chip_files.items()}

# H3K27ac pileup tracks indexed as chip[sample][position]. Vertical tracks are
# sized with `width` (as the vertical heatmaps are); the previous `height`
# keyword on the left/right tracks did not set their thickness.
chip = {
    s: {
        position: Track(
            track_type=orientation + "-line",
            position=position,
            tileset=ts,
            filetype="vector",
            options={
                "showMousePosition": True,
                "name": metadata.loc[metadata["Sample"] == s, "Label"].values[0],
            },
            **{size_key: line_height}
        )
        for position, orientation, size_key in [
            ("top", "horizontal", "height"),
            ("bottom", "horizontal", "height"),
            ("left", "vertical", "width"),
            ("right", "vertical", "width"),
        ]
    }
    for s, ts in chip_tilesets.items()
}

# Filtered H3K27ac peak calls, one beddb file per tumour sample.
peak_files = {
    s: path.join(
        "..", "..", "Data", "Processed", "2019-05-03_PCa-H3K27ac-peaks", "Peaks",
        "{}_peaks.filtered.beddb".format(s),
    )
    for s in tumour_samples
}
peak_tilesets = {s: beddb(peak_file) for s, peak_file in peak_files.items()}

# Peak tracks indexed as peaks[sample][position].
peaks = {
    s: {
        position: Track(
            track_type=prefix + "bedlike",
            position=position,
            tileset=ts,
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": metadata.loc[metadata["Sample"] == s, "Label"].values[0],
            },
        )
        for position, prefix in [
            ("top", ""),
            ("bottom", ""),
            ("left", "vertical-"),
            ("right", "vertical-"),
        ]
    }
    for s, ts in peak_tilesets.items()
}

## Load breakpoints

In [23]:
# Directory holding the structural-variant breakpoint results.
BREAK_DIR = path.join("..", "2020-02-19_chromoplexy")
breaks = pd.read_csv(
    path.join(BREAK_DIR, "Graphs", "sv-breakpoints.paired.tsv"),
    sep="\t",
    header=0,
    index_col=False,
)

# 2D breakpoint tracks indexed as break_tracks_2D[sample][key].
break_files_2D = {s: path.join(BREAK_DIR, "Tracks", s + ".breaks.bed2ddb") for s in tumour_samples}
break_tilesets_2D = {s: bed2ddb(v, name=s + " Breaks") for s, v in break_files_2D.items()}
break_tracks_2D = {
    s: {
        "top": Track(
            track_type="horizontal-2d-rectangle-domains",
            position="top",
            tileset=break_tilesets_2D[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s + " Breakpoints",
            },
        ),
        # The bottom position is a horizontal one, so it uses the horizontal
        # track type like "top" (it was "vertical-2d-rectangle-domains").
        "bottom": Track(
            track_type="horizontal-2d-rectangle-domains",
            position="bottom",
            tileset=break_tilesets_2D[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s + " Breakpoints",
            },
        ),
        "centre": Track(
            track_type="2d-rectangle-domains",
            position="center",
            tileset=break_tilesets_2D[s],
            filetype="bed2ddb",
            options={
                "showMousePosition": True,
                "name": s + " Breakpoint Pairs",
                "flipDiagonal": "copy",
            }
        ),
    } for s in break_tilesets_2D.keys()
}

# 1D breakpoint files and tilesets, one per tumour sample.
break_files_1D = {
    s: path.join(BREAK_DIR, "Tracks", "{}.breaks.beddb".format(s))
    for s in tumour_samples
}
break_tilesets_1D = {
    s: beddb(
        break_file,
        name=metadata.loc[metadata["Sample"] == s, "Label"].values[0] + " Breaks",
    )
    for s, break_file in break_files_1D.items()
}

# 1D breakpoint tracks indexed as break_tracks_1D[sample][position].
break_tracks_1D = {
    s: {
        position: Track(
            track_type=prefix + "bedlike",
            position=position,
            tileset=break_tilesets_1D[s],
            filetype="beddb",
            options={
                "showMousePosition": True,
                "name": metadata.loc[metadata["Sample"] == s, "Label"].values[0],
            },
        )
        for position, prefix in [
            ("top", ""),
            ("bottom", ""),
            ("left", "vertical-"),
            ("right", "vertical-"),
        ]
    }
    for s in tumour_samples
}

# Tables from the chromoplexy analysis, read in this order: disruption tests,
# single breakpoints, paired breakpoints.
disruption_tests, bp_singletons, bp_pairs = (
    pd.read_csv(path.join("..", "2020-02-19_chromoplexy", "Graphs", table_name), sep="\t")
    for table_name in (
        "sv-disruption-tests.tsv",
        "sv-breakpoints.tsv",
        "sv-breakpoints.paired.tsv",
    )
)
# TAD table from the SV disruption analysis (it is queried by test_ID, chr,
# start and end elsewhere in this notebook).
disruption_tads = pd.read_csv(
    path.join("..", "2020-02-19_sv-disruption-TADs", "sv-disruption-tests.TADs.tsv"),
    sep="\t",
)

# Paper figures

In [17]:
# One view per tumour sample: annotations, H3K27ac signal, and TAD calls at
# four window sizes, all opened on the same chrX interval.
fig1_view = [
    View(
        tracks=(
            [chrom_labels["top"], gene_annots["top"], chip[s]["top"]]
            + [tads[s][w]["top"] for w in (3, 10, 17, 24)]
        ),
        initialXDomain=[
            nc.chr_pos_to_genome_pos("chrX", bp, hg38)
            for bp in (65000000, 68000000)
        ],
    )
    for s in tumour_samples
]
display, server, viewconf = higlass.display(views=fig1_view, server_port=8192)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'D8CP1dTuSNiSX9OcBVwBMQ', 'tracks': {'top': [{'ty…

## Effects of structural variants on TAD boundaries

### A structural variant that alters local TAD boundaries

In [7]:
def _sv_pair_view(sample_id, pos):
    """Build the contact-matrix view of one sample for an SV comparison."""
    return View(
        tracks=[
            chrom_labels["top"],
            chrom_labels["left"],
            lowc_heatmaps[sample_id]["top"],
            tads[sample_id][3]["top"],
            lowc_heatmaps[sample_id]["centre"],
            gene_annots["bottom"],
        ],
        initialYDomain=pos["y"],
        initialXDomain=pos["x"],
    )


def plot_sv_pairs(sv_sample_id, nonsv_sample_id, pos, server_port=8192):
    """Show a sample with a structural variant next to one without it.

    Two views are built over the same region, with their zoom and location
    synchronised. The value scales of the two centre heatmaps are tied
    together, and within each view the top heatmap follows the centre one.

    Parameters
    ----------
    sv_sample_id : str
        Sample carrying the structural variant (key of `lowc_heatmaps`/`tads`).
    nonsv_sample_id : str
        Sample without the structural variant.
    pos : dict
        Has keys "x" and "y"; the values are used as the views'
        `initialXDomain` and `initialYDomain`.
    server_port : int, optional
        Port passed to `higlass.display`. Defaults to 8192.

    Returns
    -------
    tuple
        The `(display, server, viewconf)` triple returned by `higlass.display`.
    """
    sv_view = _sv_pair_view(sv_sample_id, pos)
    nonsv_view = _sv_pair_view(nonsv_sample_id, pos)
    sv_heatmaps = lowc_heatmaps[sv_sample_id]
    nonsv_heatmaps = lowc_heatmaps[nonsv_sample_id]

    display, server, viewconf = higlass.display(
        views=[sv_view, nonsv_view],
        value_scale_syncs=[
            # same colour scale for the two centre heatmaps
            [
                (sv_view, sv_heatmaps["centre"]),
                (nonsv_view, nonsv_heatmaps["centre"]),
            ],
            # within each view, the top heatmap follows the centre heatmap
            [
                (sv_view, sv_heatmaps["top"]),
                (sv_view, sv_heatmaps["centre"]),
            ],
            [
                (nonsv_view, nonsv_heatmaps["top"]),
                (nonsv_view, nonsv_heatmaps["centre"]),
            ],
        ],
        zoom_syncs=[[sv_view, nonsv_view]],
        location_syncs=[[sv_view, nonsv_view]],
        server_port=server_port,
    )
    return display, server, viewconf


In [8]:
# NOTE(review): this cell used to read `affected_tads`, a name that is never
# defined in this notebook, so it failed on a fresh kernel. `disruption_tads`
# is assumed to be the intended table because it carries the test_ID / chr /
# start / end columns printed below -- confirm that it also has the
# `altered_TAD` column.
affected_breaks = disruption_tads.loc[disruption_tads.altered_TAD == True, :]
print(affected_breaks)
sv_sample_id = "PCa56413"
nonsv_sample_id = "PCa40507"
pos = {
    "x": [
        nc.chr_pos_to_genome_pos("chr3", 108100000, hg38),
        nc.chr_pos_to_genome_pos("chr3", 109900000, hg38),
    ],
    "y": [
        nc.chr_pos_to_genome_pos("chr3", 66120000, hg38),
        nc.chr_pos_to_genome_pos("chr3", 66200000, hg38)
    ]
}
d, s, v = plot_sv_pairs(sv_sample_id, nonsv_sample_id, pos)
d

     test_ID  altered_TAD    chr      start        end
3          4         True   chr6   83800000   85100000
90       108         True  chr17   15540000   16710000
145      186         True   chr4   70970000   72110000
150      192         True  chr15   70300000   71500000
151      194         True  chr15   71000000   72200000
161      207         True   chr3   41200000   42500000
165      213         True   chr3  108100000  109900000


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'JmE5leqkRx6I7Z3jNqczQQ', 'tracks': {'top': [{'ty…

### A structural variant that does not affect local TAD boundaries

In [20]:
# Sample with the structural variant and a comparison sample without it.
sv_sample_id = "PCa13848"
nonsv_sample_id = "PCa3023"
# Genome-wide coordinates of the two chr10 intervals shown on the x and y axes.
pos = {
    axis: [nc.chr_pos_to_genome_pos("chr10", bp, hg38) for bp in interval]
    for axis, interval in [
        ("x", (4500000, 7500000)),
        ("y", (95500000, 97600000)),
    ]
}
d, s, v = plot_sv_pairs(sv_sample_id, nonsv_sample_id, pos)
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'E3_nU9szS3WJxQYJ4EbllQ', 'tracks': {'top': [{'ty…

# Images
## Important prostate cancer regions

In [7]:
# Annotation tracks followed by the top heatmap of every sample, with the
# samples in the reverse of their order in `all_samples`.
sample_tracks = [chrom_labels["top"], gene_annots["top"]]
sample_tracks.extend(lowc_heatmaps[s]["top"] for s in reversed(all_samples))

### FOXA1

In [18]:
# All samples on the chr14 interval chosen for the FOXA1 locus.
foxa1_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr14", bp, hg38)
        for bp in (36000000, 39000000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    views=[foxa1_view],
    value_scale_syncs=[
        [(foxa1_view, lowc_heatmaps[s]["top"]) for s in all_samples],
    ],
    server_port=8192,
)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'dyNZ4WQIRXiWruzYlqxfPQ', 'tracks': {'top': [{'ty…

In [27]:
# Print the DOM element id of the widget that is about to be exported.
print("This: {}".format(display.dom_element_id))
# Earlier variant of the export call, using a path under the current working
# directory:
#save_as_png(display, path.join(os.getcwd(), "Plots", "FOXA1-locus.v2.png"))
# NOTE(review): `save_as_png` is neither defined nor imported anywhere in the
# visible notebook, so this cell presumably depends on a definition from a
# deleted cell or another session -- confirm where it comes from.
save_as_png(display, path.join("Plots", "FOXA1-locus.v2.png"))

This: u2lmw0


<IPython.core.display.Javascript object>

### _TMPRSS2 - ERG_ fusion

In [24]:
# All samples on the chr21 interval chosen for the TMPRSS2-ERG fusion.
t2e_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr21", bp, hg38)
        for bp in (36000000, 42000000)
    ],
)

# This view is displayed without value-scale syncing or an explicit port.
display, server, viewconf = higlass.display([t2e_view])
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'E6dxk4ZRSQKCc4hcPkU2fQ', 'tracks': {'top': [{'ty…

### AR

In [7]:
# All samples on the chrX interval chosen for the AR locus.
ar_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chrX", bp, hg38)
        for bp in (65800000, 68700000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    views=[ar_view],
    value_scale_syncs=[
        [(ar_view, lowc_heatmaps[s]["top"]) for s in all_samples],
    ],
    server_port=8192,
)
display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'e9esnx96QPGxmB-jq2UuyA', 'tracks': {'top': [{'ty…

### MYC

In [10]:
# All samples on the chr8 interval chosen for the MYC locus.
myc_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr8", bp, hg38)
        for bp in (126000000, 130000000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    views=[myc_view],
    value_scale_syncs=[
        [(myc_view, lowc_heatmaps[s]["top"]) for s in all_samples],
    ],
    server_port=8192,
)
display


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'Ls12O4rvRbCHJLt73c3xnA', 'tracks': {'top': [{'ty…

### HOXB13

In [9]:
# All samples on the chr17 interval chosen for the HOXB13 locus.
hoxb13_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr17", bp, hg38)
        for bp in (46160000, 51320000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    views=[hoxb13_view],
    value_scale_syncs=[
        [(hoxb13_view, lowc_heatmaps[s]["top"]) for s in all_samples],
    ],
    server_port=8192,
)
display


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'Qfj5fsUZTDivGQdrspV78A', 'tracks': {'top': [{'ty…

### KLK Cluster

In [12]:
# All samples on the chr19 interval chosen for the KLK cluster.
klk_view = View(
    tracks=sample_tracks,
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr19", bp, hg38)
        for bp in (49000000, 53000000)
    ],
)

# Every heatmap in the view shares one value scale.
display, server, viewconf = higlass.display(
    views=[klk_view],
    value_scale_syncs=[
        [(klk_view, lowc_heatmaps[s]["top"]) for s in all_samples],
    ],
    server_port=8192,
)
display


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'coYkGqDnSiu23Z7eqM5Geg', 'tracks': {'top': [{'ty…

In [None]:
# All samples (in `all_samples` order) on a chr12 window that extends 5 Mbp
# beyond each end of the 70.48-70.57 Mbp interval.
myc_insertion_view = View(
    tracks=(
        [chrom_labels["top"], gene_annots["top"]]
        + [lowc_heatmaps[s]["top"] for s in all_samples]
    ),
    initialXDomain=[
        nc.chr_pos_to_genome_pos("chr12", bp, hg38)
        for bp in (70480000 - 5e6, 70570000 + 5e6)
    ],
)

display, server, viewconf = higlass.display(views=[myc_insertion_view])
display

## Structural variants

### Breakpoint pairs

In [26]:
# Padding added on each side of a TAD when choosing the initial domains.
zoom_offset = 1e6


def _padded_tad_window(test_id):
    """Return chr/start/end of the TAD of `test_id`, padded by `zoom_offset`."""
    tad = disruption_tads.loc[disruption_tads.test_ID == test_id]
    return {
        "chr": tad["chr"].values[0],
        "start": int(tad["start"].values[0]) - zoom_offset,
        "end": int(tad["end"].values[0]) + zoom_offset,
    }


def _mutated_sample_view(sample, x_window, y_window, with_peaks):
    """Build the 2D view of a mutated sample for one x/y pair of TAD windows."""

    def side_tracks(position):
        # signal, optional peaks, breakpoints and TADs at two window sizes
        side = [chip[sample][position]]
        if with_peaks:
            side.append(peaks[sample][position])
        side.extend([
            break_tracks_1D[sample][position],
            tads[sample][3][position],
            tads[sample][24][position],
        ])
        return side

    return View(
        tracks=(
            [chrom_labels["top"], gene_annots["top"]]
            + side_tracks("top")
            + [chrom_labels["left"]]
            + side_tracks("left")
            + [lowc_heatmaps[sample]["centre"], lowc_heatmaps[sample]["bottom"]]
        ),
        initialXDomain=[
            nc.chr_pos_to_genome_pos(x_window["chr"], x_window[edge], hg38)
            for edge in ("start", "end")
        ],
        initialYDomain=[
            nc.chr_pos_to_genome_pos(y_window["chr"], y_window[edge], hg38)
            for edge in ("start", "end")
        ],
    )


# test_ID
ti = 186
test_row = disruption_tests.loc[disruption_tests.test_ID == ti]
# break_ID(s)
i = {int(b) for b in test_row["breakpoint_IDs"].values[0].split(",")}
# paired breakpoint_ID(s): the partner of every breakpoint in `i`
i_pairs = (
    set(bp_pairs.loc[bp_pairs.breakpoint_ID_x.isin(i), "breakpoint_ID_y"].tolist())
    | set(bp_pairs.loc[bp_pairs.breakpoint_ID_y.isin(i), "breakpoint_ID_x"].tolist())
)
# paired test_ID(s): tests that involve at least one of the partner breakpoints
has_partner = disruption_tests.breakpoint_IDs.apply(
    lambda ids: not i_pairs.isdisjoint(int(t) for t in ids.split(","))
)
tj = [int(k) for k in disruption_tests.loc[has_partner, "test_ID"].values]
if not tj:
    # Without this check the failure would be an IndexError at `tj[0]` below.
    raise ValueError("No disruption test involves a partner breakpoint of test_ID {}".format(ti))

# mutated sample IDs
mut_s = test_row["mut_samples"].values[0].split(",")
nonmut_s = test_row["nonmut_samples"].values[0].split(",")

# views
if len(tj) > 1:
    # one view per (mutated sample, paired test)
    pos = {
        "x": _padded_tad_window(ti),
        "y": {k: _padded_tad_window(k) for k in tj},
    }
    # NOTE(review): the peak tracks are only shown in the single-pair branch
    # below; kept as in the original cell -- confirm whether that is intended.
    v = [
        _mutated_sample_view(s, pos["x"], pos["y"][k], with_peaks=False)
        for s in mut_s for k in tj
    ]
else:
    # one view per mutated sample
    pos = {
        "x": _padded_tad_window(ti),
        "y": _padded_tad_window(tj[0]),
    }
    v = [
        _mutated_sample_view(s, pos["x"], pos["y"], with_peaks=True)
        for s in mut_s
    ]

# 1D views of (at most) the first two non-mutated samples for comparison
for s in nonmut_s[0:2]:
    v.append(View(
        tracks=[
            lowc_heatmaps[s]["top"],
            chip[s]["top"],
            tads[s][3]["top"],
            tads[s][24]["top"],
        ],
        initialXDomain=[
            nc.chr_pos_to_genome_pos(pos["x"]["chr"], pos["x"][edge], hg38)
            for edge in ("start", "end")
        ],
    ))

display, server, viewconf = higlass.display(
    views=v,
    zoom_syncs=[v],
    server_port=8192
)

display

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'dtqgqynJTiOe4xz5Jtp2WA', 'tracks': {'top': [{'ty…

## Comparing breakpoints for simultaneous testing

In [45]:
# Padding added on each side of a breakpoint when choosing the initial domains.
zoom_offset = 1e6

# breakpoint IDs (always in pairs)
bids = [71, 73]
these_breaks = bp_singletons.loc[bp_singletons.breakpoint_ID.isin(bids)]
mut_s = np.unique(these_breaks["SampleID"])
# `.copy()` gives an independent frame, so the column swap below neither
# raises SettingWithCopyWarning nor depends on how pandas built the slice.
these_pairs = bp_pairs.loc[
    bp_pairs.breakpoint_ID_x.isin(bids) | bp_pairs.breakpoint_ID_y.isin(bids), :
].copy()
print(these_breaks)

# swap columns so that `these_breaks` are always in the _x columns of `these_pairs`
needs_swap = these_pairs.breakpoint_ID_y.isin(bids)
for field in ("chr", "start", "end", "breakpoint_ID", "component_ID"):
    x_col, y_col = field + "_x", field + "_y"
    x_values = these_pairs.loc[needs_swap, x_col].copy()
    these_pairs.loc[needs_swap, x_col] = these_pairs.loc[needs_swap, y_col]
    these_pairs.loc[needs_swap, y_col] = x_values

print(these_pairs)

# x: one window spanning all selected breakpoints (the chromosome is taken
# from the first of them); y: one window per paired breakpoint.
pos = {
    "x": {
        "chr": these_breaks["chr"].values[0],
        "start": int(min(these_breaks["start"])) - zoom_offset,
        "end": int(max(these_breaks["end"])) + zoom_offset,
    },
    "y": [
        {
            "SampleID": r.SampleID,
            "chr": r.chr_y,
            "start": int(r.start_y - zoom_offset),
            "end": int(r.end_y + zoom_offset),
        } for r in these_pairs.itertuples()
    ]
}

v = [
    # annotations
    View(
        tracks=[
            chrom_labels["top"],
            gene_annots["top"],
        ],
        initialXDomain=[
            nc.chr_pos_to_genome_pos(pos["x"]["chr"], pos["x"][edge], hg38)
            for edge in ("start", "end")
        ],
    )
] + [
    # contact matrices, H3K27ac signal, and breakpoints
    View(
        tracks=[
            lowc_heatmaps[s]["top"],
            tads[s][3]["top"],
            tads[s][24]["top"],
            chip[s]["top"],
            break_tracks_1D[s]["top"],
        ],
        initialXDomain=[
            nc.chr_pos_to_genome_pos(pos["x"]["chr"], pos["x"][edge], hg38)
            for edge in ("start", "end")
        ],
    ) for s in mut_s
] + [
    # breakpoint detection in contact matrix
    View(
        tracks=[
            break_tracks_1D[y["SampleID"]]["top"],
            chrom_labels["left"],
            gene_annots["left"],
            chip[y["SampleID"]]["left"],
            break_tracks_1D[y["SampleID"]]["left"],
            CombinedTrack([
                lowc_heatmaps[y["SampleID"]]["centre"],
                break_tracks_2D[y["SampleID"]]["centre"],
            ]),
        ],
        initialXDomain=[
            nc.chr_pos_to_genome_pos(pos["x"]["chr"], pos["x"][edge], hg38)
            for edge in ("start", "end")
        ],
        initialYDomain=[
            nc.chr_pos_to_genome_pos(y["chr"], y[edge], hg38)
            for edge in ("start", "end")
        ],
    ) for y in pos["y"]
]

display, server, viewconf = higlass.display(
    views=v,
    zoom_syncs=[v],
    server_port=8192
)

display

      chr    start      end  SampleID  breakpoint_ID  component_ID
72  chr16  4450000  4700000  PCa19121             71             6
73  chr16  4800000  5100000  PCa19121             73             7
    chr_x  start_x    end_x  chr_y   start_y     end_y  breakpoint_ID_x  \
32  chr16  4450000  4700000   chr8  41510000  41550000               71   
33  chr16  4800000  5100000  chr16  71100000  71600000               73   

    breakpoint_ID_y  component_ID_x  component_ID_y  SampleID  sv_type  
32               72               6               6  PCa19121      BND  
33               74               7               7  PCa19121  UNKNOWN  


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'J7zjIb6BRuGfzK_ojFe-Ug', 'tracks': {'top': [{'ty…