# Explore 3 simulated targets

In [1]:
%load_ext autoreload
%autoreload 2

import higlass as hg

In [2]:
import base64
import cooler
import numpy as np
import pandas as pd
from random import random
from clodius.tiles.format import format_dense_tile
from clodius.tiles.utils import tile_bounds

def bigbed_like(
    bedlike_filepath: str,
    chromsizes_filepath: str = None,
    uuid: str = None,
    aggregator: callable = np.mean,
    log_scale: bool = False,
    categories: dict = None
):
    """Expose a BED-like file as a dense, downsampled 1D HiGlass tileset.

    The BED-like file is expanded into one value per base pair over the
    concatenated genome (chromosomes in the order of the chromsizes file).
    Tiles are then produced on demand by downsampling that dense vector.

    Args:
        bedlike_filepath: Tab-separated file without header whose first five
            columns are chrom, start, end, name and score.
        chromsizes_filepath: Tab-separated file without header whose first
            two columns are the chromosome name and its size.
        uuid: Identifier handed to ``hg.Tileset``.
        aggregator: Reduction used for downsampling; it is called as
            ``aggregator(array_2d, axis=1)``.
        log_scale: If true, the dense values are transformed with
            ``log(value + 1)``.
        categories: Optional mapping from region name to numeric value. When
            given, the mapped value is used instead of the score column;
            names missing from the mapping use ``categories['__others__']``.

    Returns:
        An ``hg.Tileset`` serving the densified data.

    Raises:
        ValueError: If a region refers to a chromosome that is not listed in
            the chromsizes file.
        KeyError: If a region name is missing from ``categories`` and no
            ``'__others__'`` entry exists.
    """
    TILE_SIZE = 1024

    chromsizes = pd.read_csv(
        chromsizes_filepath,
        sep='\t',
        index_col=0,
        usecols=[0, 1],
        names=[None, 'size'],
        header=None
    )
    chrom_lengths = chromsizes['size'].values
    num_chroms = len(chrom_lengths)
    cum_chromsizes = np.cumsum(chrom_lengths)
    genome_size = int(cum_chromsizes[-1])
    # Absolute start of every chromosome plus the end of the genome
    abs_chrom_offsets = np.r_[0, cum_chromsizes]
    chrom_offsets = dict(zip(chromsizes.index, abs_chrom_offsets[:-1]))

    min_tile_cover = np.ceil(genome_size / TILE_SIZE)
    max_zoom = int(np.ceil(np.log2(min_tile_cover)))
    # resolutions[zoom_level] is the number of base pairs per bin
    resolutions = [2 ** x for x in range(max_zoom + 1)][::-1]

    bedlike = pd.read_csv(
        bedlike_filepath,
        sep='\t',
        index_col=None,
        usecols=[0, 1, 2, 3, 4],
        names=['chrom', 'start', 'end', 'name', 'score'],
        header=None
    )

    dense = np.zeros(genome_size)

    # Densify bed data for later downsampling. Every region is written at
    # its absolute genomic position (chromosome offset + start) so gaps,
    # unsorted records and multiple chromosomes are placed correctly.
    columns = zip(
        bedlike['chrom'],
        bedlike['start'],
        bedlike['end'],
        bedlike['name'],
        bedlike['score'],
    )
    for chrom, start, end, name, score in columns:
        try:
            offset = chrom_offsets[chrom]
        except KeyError:
            raise ValueError(
                f"Chromosome {chrom!r} is not listed in {chromsizes_filepath}"
            ) from None

        if categories is None:
            value = score
        elif name in categories:
            value = categories[name]
        else:
            value = categories['__others__']

        dense[int(offset + start):int(offset + end)] = value

    if log_scale:
        dense = np.log1p(dense)

    def tileset_info():
        """Describe the zoom pyramid of the tileset."""
        span = TILE_SIZE * 2 ** max_zoom
        return {
            "min_pos": [0],
            "max_pos": [span],
            "max_width": span,
            "tile_size": TILE_SIZE,
            "max_zoom": max_zoom,
        }

    def abs2genomic(start_pos, end_pos):
        """Split an absolute range into (chrom index, rel. start, rel. end).

        The chromosome index equals ``num_chroms`` for the part of the range
        that lies beyond the end of the genome.
        """
        cid_lo, cid_hi = np.searchsorted(
            abs_chrom_offsets, [start_pos, end_pos], side="right"
        ) - 1
        rel_pos_lo = start_pos - abs_chrom_offsets[cid_lo]
        rel_pos_hi = end_pos - abs_chrom_offsets[cid_hi]
        start = rel_pos_lo
        for cid in range(cid_lo, cid_hi):
            yield cid, start, int(chrom_lengths[cid])
            start = 0
        yield cid_hi, start, rel_pos_hi

    def downsample(data, bins):
        """Reduce ``data`` to ``bins`` values using ``aggregator``."""
        dim = data.shape[0]

        assert dim >= bins

        # Downsampling factor
        factor = np.round(dim / bins)

        # Temporary dimension to support downsampling by an integer
        tmp_dim = int(bins * factor)
        diff = tmp_dim - dim

        left_pad = int(np.floor(np.abs(diff) / 2))
        right_pad = int(np.ceil(np.abs(diff) / 2))

        if diff == 0:
            tmp = data
        elif diff > 0:
            # tmp is larger than data: pad both ends with the edge values
            tmp = np.zeros(tmp_dim)
            tmp[left_pad:tmp_dim - right_pad] = data
            tmp[:left_pad] = data[0]
            tmp[-right_pad:] = data[-1]
        else:
            # tmp is smaller than data: crop symmetrically
            tmp = data[left_pad:dim - right_pad]

        return aggregator(tmp.reshape((bins, -1)), axis=1)

    def get_tile(zoom_level, start_pos, end_pos):
        """Return the binned values for an absolute genomic range."""
        binsize = resolutions[zoom_level]

        arrays = []
        for cid, start, end in abs2genomic(start_pos, end_pos):
            bins = int(np.ceil((end - start) / binsize))

            if cid >= num_chroms:
                # beyond the range of the available chromosomes: the
                # requested absolute coordinates stretch past the end of
                # the genome
                arrays.append(np.zeros(bins))
                continue

            if bins == 0:
                # zero-length segment (range ends exactly on a chromosome
                # boundary); there is nothing to downsample
                arrays.append(np.zeros(0))
                continue

            clen = chrom_lengths[cid]
            # abs2genomic yields chromosome-relative coordinates while
            # dense is indexed by absolute position
            offset = abs_chrom_offsets[cid]
            x = downsample(dense[offset + start:offset + end], bins)

            # drop the very last bin if it is smaller than the binsize
            if end == clen and clen % binsize != 0:
                x = x[:-1]

            arrays.append(x)

        return np.concatenate(arrays)

    def tiles(tile_ids):
        """Generate ``(tile_id, formatted tile)`` pairs for the given ids."""
        generated_tiles = []

        for tile_id in tile_ids:
            # decompose the tile zoom and location
            _, zoom_level, tile_pos = tile_id.split('.')
            zoom_level = int(zoom_level)
            tile_pos = int(tile_pos)

            tile_size = TILE_SIZE * 2 ** (max_zoom - zoom_level)
            start_pos = tile_pos * tile_size
            end_pos = start_pos + tile_size

            # generate the tile
            data = get_tile(zoom_level, start_pos, end_pos)

            # format the tile response
            generated_tiles.append((tile_id, format_dense_tile(data)))

        return generated_tiles

    return hg.Tileset(
        tileset_info=tileset_info,
        tiles=tiles,
        uuid=uuid,
    )

In [3]:
def extract_annotations(bedfile, features, chromsizesfile):
    """Collect absolute genomic intervals of selected BED features.

    Args:
        bedfile: Tab-separated file without header whose first five columns
            are chrom, start, end, name and score.
        features: Mapping from feature name to a list of annotation-type
            indices. Every region whose name is a key is added to each of
            the listed annotation types.
        chromsizesfile: Tab-separated file without header whose first two
            columns are the chromosome name and its size.

    Returns:
        A list with ``max(index) + 1`` entries (at least one). Entry ``i``
        is the list of ``[start, end]`` pairs, in absolute coordinates over
        the concatenated genome, of all regions assigned to type ``i``.

    Raises:
        KeyError: If a selected region lies on a chromosome that is missing
            from the chromsizes file.
    """
    chromsizes = pd.read_csv(
        chromsizesfile,
        sep='\t',
        index_col=0,
        usecols=[0, 1],
        names=['chrom', 'size'],
        header=None
    )
    sizes = chromsizes['size']
    # A chromosome starts where all preceding chromosomes end, i.e. at the
    # cumulative size minus its own size (not minus the first one's size).
    chrom_starts = sizes.cumsum() - sizes
    chrom_offsets = {
        chrom: int(offset)
        for chrom, offset in zip(chrom_starts.index, chrom_starts.values)
    }

    requested_types = [
        annotation_type
        for annotation_types in features.values()
        for annotation_type in annotation_types
    ]
    num_annotations_type = max([0] + requested_types) + 1
    annotations = [[] for _ in range(num_annotations_type)]

    bed = pd.read_csv(
        bedfile,
        sep='\t',
        index_col=None,
        usecols=[0, 1, 2, 3, 4],
        names=['chrom', 'start', 'end', 'name', 'score'],
        header=None
    )

    columns = zip(bed['chrom'], bed['start'], bed['end'], bed['name'])
    for chrom, start, end, name in columns:
        annotation_types = features.get(name)
        if not annotation_types:
            continue

        offset = chrom_offsets[chrom]
        # Plain ints keep the result JSON-serializable
        abs_start = offset + int(start)
        abs_end = offset + int(end)
        for annotation_type in annotation_types:
            annotations[annotation_type].append([abs_start, abs_end])

    return annotations

def createAnnosTrack(uid, regions):
    """Create a thin 'horizontal-1d-annotations' track placed at the top.

    Args:
        uid: Unique id assigned to the track.
        regions: Value passed through as the track's ``regions`` option.

    Returns:
        The configured ``hg.Track``.
    """
    track_options = {
        'trackBorderWidth': 1,
        'trackBorderColor': '#f2f2f2',
        'regions': regions,
        'minRectWidth': 4,
        'fill': '#c17da5',
        'fillOpacity': 1,
        'strokeWidth': 0,
    }
    track = hg.Track(
        'horizontal-1d-annotations',
        uid=uid,
        position='top',
        height=8,
        options=track_options,
    )
    return track

In [4]:
# Input files shared by the BED-derived tilesets and annotations below
features_bed = 'data/simulated-features-3-targets.bed'
genome_chromsizes = 'data/simulated-genome-chrom-sizes.tsv'

# Fold-change signal per target
ts_bp_fc_signal_1 = hg.tilesets.bigwig('data/fold-change-target-1.bigWig')
ts_bp_fc_signal_2 = hg.tilesets.bigwig('data/fold-change-target-2.bigWig')
ts_bp_fc_signal_3 = hg.tilesets.bigwig('data/fold-change-target-3.bigWig')

# Read signal per target
ts_bp_chip_signal_1 = hg.tilesets.bigwig('data/signal-target-1.bigWig')
ts_bp_chip_signal_2 = hg.tilesets.bigwig('data/signal-target-2.bigWig')
ts_bp_chip_signal_3 = hg.tilesets.bigwig('data/signal-target-3.bigWig')

# Read signal per target with distorted peaks
ts_bp_chip_signal_1_distorted = hg.tilesets.bigwig(
    'data/signal-target-1-distorted-peaks.bigWig'
)
ts_bp_chip_signal_2_distorted = hg.tilesets.bigwig(
    'data/signal-target-2-distorted-peaks.bigWig'
)
ts_bp_chip_signal_3_distorted = hg.tilesets.bigwig(
    'data/signal-target-3-distorted-peaks.bigWig'
)

# Baseline (input) signal
ts_bp_input_signal = hg.tilesets.bigwig('data/signal-baseline.bigWig')

# Log-scaled feature scores, max-aggregated when downsampled
ts_feature_scores = bigbed_like(
    features_bed,
    genome_chromsizes,
    uuid='feature_scores',
    aggregator=np.max,
    log_scale=True,
)

# Feature names encoded as numeric categories
feature_categories = {
    "__others__": 0.0,
    "Background": 1.0,
    "BindingA": 2.0,
    "BindingB": 3.0,
    "BindingAB": 4.0
}
ts_features = bigbed_like(
    features_bed,
    genome_chromsizes,
    uuid='features',
    aggregator=np.max,
    categories=feature_categories,
)

# Annotation-type indices: 0-2 single targets, 3-5 target pairs, 6 all three
feature_annotation_types = {
    'BindingA': [0],
    'BindingB': [1],
    'BindingC': [2],
    'BindingAB': [0, 1, 3],
    'BindingAC': [0, 2, 4],
    'BindingBC': [1, 2, 5],
    'BindingABC': [0, 1, 2, 3, 4, 5, 6]
}
(
    annos_1,
    annos_2,
    annos_3,
    annos_12,
    annos_13,
    annos_23,
    annos_all,
) = extract_annotations(
    features_bed,
    feature_annotation_types,
    genome_chromsizes,
)

In [9]:
def _blue_bar_track(uid, tileset, name, **extra_options):
    """Build a 48px top 'horizontal-bar' track using the blue color scheme."""
    options = {
        'name': name,
        'labelColor': '#6fb2e4',
        'colorRange': ['#6fb2e4', '#0064a8'],
    }
    options.update(extra_options)
    return hg.Track(
        'horizontal-bar',
        uid=uid,
        tileset=tileset,
        position='top',
        height=48,
        options=options,
    )


tr_top_axis = hg.Track(track_type='top-axis', position='top', uid="axis")

# Annotation tracks for single targets and target pairs
tr_target_1 = createAnnosTrack('target_1', annos_1)
tr_target_2 = createAnnosTrack('target_2', annos_2)
tr_target_3 = createAnnosTrack('target_3', annos_3)
tr_target_12 = createAnnosTrack('target_12', annos_12)
tr_target_13 = createAnnosTrack('target_13', annos_13)
tr_target_23 = createAnnosTrack('target_23', annos_23)

# Fold-change and read tracks per target
tr_bp_fc_signal_1 = _blue_bar_track(
    'fc_signal_1', ts_bp_fc_signal_1, 'Target 1 fold-change', valueScaleMin=0
)
tr_bp_chip_signal_1 = _blue_bar_track(
    'chip_signal_1', ts_bp_chip_signal_1, 'Target 1 reads'
)

tr_bp_fc_signal_2 = _blue_bar_track(
    'fc_signal_2', ts_bp_fc_signal_2, 'Target 2 fold-change', valueScaleMin=0
)
tr_bp_chip_signal_2 = _blue_bar_track(
    'chip_signal_2', ts_bp_chip_signal_2, 'Target 2 reads'
)

tr_bp_fc_signal_3 = _blue_bar_track(
    'fc_signal_3', ts_bp_fc_signal_3, 'Target 3 fold-change', valueScaleMin=0
)
tr_bp_chip_signal_3 = _blue_bar_track(
    'chip_signal_3', ts_bp_chip_signal_3, 'Target 3 reads'
)

# Baseline reads in gray
baseline_options = {
    'name': 'Baseline reads',
    'labelColor': '#888888',
    'barFillColor': '#888888'
}
tr_bp_input_signal = hg.Track(
    'horizontal-bar',
    uid='input_signal',
    tileset=ts_bp_input_signal,
    position='top',
    height=48,
    options=baseline_options,
)

# Log-scaled binding scores on a gray ramp
score_options = {
    "name": "Binding scores (log)",
    "colorRange": [
        "#f2f2f2",
        "#f2f2f2",
        "#f2f2f2",
        "#f2f2f2",
        "#dddddd",
        "#bbbbbb",
        "#888888"
    ],
    "labelColor": "#666666",
}
tr_chip_feature_scores = hg.Track(
    'horizontal-bar',
    uid='feature_scores',
    tileset=ts_feature_scores,
    position='top',
    height=16,
    options=score_options,
)

# Categorical feature heatmap; value scale pinned to the range 0..4
feature_heatmap_options = {
    "name": "ChIP sim features",
    "colorRange": [
        "#ffffff",
        "#f2f2f2",
        "#0064a8",
        "#dca237",
        "#469b76"
    ],
    "labelColor": "#666666",
    "valueScaleMin": 0,
    "valueScaleMax": 4
}
tr_chip_features = hg.Track(
    'horizontal-1d-heatmap',
    tileset=ts_features,
    position='top',
    height=16,
    options=feature_heatmap_options,
)

# Overlay spanning the listed tracks, one extent per entry of annos_all
overlay_track_uids = [
    "feature_scores",
    "target_1",
    "target_2",
    "target_3",
    "chip_signal_1",
    "chip_signal_2",
    "chip_signal_3",
    "input_signal",
]
overlay_extent = [[int(x[0]), int(x[1])] for x in annos_all]
overlay_options = {
    "extent": overlay_extent,
    "minWidth": 6,
    "fill": "#c17da5",
    "fillOpacity": 0.1,
    "stroke": "white",
    "strokeWidth": 1,
    "strokePos": ["left", "right"],
    "outline": "#c17da5",
    "outlineWidth": 2,
    "outlinePos": ["left", "right"]
}
overlays = [{
    "uid": "overlays",
    "includes": overlay_track_uids,
    "options": overlay_options,
}]

In [22]:
import logging

# All tracks live in the single view 'v', so every value-scale sync id must
# use the 'v.' prefix; 'v1' does not name any view in this display.
widget, server, _ = hg.display(
    views=[
        hg.View(
            [
                tr_top_axis,
                tr_chip_feature_scores,
                tr_target_1,
                tr_target_2,
                tr_target_3,
                tr_bp_chip_signal_1,
                tr_bp_chip_signal_2,
                tr_bp_chip_signal_3,
                tr_bp_input_signal,
            ],
            uid='v',
            initialXDomain=[0, 1.2e6],
            overlays=overlays
        )
    ],
    value_scale_syncs=[[
        'v.chip_signal_1',
        'v.chip_signal_2',
        'v.chip_signal_3',
        'v.input_signal',
    ]],
    no_fuse=True,
    log_level=logging.DEBUG
)

widget

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'v', 'tracks': {'top': [{'type': 'top-axis', 'uid…

In [11]:
tr_bp_chip_signal_distorted = []
ts_bp_chip_signal_distorted = [ts_bp_chip_signal_1_distorted, ts_bp_chip_signal_2_distorted, ts_bp_chip_signal_3_distorted]

for i in range(3):
    tr_bp_chip_signal_distorted.append(hg.Track(
        'horizontal-bar',
        uid=f"chip_signal_{i + 1}_distorted",
        tileset=ts_bp_chip_signal_distorted[i],
        position='top',
        height=48,
        options={
            'name': f"Target {i + 1} reads distorted",
            'labelColor': '#6fb2e4',
            'colorRange': ['#6fb2e4', '#0064a8'],
        }
    ))

overlays = [{
    "uid": "overlays",
    "includes": [
        "feature_scores",
        "target_1",
        "target_2",
        "target_3",
        "chip_signal_1_distorted",
        "chip_signal_2_distorted",
        "chip_signal_3_distorted",
        "input_signal",
    ],
    "options": {
        "extent": [
            [int(x[0]), int(x[1])] for x in annos_all
        ],
        "minWidth": 6,
        "fill": "#c17da5",
        "fillOpacity": 0.1,
        "stroke": "white",
        "strokeWidth": 1,
        "strokePos": ["left", "right"],
        "outline": "#c17da5",
        "outlineWidth": 2,
        "outlinePos": ["left", "right"]
    }
}]

%env HIGLASS_PYTHON_DEBUG=True

widget, server, _ = hg.display(
    views=[
        hg.View(
            [
                tr_top_axis,
                tr_chip_feature_scores,
                tr_target_1,
                tr_target_2,
                tr_target_3,
                tr_bp_chip_signal_distorted[0],
                tr_bp_chip_signal_distorted[1],
                tr_bp_chip_signal_distorted[2],
                tr_bp_input_signal,
            ],
            uid='v',
            initialXDomain=[0, 1.2e6],
            overlays=overlays
        )
    ],
    value_scale_syncs=[[
        tr_bp_chip_signal_distorted[0],
        tr_bp_chip_signal_distorted[1],
        tr_bp_chip_signal_distorted[2],
        tr_bp_input_signal
    ]],
    no_fuse=True
)

widget

env: HIGLASS_PYTHON_DEBUG=True


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'v', 'tracks': {'top': [{'type': 'top-axis', 'uid…

# Large genome (12 Mbp)

In [29]:
tr_top_axis = hg.Track(track_type='top-axis', position='top', uid="axis")

# bigWig signals of the large simulated genome
ts_bp_chip_signal_1_distorted_l = hg.tilesets.bigwig('data/signal-target-1-distorted-peaks-12000000-1000000.bigWig')
ts_bp_chip_signal_2_distorted_l = hg.tilesets.bigwig('data/signal-target-2-distorted-peaks-12000000-1000000.bigWig')
ts_bp_chip_signal_3_distorted_l = hg.tilesets.bigwig('data/signal-target-3-distorted-peaks-12000000-1000000.bigWig')
ts_bp_input_signal_l = hg.tilesets.bigwig('data/signal-baseline-distorted-peaks-12000000-1000000.bigWig')

ts_bp_chip_signal_distorted_l = [
    ts_bp_chip_signal_1_distorted_l,
    ts_bp_chip_signal_2_distorted_l,
    ts_bp_chip_signal_3_distorted_l,
]

# Log-scaled feature scores. Built once: each bigbed_like call densifies the
# whole genome, so the identical second construction was dropped.
ts_feature_scores_l = bigbed_like(
    'data/simulated-features-3-targets-12000000-1000000.bed',
    'data/simulated-genome-chrom-sizes-12000000-1000000.tsv',
    uuid = 'feature_scores',
    aggregator = np.max,
    log_scale = True
)

# Feature names encoded as numeric categories
ts_features_l = bigbed_like(
    'data/simulated-features-3-targets-12000000-1000000.bed',
    'data/simulated-genome-chrom-sizes-12000000-1000000.tsv',
    uuid = 'features',
    aggregator = np.max,
    categories = {
        "__others__": 0.0,
        "Background": 1.0,
        "BindingA": 2.0,
        "BindingB": 3.0,
        "BindingAB": 4.0
    }
)

# Annotation-type indices: 0-2 single targets, 3-5 target pairs, 6 all three
annos_1_l, annos_2_l, annos_3_l, annos_12_l, annos_13_l, annos_23_l, annos_all_l = extract_annotations(
    'data/simulated-features-3-targets-12000000-1000000.bed',
    {
        'BindingA': [0],
        'BindingB': [1],
        'BindingC': [2],
        'BindingAB': [0, 1, 3],
        'BindingAC': [0, 2, 4],
        'BindingBC': [1, 2, 5],
        'BindingABC': [0, 1, 2, 3, 4, 5, 6]
    },
    'data/simulated-genome-chrom-sizes-12000000-1000000.tsv',
)

In [30]:
tr_target_1_l = createAnnosTrack('target_1_l', annos_1_l)
tr_target_2_l = createAnnosTrack('target_2_l', annos_2_l)
tr_target_3_l = createAnnosTrack('target_3_l', annos_3_l)

tr_bp_input_signal_l = hg.Track(
    'horizontal-bar',
    uid='input_signal_l',
    tileset=ts_bp_input_signal_l,
    position='top',
    height=48,
    options={
        'name': 'Baseline reads',
        'labelColor': '#888888',
        'barFillColor': '#888888'
    }
)

tr_chip_feature_scores_l = hg.Track(
    'horizontal-bar',
    uid='feature_scores_l',
    tileset=ts_feature_scores_l,
    position='top',
    height=16,
    options={
        "name": "Binding scores (log)",
        "colorRange": [
            "#f2f2f2",
            "#f2f2f2",
            "#f2f2f2",
            "#f2f2f2",
            "#dddddd",
            "#bbbbbb",
            "#888888"
        ],
        "labelColor": "#666666",
    }
)

tr_bp_chip_signal_distorted_l = []

for i in range(3):
    tr_bp_chip_signal_distorted_l.append(hg.Track(
        'horizontal-bar',
        uid=f"chip_signal_{i + 1}_distorted_l",
        tileset=ts_bp_chip_signal_distorted_l[i],
        position='top',
        height=48,
        options={
            'name': f"Target {i + 1} reads distorted",
            'labelColor': '#6fb2e4',
            'colorRange': ['#6fb2e4', '#0064a8'],
        }
    ))

overlays = [{
    "uid": "overlays",
    "includes": [
        "feature_scores_l",
        "target_1_l",
        "target_2_l",
        "target_3_l",
        "chip_signal_1_distorted_l",
        "chip_signal_2_distorted_l",
        "chip_signal_3_distorted_l",
        "input_signal_l",
    ],
    "options": {
        "extent": [
            [int(x[0]), int(x[1])] for x in annos_all_l
        ],
        "minWidth": 6,
        "fill": "#c17da5",
        "fillOpacity": 0.1,
        "stroke": "white",
        "strokeWidth": 1,
        "strokePos": ["left", "right"],
        "outline": "#c17da5",
        "outlineWidth": 2,
        "outlinePos": ["left", "right"]
    }
}]

%env HIGLASS_PYTHON_DEBUG=True

widget, server, _ = hg.display(
    views=[
        hg.View(
            [
                tr_top_axis,
                tr_chip_feature_scores_l,
                tr_target_1_l,
                tr_target_2_l,
                tr_target_3_l,
                tr_bp_chip_signal_distorted_l[0],
                tr_bp_chip_signal_distorted_l[1],
                tr_bp_chip_signal_distorted_l[2],
                tr_bp_input_signal_l,
            ],
            uid='v',
            initialXDomain=[0, 1.2e7],
            overlays=overlays
        )
    ],
    value_scale_syncs=[[
        tr_bp_chip_signal_distorted_l[0],
        tr_bp_chip_signal_distorted_l[1],
        tr_bp_chip_signal_distorted_l[2],
        tr_bp_input_signal_l
    ]],
    no_fuse=True
)

widget

env: HIGLASS_PYTHON_DEBUG=True


HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'v', 'tracks': {'top': [{'type': 'top-axis', 'uid…

In [None]:
# NOTE(review): ts_sp_fc_signal, ts_sp_chip_signal and ts_sp_input_signal are
# not defined in the visible part of this notebook; confirm they exist
# before running this cell.
tr_top_axis = hg.Track(track_type='top-axis', position='top')

# Fold-change signal with the value scale starting at 0
sp_fc_options = {
    'name': 'ChIP sim spiked peaks fc signal',
    'barFillColor': '#008ca8',
    'valueScaleMin': 0
}
tr_sp_fc_signal = hg.Track(
    'horizontal-bar',
    tileset=ts_sp_fc_signal,
    position='top',
    height=48,
    options=sp_fc_options,
)

# ChIP signal
sp_chip_options = {
    'name': 'ChIP sim spiked peaks chip signal',
    'barFillColor': '#0064a8'
}
tr_sp_chip_signal = hg.Track(
    'horizontal-bar',
    tileset=ts_sp_chip_signal,
    position='top',
    height=48,
    options=sp_chip_options,
)

# Input signal
sp_input_options = {
    'name': 'ChIP sim spiked peaks input signal',
    'barFillColor': '#999999'
}
tr_sp_input_signal = hg.Track(
    'horizontal-bar',
    tileset=ts_sp_input_signal,
    position='top',
    height=48,
    options=sp_input_options,
)