# Multicontact Data in HiGlass

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Make the parent directory importable (presumably so that project-local
# modules living one level up can be imported by later cells).
# Re-running the cell is harmless: the path is only added when it is missing.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path += [module_path]

### Download Test Data

In [2]:
from utils import download_file

# Fetch the two multi-contact test files from the public 4DN bucket and keep
# the values returned by `download_file` for later cells.
# NOTE(review): `download_file` is defined in the project's `utils` module and
# is not visible here. Judging by the recorded output of this cell, files that
# already exist are not downloaded again unless `overwrite=True` is passed.
# `base='..'` presumably selects the directory the file is stored in - confirm.
filepath_100k = download_file(
    'https://4dn-dcic-public.s3.amazonaws.com/multi_contact_files/100k_test3.hdf5',
    '100k_test3.hdf5',
    base='..'
)

# Smaller sibling data set; it is downloaded here but not used by the cells
# visible in this notebook.
filepath_10k = download_file(
    'https://4dn-dcic-public.s3.amazonaws.com/multi_contact_files/10k_test3.h5',
    '10k_test3.h5',
    base='..'
)

File already exist. To overwrite pass `overwrite=True`
File already exist. To overwrite pass `overwrite=True`


### Tileset Implementation

In [3]:
import h5py
import higlass as hg
import math
import numpy as np
import pandas as pd

from clodius.tiles.format import format_dense_tile

def mc_1d(filepath, anchors=None, **kwargs):
    """
    Tileset for multicontact 1D data

    Reads the ``clusters/bin`` and ``clusters/cluster_name`` datasets of the
    HDF5 file at ``filepath``, optionally restricts the rows to clusters that
    have a row in every anchor bin, and serves the number of remaining rows
    per bin as 1D tiles.

    Parameters
    ----------
    filepath: str
        Path of the HDF5 file to read.
    anchors: iterable of int, optional
        Bin ids used as filters. The filters are applied one after another:
        after each anchor only rows whose cluster also has a row in that
        anchor bin are kept. ``None`` or an empty iterable means no filtering.
    **kwargs
        Forwarded unchanged to ``hg.Tileset`` (e.g. ``name``).

    Returns
    -------
    The object returned by ``hg.Tileset``, wired to the ``tileset_info`` and
    ``tiles`` callbacks defined below.

    Notes
    -----
    Bin ids are used directly as array indices, so they are assumed to be
    non-negative integers - TODO confirm against the file format.
    """

    tile_size = 1024

    if anchors is None:
        anchors = ()

    def filter_data(filepath, anchors=()):
        """Return ``(counts_by_bin, min_pos, max_pos)`` for the filtered rows."""
        with h5py.File(filepath, 'r') as f:
            data = pd.DataFrame({
                'bin': f['clusters']['bin'],
                'cluster': f['clusters']['cluster_name']
            })

            min_pos = int(data['bin'].values.min())
            max_pos = int(data['bin'].values.max())

        # Tiles address bins by their absolute id (tile x=0 starts at bin 0),
        # so the array has to cover 0..max_pos inclusive. Sizing it as
        # `max_pos - min_pos + 1` raised an IndexError whenever min_pos > 0.
        counts_by_bin = np.zeros(max_pos + 1)

        for anchor in anchors:
            # Clusters that have at least one row in the anchor bin ...
            anchor_clusters = data['cluster'][data['bin'] == anchor]
            # ... are the only ones whose rows survive this filter step.
            data = data[data['cluster'].isin(anchor_clusters)]

        counts = data.groupby('bin').count()
        counts_by_bin[counts.index.values] = counts['cluster'].values

        return counts_by_bin, min_pos, max_pos

    data, min_pos, max_pos = filter_data(filepath, anchors)

    data_size = data.size

    # Number of tiles needed at the highest resolution (integer ceil), and the
    # smallest zoom level whose 2 ** max_zoom tiles cover all of them. Integer
    # arithmetic avoids log(0) and floating point rounding.
    num_base_tiles = -(-data_size // tile_size)
    max_zoom = (num_base_tiles - 1).bit_length()

    tsinfo = {
        'tile_size': tile_size,
        'bins_per_dimension': tile_size,
        'min_pos': [min_pos],
        'max_pos': [max_pos],
        'max_zoom': max_zoom,
        'max_width': 2 ** max_zoom * tile_size,
    }

    def generate_tile(z, x):
        '''
        Return the values of one tile.

        Parameters
        -----------
        z: int
            The zoom level (0 corresponds to most zoomed out)
        x: int
            The x tile position

        Returns
        -------
        numpy.ndarray
            ``tile_size`` values. Each value is the mean count of the
            ``2 ** (max_zoom - z)`` bins it aggregates; positions past the
            end of the data are NaN.
        '''

        # number of bins aggregated into one tile value at this zoom level
        num_to_sum = 2 ** (max_zoom - z)
        tile_width = num_to_sum * tile_size

        x_start = x * tile_width
        # clip to the data length (not max_pos) so the last bin is included
        x_end = min(data_size, x_start + tile_width)

        tile_data = data[x_start:x_end]

        # pad with NaN so that the data splits evenly into groups
        divisible_x_pad = -tile_data.shape[0] % num_to_sum
        padded_data = np.pad(
            tile_data, (0, divisible_x_pad), 'constant', constant_values=np.nan
        )
        groups = padded_data.reshape((-1, num_to_sum))

        # mean over the real (non-NaN) bins of each group; padding is ignored
        group_sums = np.nansum(groups, axis=1)
        group_sizes = np.count_nonzero(~np.isnan(groups), axis=1)
        out_data = np.full(group_sums.shape, np.nan)
        np.divide(group_sums, group_sizes, out=out_data, where=group_sizes > 0)

        # fill the remainder of the tile with NaN
        x_pad = tile_size - out_data.shape[0]

        return np.pad(
            out_data, (0, x_pad), 'constant', constant_values=np.nan
        )

    def tileset_info():
        """Return the static tileset description."""
        return tsinfo

    def tiles(tile_ids):
        """Return ``(tile_id, formatted_tile)`` pairs for the given ids."""
        formatted_tiles = []

        for tile_id in tile_ids:
            # decompose the tile zoom and location
            _, z, x = tile_id.split('.')

            # generate the tile
            tile_values = generate_tile(int(z), int(x))

            # format the tile response
            formatted_tiles.append((tile_id, format_dense_tile(tile_values)))

        return formatted_tiles

    return hg.Tileset(
        tileset_info=tileset_info,
        tiles=tiles,
        **kwargs
    )

### Code for enabling selections in HiGlass

In [4]:
import ipywidgets as widgets

def enable_selection(widget):
    """
    Attach selection controls to a HiGlass widget.

    Creates a toggle button that is linked to the widget's ``select_mode``
    trait and two integer fields that are updated with the x range of every
    ``selection`` event emitted by the widget.

    NOTE(review): a recorded output in this notebook shows
    ``'HiGlassDisplay' object has no attribute 'on'``, so ``widget.on``
    appears to depend on the installed higlass version - confirm.

    Parameters
    ----------
    widget
        The HiGlass display widget; must provide ``on(event_name, callback)``
        and a ``select_mode`` trait.

    Returns
    -------
    tuple
        ``(select_mode, x_from, x_to)``: the toggle button and the two
        integer text fields.
    """
    select_mode = widgets.ToggleButton(value=False, description='Select Mode')
    x_from = widgets.IntText(value=None, description='From:')
    x_to = widgets.IntText(value=None, description='To:')

    def handle_selection(event, widget):
        try:
            # `event['data']` is the event data
            # `[0]` is the first argument, which is typically a dict
            x_range = event['data'][0]['dataRange'][0]
            start, end = int(x_range[0]), int(x_range[1])
        except (KeyError, IndexError, TypeError, ValueError, OverflowError) as error:
            # Best effort: a malformed or empty selection must not break the
            # widget, but say what was wrong instead of hiding it.
            print('Ignoring malformed selection event: {!r}'.format(error))
            return

        # Assign only after both ends were read successfully so the two
        # fields never show a half-updated range.
        x_from.value = start
        x_to.value = end

    widget.on('selection', handle_selection)
    widgets.jslink((widget, 'select_mode'), (select_mode, 'value'))

    return select_mode, x_from, x_to

### Global Track Config

In [5]:
# Keyword arguments shared by every data track in this notebook; they are
# unpacked into `hg.Track(...)` next to the tileset.
track_config = dict(
    track_type='horizontal-line',
    position='top',
    height=128,
    options=dict(
        colorRange=['#ffbb33', '#e5001c', 'black'],
        labelColor='red',
        backgroundColor='white',
    ),
)
# Axis track that is placed in front of the data track in every view below.
axis = hg.Track('top-axis')

---

## 0 Anchors (Coverage)

In [6]:
from IPython.display import display

# Example of a tileset that is filtered by three anchors (kept for reference):
# ts_100k_3a = mc_1d(filepath_100k, anchors=[10885, 10892, 10814], name='100k Test Data')
# Unfiltered tileset: no anchors are passed, so every row of the file is counted.
ts_100k_0a = mc_1d(filepath_100k, name='100k Test Data: 0 Anchors')

# `hg.display` returns three values here; only the first one (the widget) is used.
widget_0a, _, _ = hg.display([hg.View([axis, hg.Track(tileset=ts_100k_0a, **track_config)])])
# NOTE(review): the recorded output of this cell is an AttributeError raised
# because the widget has no `on` attribute - presumably a higlass version
# mismatch; confirm before relying on the selection widgets.
select_mode_0a, x_from_0a, x_to_0a = enable_selection(widget_0a)

# Show the toggle, the HiGlass view and the two range fields together.
display(select_mode_0a, widget_0a, x_from_0a, x_to_0a)

AttributeError: 'HiGlassDisplay' object has no attribute 'on'

In [7]:
# Show the 0-anchor HiGlass view on its own, without the selection controls.
display(widget_0a)

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'WUyagqQeTLmWrRY1f6D6gQ', 'tracks': {'top': [{'ty…

## 1 Anchor

In [65]:
# For now just the midpoint
# The anchor is derived from the range currently held by the 0-anchor
# selection fields, so this cell depends on a selection made interactively
# in the view above (both fields start at 0 when nothing was selected).
anchor_1 = int(x_from_0a.value + ((x_to_0a.value - x_from_0a.value) / 2))
print('Anchor at {}'.format(anchor_1))

# Tileset restricted to clusters that have a row in the anchor bin.
ts_100k_1a = mc_1d(filepath_100k, anchors=[anchor_1], name='100k Test Data: 1 Anchor')

# `hg.display` returns three values here; only the first one (the widget) is used.
widget_1a, _, _ = hg.display([hg.View([axis, hg.Track(tileset=ts_100k_1a, **track_config)])])
select_mode_1a, x_from_1a, x_to_1a = enable_selection(widget_1a)

# Show the toggle, the HiGlass view and the two range fields together.
display(select_mode_1a, widget_1a, x_from_1a, x_to_1a)

Anchor at 17343


ToggleButton(value=False, description='Select Mode')

HiGlassDisplay(hg_options={'theme': 'light'}, viewconf={'editable': True, 'views': [{'uid': 'CLdjQYAgQOipBF3f9…

IntText(value=0, description='From:')

IntText(value=0, description='To:')

In [67]:
# For now just the midpoint
# The second anchor is derived from the range currently held by the 1-anchor
# selection fields, so this cell depends on a selection made interactively
# in the 1-anchor view (both fields start at 0 when nothing was selected).
anchor_2 = int(x_from_1a.value + ((x_to_1a.value - x_from_1a.value) / 2))
print('Anchor at {}'.format(anchor_2))

# Tileset restricted to clusters that have a row in both anchor bins.
ts_100k_2a = mc_1d(filepath_100k, anchors=[anchor_1, anchor_2], name='100k Test Data: 2 Anchor')

# `hg.display` returns three values here; only the first one (the widget) is used.
widget_2a, _, _ = hg.display([hg.View([axis, hg.Track(tileset=ts_100k_2a, **track_config)])])
select_mode_2a, x_from_2a, x_to_2a = enable_selection(widget_2a)

# Show the toggle, the HiGlass view and the two range fields together.
display(select_mode_2a, widget_2a, x_from_2a, x_to_2a)

Anchor at 17394


ToggleButton(value=False, description='Select Mode')

HiGlassDisplay(hg_options={'theme': 'light'}, viewconf={'editable': True, 'views': [{'uid': 'ZP68XyCaRSqm7Koz6…

IntText(value=0, description='From:')

IntText(value=0, description='To:')