In [7]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import numpy as np
import json
import urllib.request
import numpy as np
import time
import dash
import dash_core_components as dcc
import dash_html_components as html

#authenticate plotly:
plotly.tools.set_credentials_file(username='smorabito', api_key='BoBmLuy3saDfBmpvhsM7')

#necessary for plotly offline:
init_notebook_mode(connected=True)

In [8]:
#better dummy data:
var_data = {
    "chromatin_state": [
        {
            "state": "enhA1",
            "biosample_term_name": "skeletal muscle"
        },
        {
            "state": "promoter",
            "biosample_term_name": "islet"
        }
    ],
    "footprints": [
        {
            "footprint": "HNF4",
            "biosample_term_name": "islet"
        },
        {
            "footprint": "PPARA",
            "biosample_term_name": "islet",
        },
        {
            "footprint": "TCF3",
            "biosample_term_name": "hippocampus"
        },
        {
            "footprint": "STAT",
            "biosample_term_name": "skeletal muscle"
        }
        
    ],
    "ChIP": [
        {
            "target": "FOXA2",
            "biosample_term_name": "islet",
        },
        {
            "target": "NKX2.2",
            "biosample_term_name": "skeletal muscle"
        },
        {
            "target": "CTCF",
            "biosample_term_name": "islet"
        },
    ]
    
}

In [9]:
#start dealing with actual JSON to build these graphs:

def rsid_url(rsid):
    return "https://www.t2depigenome.org/peak_metadata/region=%s&genome=GRCh37/peak_metadata.tsv" % rsid

In [10]:
def generate_positions(var_data, var_name, vert_space=4, box_width=10, \
                       box_height=4, text_y0=100, plot_width=100, offset=20, expanded=True):
    
    #get number of annotations
    num_annotations = len(var_data.keys())
    num_sides = int(num_annotations/2)   
    
    positions = {
        var_name: {
            "text-coords": [plot_width/2, text_y0],
            "shape-coords": [
                [(plot_width/2)-box_width/2, text_y0-box_height/2],
                [(plot_width/2)+box_width/2, text_y0+box_height/2]
            ],
            "annotations": {key: {"items": val} for key, val in var_data.items()}
        }
    }
    
    for i, anno in enumerate(positions[var_name]["annotations"].keys()):
        #get x offset:
        if num_annotations % 2 == 1 and i == int(num_annotations/2)+1:
            x_offset = positions[var_name]["text-coords"][0] + offset
        elif i+1 <= num_sides: 
            x_offset = positions[var_name]["text-coords"][0] - offset*(i+1)
        elif i+1 > num_sides and num_annotations % 2 == 1:
            x_offset = positions[var_name]["text-coords"][0] + offset*(i-int(num_annotations/2))
        else:
            x_offset = positions[var_name]["text-coords"][0] + offset*((i+1)-int(num_annotations/2))
            
        #set text coordinates for this annotation
        positions[var_name]["annotations"][anno]["text-coords"] = [
            x_offset, 
            text_y0-(2*vert_space)
        ]
        
        #set shape coordinates for this annotation
        positions[var_name]["annotations"][anno]["shape-coords"] = [
            [
                positions[var_name]["annotations"][anno]["text-coords"][0]-box_width/2,
                positions[var_name]["annotations"][anno]["text-coords"][1]-box_height/2
            ],
            [
                positions[var_name]["annotations"][anno]["text-coords"][0]+box_width/2,
                positions[var_name]["annotations"][anno]["text-coords"][1]+box_height/2
            ]
        ]
        
        if expanded == True:
            for j, item in enumerate(positions[var_name]["annotations"][anno]["items"]):
                if j == 0:
                    item["text-coords"] = [
                        positions[var_name]["annotations"][anno]["text-coords"][0],
                        positions[var_name]["annotations"][anno]["text-coords"][1] - box_height
                    ]
                else:
                    item["text-coords"] = [
                        positions[var_name]["annotations"][anno]["items"][j-1]["text-coords"][0],
                        positions[var_name]["annotations"][anno]["items"][j-1]["text-coords"][1] - box_height
                    ]
                item["shape-coords"] = [
                    [item["text-coords"][0]-box_width/2, item["text-coords"][1]-box_height/2],
                    [item["text-coords"][0]+box_width/2, item["text-coords"][1]+box_height/2]
                ]

    return positions

In [11]:
def generate_shapes(var_data, var_name, colors=""):

    shape_coords = [var_data[var_name]["shape-coords"]]
    shapes = []

    for anno in var_data[var_name]["annotations"].keys():
        shape_coords.append(var_data[var_name]["annotations"][anno]["shape-coords"])
        for item in var_data[var_name]["annotations"][anno]["items"]:
            shape_coords.append(item["shape-coords"])

    if colors == "":
        colors = ['#888' for i in range(len(shape_coords))]
    
    for i, coords in enumerate(shape_coords):
        shapes.append({
            'type': 'rect',
            'xref': 'x',
            'yref': 'y',
            'x0': coords[0][0],
            'y0': coords[0][1],
            'x1': coords[1][0],
            'y1': coords[1][1],
            'line': {
                'color': '#888',
                'width': 2,
            },
            'fillcolor': 'rgba(55, 128, 191, 0.6)',
            'opacity': 0.5
        })
    return shapes

In [34]:
def make_graph(var_data, var_name, vert_space=4, box_width=10, \
                box_height=4, text_y0=100, plot_width=1000, plot_height=500, offset=20, expanded=True, biosamples=""):
    
    #get max number of items in one annotation (to compute plot height)
    max_items = max([len(var_data[anno]) for anno in var_data.keys()])
    num_annotations = len(var_data.keys())
    plot_height = 350*np.log(max_items)
    plot_width = 900 * np.log(num_annotations)
 
    #longest label
    box_width = max_text_len(var_data)
    offset = box_width + 5
    
    print("biosamples:", biosamples)

    
    positions = generate_positions(var_data, var_name, expanded=expanded, box_height=box_height, \
                                   box_width=box_width, offset=offset, text_y0=text_y0, biosamples=biosamples)
    
    print("max items:", max_items, num_annotations)
    #initialize node and edge traces:
    node_trace = go.Scatter(
        x = [],
        y = [],
        text = [],
        mode='markers+text',
        hoverinfo='closest',
        marker=dict(
            color=[],
            size=10,
            line=dict(width=10),
            opacity=0.0
        )
    )

    edge_trace = go.Scatter(
        x = [],
        y = [],
        line=dict(width=2,color='#888'),
        hoverinfo='none',
        mode='lines'        
    )
    
    #populate node information with text positions:
    node_trace['x'].append(positions[var_name]["text-coords"][0])
    node_trace['y'].append(positions[var_name]["text-coords"][1])
    node_trace['text'].append(var_name)
    for anno in positions[var_name]["annotations"].keys():
        node_trace['x'].append(positions[var_name]["annotations"][anno]["text-coords"][0])
        node_trace['y'].append(positions[var_name]["annotations"][anno]["text-coords"][1])
        node_trace['text'].append(anno)
        for item in positions[var_name]["annotations"][anno]["items"]:
            node_trace['x'].append(item["text-coords"][0])
            node_trace['y'].append(item["text-coords"][1])
            text = item["biosample_term_name"] + ": "
            if "state" in item.keys():
                text += item["state"]
            elif "footprint" in item.keys():
                text += item["footprint"]
            elif "target" in item.keys():
                text += item['target']
            node_trace["text"].append(text)

            
    #fill in edges between variant name and annotation names:
    for anno in positions[var_name]["annotations"].keys():
        edge_trace['x'] += [positions[var_name]["text-coords"][0], positions[var_name]["annotations"][anno]["text-coords"][0], None]
        edge_trace['y'] += [positions[var_name]["shape-coords"][0][1], positions[var_name]["annotations"][anno]["shape-coords"][1][1], None]
    
    #add invisible points:
    min_coords, max_coords = invisible_points(positions, var_name)
    node_trace['x'].append(min_coords[0])
    node_trace['y'].append(min_coords[1])
    node_trace['x'].append(max_coords[0])
    node_trace['y'].append(max_coords[1])
    
    layout = go.Layout(
        width=plot_width,
        height=plot_height,
        xaxis = dict(
            showgrid=False, 
            zeroline=False,
            showline=False,
            ticks='',
            showticklabels=False
        ),
        yaxis = dict(
            showgrid=False,
            zeroline=False,
            showline=False,
            ticks='',
            showticklabels=False
        ),
        showlegend=False,        
    )

    layout['shapes'] = generate_shapes(positions, var_name)

    data = [node_trace, edge_trace]
    fig = go.Figure(data=data, layout=layout)
    return fig
    

In [13]:
#finds the longest label name
def max_text_len(var_data):
    
    max_text_len = 0
    
    for anno, items in var_data.items():
        if len(anno) > max_text_len:
            max_text_len = len(anno)
        for item in items:
            text = item["biosample_term_name"] + ": "
            if "state" in item.keys():
                text += "state"
            elif "target" in item.keys():
                text += "target"
            elif "footprint" in item.keys():
                text += "footprint"
            if len(text) > max_text_len:
                max_text_len = len(text)
    
    return max_text_len

In [14]:
def invisible_points(positions, var_name):
    
    min_coords = [i for i in positions[var_name]["text-coords"]]
    max_coords = [i for i in positions[var_name]["text-coords"]]
    
    for anno, info in positions[var_name]["annotations"].items():
        if info["text-coords"][0] > max_coords[0]:
            max_coords[0] = info["text-coords"][0]
        elif info["text-coords"][0] < min_coords[0]:
            min_coords[0] = info["text-coords"][0]
        if info["text-coords"][1] > max_coords[1]:
            max_coords[1] = info["text-coords"][1]
        elif info["text-coords"][1] < min_coords[1]:
            min_coords[1] = info["text-coords"][1]
        
        for item in positions[var_name]["annotations"][anno]["items"]:
            if item["text-coords"][0] > max_coords[0]:
                max_coords[0] = item["text-coords"][0]
            elif item["text-coords"][0] < min_coords[0]:
                min_coords[0] = item["text-coords"][0]
            if item["text-coords"][1] > max_coords[1]:
                max_coords[1] = item["text-coords"][1]
            elif item["text-coords"][1] < min_coords[1]:
                min_coords[1] = item["text-coords"][1]
        
    #extend invisible x points to avoid text cutoffs:
    min_coords[0] -= 25
    max_coords[0] += 25
    
    return min_coords, max_coords

In [30]:
#get all biosample_term_names in var_data:
def get_biosamples(var_data):
    biosamples = []
    for anno in var_data.keys():
        for item in var_data[anno]:
            biosamples.append(item["biosample_term_name"])

    return list(set(biosamples))

In [15]:
#thing = json.loads(open("testing.json").read())
#plot(make_graph(var_data=thing, var_name="test", plot_height=1000, plot_width=600))
iplot(make_graph(var_data=var_data, var_name="test", plot_height=500, plot_width=1000, box_height=3, box_width=22, offset=27))

max items 4
num annotations 3
height 485.2030263919617
width 988.7510598012988
max text len: 26
max items: 4 3


In [17]:
thing = json.loads(open("../testing_files/testing.json").read())

In [6]:
rsid = "rs11257655"
#rsid = "rs7903146"
with urllib.request.urlopen(rsid_url(rsid)) as url:
    print(url.read())
    json_thing = json.loads(url.read())

NameError: name 'rsid_url' is not defined

In [235]:
iplot(make_graph(var_data=json_thing, var_name="test", plot_height=1000, plot_width=2000, box_height=4, box_width=20))

max items 30
num annotations 3
height 1190.4190835817544
width 988.7510598012988
max text len: 41
max items: 30 3


In [270]:
start_time = time.time()
with urllib.request.urlopen(rsid_url("rs7903146")) as url:
    print("time to open url:", time.time() - start_time)
    print(url)
    u = url.read()
    json_thing = json.loads(u)

time to open url: 1.8910350799560547
<http.client.HTTPResponse object at 0x1a130af940>


In [31]:
#new generate positions for specific biosample types:
def generate_positions(var_data, var_name, vert_space=4, box_width=10, \
                       box_height=4, text_y0=100, plot_width=100, offset=20, expanded=True, biosamples=""):
    
    #get number of annotations
    num_annotations = len(var_data.keys())
    num_sides = int(num_annotations/2)   
    
    positions = {
        var_name: {
            "text-coords": [plot_width/2, text_y0],
            "shape-coords": [
                [(plot_width/2)-box_width/2, text_y0-box_height/2],
                [(plot_width/2)+box_width/2, text_y0+box_height/2]
            ],
            "annotations": {key: {"items": val} for key, val in var_data.items()}
        }
    }
    
    #biosamples
    if biosamples == "":
        biosamples = get_biosamples(var_data)
    
    for i, anno in enumerate(positions[var_name]["annotations"].keys()):
        #get x offset:
        if num_annotations % 2 == 1 and i == int(num_annotations/2)+1:
            x_offset = positions[var_name]["text-coords"][0] + offset
        elif i+1 <= num_sides: 
            x_offset = positions[var_name]["text-coords"][0] - offset*(i+1)
        elif i+1 > num_sides and num_annotations % 2 == 1:
            x_offset = positions[var_name]["text-coords"][0] + offset*(i-int(num_annotations/2))
        else:
            x_offset = positions[var_name]["text-coords"][0] + offset*((i+1)-int(num_annotations/2))
            
        #set text coordinates for this annotation
        positions[var_name]["annotations"][anno]["text-coords"] = [
            x_offset, 
            text_y0-(2*vert_space)
        ]
        
        #set shape coordinates for this annotation
        positions[var_name]["annotations"][anno]["shape-coords"] = [
            [
                positions[var_name]["annotations"][anno]["text-coords"][0]-box_width/2,
                positions[var_name]["annotations"][anno]["text-coords"][1]-box_height/2
            ],
            [
                positions[var_name]["annotations"][anno]["text-coords"][0]+box_width/2,
                positions[var_name]["annotations"][anno]["text-coords"][1]+box_height/2
            ]
        ]
        
        if expanded == True:
            for j, item in enumerate(positions[var_name]["annotations"][anno]["items"]):
                if item['biosample_term_name'] in biosamples:
                    if j == 0:
                        item["text-coords"] = [
                            positions[var_name]["annotations"][anno]["text-coords"][0],
                            positions[var_name]["annotations"][anno]["text-coords"][1] - box_height
                        ]
                    else:
                        item["text-coords"] = [
                            positions[var_name]["annotations"][anno]["items"][j-1]["text-coords"][0],
                            positions[var_name]["annotations"][anno]["items"][j-1]["text-coords"][1] - box_height
                        ]
                    item["shape-coords"] = [
                        [item["text-coords"][0]-box_width/2, item["text-coords"][1]-box_height/2],
                        [item["text-coords"][0]+box_width/2, item["text-coords"][1]+box_height/2]
                    ]

    return positions

In [35]:
iplot(make_graph(var_data=thing, var_name="test",box_height=4, biosamples=["skeletal muscle tissue"]))

max items 22
num annotations 2
height 1081.8648586754107
width 623.8324625039508
max text len: 41
max items: 22 2
