### Canvas Creation from Enrichr

In [1]:
import pandas as pd 
import json
import requests
import math
import matplotlib
import uuid
from textwrap import dedent
from IPython.core.display import display, HTML
from string import Template
from matplotlib.ticker import MaxNLocator
from IPython.display import display,FileLink, Markdown
from matplotlib import colors
from random import seed
from random import randint

The cell below contains arbitrary placeholder definitions for these variables; their actual values will be selected in the Appyter (so DON'T ADD THE CELL BELOW TO THE APPYTER).

In [2]:
# Placeholder settings for development; in the Appyter these values are chosen by the user.
# NOTE(review): bar_color, bar_color_not_sig, edgecolor and chart_type are not read by any
# code visible in this notebook -- presumably consumed elsewhere; confirm before removing.
bar_color = 'mediumforestgreen'
bar_color_not_sig = 'lightgrey'
edgecolor = None
chart_type = "Canvas"
# Library file names; library_processing(index) opens 'Enrichr-Libraries/' + all_libraries[index].
all_libraries = ['BioCarta_2016']
# Canvas color scheme; get_color() compares this against the color names it knows.
color = 'lightpink'

# Input gene list; library_processing() scores every library term by its overlap with these genes.
genes = ['TP53', 'TNF', 'EGFR', 'GKN1', 'HADHA', 'APOE', 'ESR1', 'VEGFA', 'TGFB1', 'PREPL', 'TIA1', 'TPO', 'TTN', 'SATB2', 'CHPF', 'MALL', 'MIPIP', 'NUPL1', 'IL6', 'PDIA3', 'CTNNB1', 'SLC39A1', 'DTNA','SLC1A1', 'GALNT2', 'HIST2H2AC', 'CD63']

In [3]:
def library_processing(index):
    '''Load one Enrichr library and prepare its terms for the hexagon canvas.

    Reads the file ``'Enrichr-Libraries/' + all_libraries[index]``. Each line is
    expected to hold a term name, a double tab, and then tab-separated genes.
    Every term is scored against the module-level ``genes`` list as
    ``shared / (len(term_genes) + len(genes))`` rounded to 5 decimals.
    NOTE: this is an overlap score, not a true Jaccard index (the denominator is
    the sum of the two sizes, not the size of the union).

    Parameters
    ----------
    index : int
        Position of the library name in the module-level ``all_libraries`` list.

    Returns
    -------
    anneal_list : list of tuple
        ``(term_name, gene_list, overlap_score, neighbor_score)`` per hexagon,
        with ``neighbor_score`` initialised to 0.0. The list is padded with
        ``('', [], 0.0, 0.0)`` entries up to ``num_hex``.
    x_dimension : int
        Number of hexagon columns (ceil of the square root of the term count).
    y_dimension : int
        Number of hexagon rows (same value as ``x_dimension``).
    num_hex : int
        ``x_dimension * y_dimension``.
    '''
    library_name = all_libraries[index]

    names = []
    gene_lists = []
    with open('Enrichr-Libraries/' + library_name, 'r') as library_file:
        for line in library_file:
            term, separator, gene_field = line.rstrip('\r\n').partition('\t\t')
            if not separator:
                # Blank line or line without the term/gene separator: nothing to parse.
                continue
            names.append(term)
            # Dropping empty fields handles a trailing tab without discarding
            # the last real gene when the line has no trailing tab.
            gene_lists.append([gene for gene in gene_field.split('\t') if gene])

    # Score each term by its overlap with the input genes.
    query_genes = set(genes)  # set membership keeps the overlap count linear
    scores = []
    for term_genes in gene_lists:
        shared = sum(1 for gene in term_genes if gene in query_genes)
        combined_size = len(term_genes) + len(genes)
        score = shared / combined_size if combined_size else 0.0
        scores.append(round(score, 5))

    # The canvas is the smallest square grid that holds every term.
    side = math.ceil(math.sqrt(len(scores)))
    x_dimension = side
    y_dimension = side
    num_hex = x_dimension * y_dimension

    anneal_list = list(zip(names, gene_lists, scores, [0.0] * len(names)))

    # Fill the remaining grid positions with empty "dummy" hexagons.
    anneal_list += [('', [], 0.0, 0.0) for _ in range(len(names), num_hex)]

    return anneal_list, x_dimension, y_dimension, num_hex

In [4]:
# color options: tomato, lightskyblue, plum, mediumspringgreen, lightgrey, orange, lightpink, yellow
def get_color(anneal_list):
    '''Turn each hexagon's overlap score into a hex color string.

    The module-level ``color`` selects a per-channel (red, green, blue)
    multiplier; each channel is ``score * multiplier`` clamped to [0, 1].
    An unrecognised ``color`` uses multipliers of 0, i.e. every hexagon is black.

    Parameters
    ----------
    anneal_list : list of tuple
        Hexagon entries whose element at index 2 is the overlap score.

    Returns
    -------
    list of str
        ``x_dimension * y_dimension`` hex color strings (module-level
        dimensions); positions beyond ``len(anneal_list)`` are ``"#000000"``.
    '''
    channel_scales = {
        'tomato': (10, 3, 3),
        'lightskyblue': (0, 3, 10),
        'plum': (5, 0, 10),
        'mediumspringgreen': (0, 10, 2),
        'lightgrey': (10, 10, 10),
        'orange': (10, 5.5, 0),
        'lightpink': (10, 0, 10),
        'yellow': (10, 9, 0),
    }
    scales = channel_scales.get(color, (0, 0, 0))

    scores = [entry[2] for entry in anneal_list]

    color_list = []
    for position in range(x_dimension * y_dimension):
        if position < len(scores):
            # to_hex rejects channel values outside 0-1, so clamp: any score
            # above 0.1 would otherwise overflow a channel scaled by 10.
            rgb = tuple(min(1.0, max(0.0, scores[position] * scale)) for scale in scales)
            color_list.append(matplotlib.colors.to_hex(rgb))
        else:
            color_list.append("#000000")
    return color_list

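## Annealing  

Edge cases to consider (the indices shown are for a 16×16 canvas):  
1. Corners (0, 15, 240, 255)  
2. Edges (1-14 on the top row; 16, 31, 32, etc. on the left and right sides; 241-254 on the bottom row)   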



In [5]:
def find_neighbors(ind):
    '''Return the indices of the six hexagons adjacent to position ``ind``.

    The grid is stored row-major with ``x_dimension`` columns and
    ``y_dimension`` rows (module-level values) and wraps around on all four
    sides. Odd rows are drawn shifted half a hexagon to the right, so the
    diagonal neighbors depend on row parity:

    * even row: columns ``col - 1`` and ``col`` of the rows above and below
    * odd row:  columns ``col`` and ``col + 1`` of the rows above and below

    Parameters
    ----------
    ind : int
        Row-major index of the hexagon.

    Returns
    -------
    list of int
        Six indices in the order left, right, upper pair, lower pair. On grids
        narrower or shorter than three cells the list can contain repeats or
        ``ind`` itself; it is empty when the grid has no cells.

    Notes
    -----
    With an odd number of rows the top/bottom wrap joins two even rows, so
    adjacency across that seam is not perfectly symmetric.
    '''
    columns, rows = x_dimension, y_dimension
    if columns <= 0 or rows <= 0:
        return []

    row, col = divmod(ind, columns)

    # Row parity decides which two columns touch this hexagon diagonally.
    if row % 2 == 1:
        diagonal_cols = (col, col + 1)
    else:
        diagonal_cols = (col - 1, col)

    coordinates = [(row, col - 1), (row, col + 1)]
    coordinates += [(row - 1, diagonal_col) for diagonal_col in diagonal_cols]
    coordinates += [(row + 1, diagonal_col) for diagonal_col in diagonal_cols]

    # Python's modulo is non-negative, so this wraps both edges of the grid.
    return [(r % rows) * columns + (c % columns) for r, c in coordinates]

In [6]:
# initially find fitness
def find_fitness(anneal_list):
    '''Score every hexagon against its neighbors and total the scores.

    For each position, the similarity to every neighbor returned by
    ``find_neighbors`` is ``shared / (len(neighbor_genes) + len(own_genes))``
    (0.0 when both gene lists are empty). The per-position sum is written into
    element 3 of that position's tuple, so ``anneal_list`` is updated in place.

    Parameters
    ----------
    anneal_list : list of tuple
        Hexagon entries; element 1 is the gene list, element 3 the stored
        neighbor score.

    Returns
    -------
    (fitness, anneal_list)
        The sum of all neighbor scores and the same (mutated) list object.
    '''
    total_fitness = 0
    for position, cell in enumerate(anneal_list):
        own_genes = cell[1]
        neighbor_sum = 0
        for neighbor_position in find_neighbors(position):
            neighbor_genes = anneal_list[neighbor_position][1]
            combined_size = len(neighbor_genes) + len(own_genes)
            if combined_size == 0:
                similarity = 0.0
            else:
                shared = sum(1 for gene in neighbor_genes if gene in own_genes)
                similarity = shared / combined_size
            neighbor_sum += similarity

        # Tuples are immutable: rebuild the entry with the fresh score in slot 3.
        updated = list(cell)
        updated[3] = neighbor_sum
        anneal_list[position] = tuple(updated)

        total_fitness += neighbor_sum
    return total_fitness, anneal_list

# take indices of swapped hexagons
def find_swapped_fitness(anneal_list, swapped_a, swapped_b, old_fitness):
    neighbors_a = find_neighbors(swapped_a)
    neighbors_b = find_neighbors(swapped_b)
    hexagons_to_update = [swapped_a, swapped_b] + neighbors_a + neighbors_b

    new_fitness = 0
    # Recalculate scores for all hexagons that need updating
    for hex in hexagons_to_update:

        # subtract out the swapped neighbor fitnesses because they are changing 
        old_fitness -= anneal_list[hex][3]

        neighbors = find_neighbors(hex)
        sum_neighbor_score = 0
        for index in neighbors:
            intersection = [value for value in anneal_list[index][1] if value in anneal_list[hex][1]]
            if len(anneal_list[index][1]) + len(anneal_list[hex][1]) != 0:
                jaccard = len(intersection)/(len(anneal_list[index][1]) + len(anneal_list[hex][1]))
            else:
                jaccard = 0.0
            sum_neighbor_score += jaccard
        hex_list = list(anneal_list[hex])
        hex_list[3] = sum_neighbor_score
        hex_tuple = tuple(hex_list)
        anneal_list[hex] = hex_tuple
        new_fitness += sum_neighbor_score
    return old_fitness + new_fitness, anneal_list


In [7]:
def unzip_list(anneal_list):
    '''Transpose a list of equal-length tuples into a list of column tuples.

    ``[(a1, b1), (a2, b2)]`` becomes ``[(a1, a2), (b1, b2)]``; an empty list
    yields an empty list.
    '''
    return [*zip(*anneal_list)]

In [8]:
def annealing(anneal_list, steps, old_fitness):
    '''Improve the hexagon layout by trying random pairwise swaps.

    Each step picks two random positions, swaps them, and asks
    ``find_swapped_fitness`` for the resulting total fitness. A swap is kept
    only when it strictly increases the fitness; otherwise it is undone.

    ``find_swapped_fitness`` rewrites the stored neighbor scores of the swapped
    cells and of all their neighbors in place, so undoing a swap restores a
    snapshot of every touched entry, not just the two swapped ones. Without
    that, the stored scores would describe the rejected layout and later
    incremental fitness values would drift.

    Parameters
    ----------
    anneal_list : list of tuple
        Hexagon entries with up-to-date neighbor scores in element 3
        (as left by ``find_fitness``). Modified in place.
    steps : int
        Number of swap attempts.
    old_fitness : number
        Total fitness of ``anneal_list`` as passed in.

    Returns
    -------
    list of tuple
        The rearranged ``anneal_list`` (same list object, still zipped).
    '''
    cell_count = len(anneal_list)
    if cell_count < 2:
        # Nothing to swap (and randint would fail on an empty range).
        return anneal_list

    for _ in range(steps):
        index_a = randint(0, cell_count - 1)
        index_b = randint(0, cell_count - 1)
        if index_a == index_b:
            # Swapping a cell with itself cannot change the layout.
            continue

        # Remember every entry find_swapped_fitness is going to rewrite.
        touched = {index_a, index_b}
        touched.update(find_neighbors(index_a))
        touched.update(find_neighbors(index_b))
        snapshot = {position: anneal_list[position] for position in touched}

        anneal_list[index_a], anneal_list[index_b] = anneal_list[index_b], anneal_list[index_a]
        new_fitness, anneal_list = find_swapped_fitness(anneal_list, index_a, index_b, old_fitness)
        if new_fitness <= old_fitness:
            # Reject: put back the original entries together with their scores.
            for position, entry in snapshot.items():
                anneal_list[position] = entry
        else:
            # Accept: the swapped layout becomes the new baseline.
            old_fitness = new_fitness
    return anneal_list

In [9]:
# NOTE: your d3 code should basically be a function, in the future it would be best to take it out.

def init_chart():
  '''Register the hexagon-canvas drawing routine with RequireJS and return its module id.

  Displays two HTML snippets in the notebook output:

  1. a script tag that loads require.js from ``/static/components/requirejs/require.js``;
  2. a script that points RequireJS at d3 and d3-hexbin and defines a module named by a
     fresh ``'mychart-<uuid4>'`` id. The module's value is a JavaScript function
     ``(figure_id, numA, numB, colorList, libraryList, indices)`` that draws
     ``numA`` columns by ``numB`` rows of hexagons into the element with id ``figure_id``,
     fills hexagon ``i`` with ``colorList[i]`` and sets its tooltip to
     ``libraryList[i]`` followed by ``indices[i]``.

  Returns
  -------
  str
      The generated module id, to be passed to ``require([...])`` by the caller.
  '''
  # A unique id per call keeps repeated charts in one notebook from sharing a module name.
  chart_id = 'mychart-' + str(uuid.uuid4())
  display(HTML('<script src="/static/components/requirejs/require.js"></script>'))
  # The template below is JavaScript; $chart_id is the only placeholder substituted into it.
  display(HTML(Template(dedent('''
  <script>
  require.config({
    paths: {
      'd3': 'https://cdnjs.cloudflare.com/ajax/libs/d3/5.16.0/d3.min',
      'd3-hexbin': 'https://d3js.org/d3-hexbin.v0.2.min',
    },
    shim: {
      'd3-hexbin': ['d3']
    }
  })

  // If we configure mychart via url, we can eliminate this define here
  define($chart_id, ['d3', 'd3-hexbin'], function(d3, d3_hexbin) {
    return function (figure_id, numA, numB, colorList, libraryList, indices) {
      var margin = {top: 50, right: 20, bottom: 20, left: 50},
        width = 850 - margin.left - margin.right,
        height = 350 - margin.top - margin.bottom;

      // append the svg object to the body of the page
      var svG = d3.select('#' + figure_id)
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
          .attr("transform",
                "translate(" + margin.left + "," + margin.top + ")");
      
      //The number of columns and rows of the heatmap
      var MapColumns = numA,
          MapRows = numB;

      //The maximum radius the hexagons can have to still fit the screen
      var hexRadius = d3.min([width/((MapColumns + 0.5) * Math.sqrt(3)), height/((MapRows + 1/3) * 1.5)]);

      //Calculate the center position of each hexagon
      var points = [];
      for (var i = 0; i < MapRows; i++) {
          for (var j = 0; j < MapColumns; j++) {
              var x = hexRadius * j * Math.sqrt(3)
              //Offset each uneven row by half of a "hex-width" to the right
              if(i%2 === 1) x += (hexRadius * Math.sqrt(3))/2
              var y = hexRadius * i * 1.5
              points.push([x,y])
          }
      }

      //Set the hexagon radius
      var hexbin = d3_hexbin.hexbin().radius(hexRadius);

      svG.append("g")
        .selectAll(".hexagon")
        .data(hexbin(points))
        .enter().append("path")
        .attr("class", "hexagon")
        .attr("d", function (d) {
            return "M" + d.x + "," + d.y + hexbin.hexagon();
        })
        .attr("stroke", "white")
        .attr("stroke-width", "1px")
        .style("fill", function (d,i) { return colorList[i]; })
        .on("mouseover", mover)
        .on("mouseout", mout)
        .append("svg:title")
        .text(function(d,i) { return libraryList[i].concat(" ").concat(indices[i]); });

      function mover(d) {
      d3.select(this)
        .transition().duration(10)  
        .style("fill-opacity", 0.3)
      }

      //Mouseout function
      function mout(d) { 
      d3.select(this)
        .transition().duration(10)
        .style("fill-opacity", 1)
      }

    }
  })
  </script>
  ''')).substitute({ 'chart_id': repr(chart_id) })))
  return chart_id

def Canvas(numA, numB, colorList, libraryList, indices):
  '''Render the hexagon canvas in the notebook output.

  Registers the drawing routine via ``init_chart()``, then displays an ``<svg>``
  element with a fresh ``'fig-<uuid4>'`` id plus a script that invokes the routine
  on it. The lists are taken from the arguments (not from module-level state) and
  are serialised with ``json.dumps`` so they are valid JavaScript literals.

  Parameters
  ----------
  numA : int
      Number of hexagon columns.
  numB : int
      Number of hexagon rows.
  colorList : sequence of str
      Fill color for each hexagon, by position.
  libraryList : sequence of str
      Term name shown in each hexagon's tooltip.
  indices : sequence of float
      Score shown after the term name in each hexagon's tooltip.
  '''
  chart_id = init_chart()

  def to_js(values):
    # Escaping "</" stops a value such as "</script>" from ending the script block early.
    return json.dumps(list(values)).replace('</', '<\\/')

  display(HTML(Template(dedent('''
  <svg id=$figure_id></svg>
  <script>
  require([$chart_id], function(mychart) {
    mychart($figure_id, $numA, $numB, $colorList, $libraryList, $indices)
  })
  </script>
  ''')).substitute({
      'chart_id': repr(chart_id),
      'figure_id': repr('fig-' + str(uuid.uuid4())),
      'numA': repr(numA),
      'numB': repr(numB),
      'colorList': to_js(colorList),
      'libraryList': to_js(libraryList),
      'indices': to_js(indices)
  })))

In [10]:
# Seed the RNG so the random swaps -- and therefore the final layout -- are the same on every run.
seed(0)
# Number of random swap attempts made by annealing().
ANNEALING_STEPS = 5000

anneal_list, x_dimension, y_dimension, num_hex = library_processing(0)
# find_fitness() also stores each hexagon's neighbor score in anneal_list, which annealing() relies on.
initial_fitness, anneal_list = find_fitness(anneal_list)
anneal_list = annealing(anneal_list, ANNEALING_STEPS, initial_fitness)
# color_list and unzipped_anneal_list stay module-level names because other code in the notebook may read them.
color_list = get_color(anneal_list)
unzipped_anneal_list = unzip_list(anneal_list)
Canvas(x_dimension, y_dimension, color_list, list(unzipped_anneal_list[0]), list(unzipped_anneal_list[2]))