In [None]:
#%%appyter init
from appyter import magic
magic.init(lambda _=globals: _())

### Hexagonal Canvas Creator from Enrichr Libraries

This appyter creates a hexagonal canvas that compares your input gene list with the gene sets of a library selected from Enrichr (https://amp.pharm.mssm.edu/Enrichr/).

In the resulting figure, each hexagon represents one gene set from the library you chose. Hexagons are colored by how similar their gene set is to your input gene set (the darker the hexagon, the more similar). Hovering over a hexagon shows the name of the gene set it represents and the Jaccard similarity index between that set and your input gene set.

In [None]:
import pandas as pd 
import numpy as np
import json
import requests
import math
import matplotlib
import uuid
import urllib
from textwrap import dedent
from IPython.core.display import display, HTML
from string import Template
from random import seed, randint
from operator import itemgetter

In [None]:
%%appyter hide_code

{% do SectionField(name='section1', title = '1. Submit Your Gene List', subtitle = 'Upload a text file containing your gene list -OR- copy and paste your gene list into the text box below (One gene per row). You can also try it with the default gene list provided.', img = 'enrichr-icon.png')%}
{% do SectionField(name='section2', title = '2. Choose Enrichr Library', subtitle = 'Select up to two Enrichr libraries for which you would like to create canvases. If more than 2 libraries are selected, canvases will only be created for the first 2.', img = 'enrichr-icon.png')%}
{% do SectionField(name='section3', title = '3. Color Options', subtitle = 'Choose a color for your chart and choose how many hexagons are colored (default is 10).', img = 'enrichr-icon.png')%}
{% do SectionField(name='section4', title = '4. Color Scaling (Optional)', subtitle = 'Enter a value by which to normalize the color scaling. If you are not trying to compare to a previously created canvas, it is best to leave this blank. The value you enter must be greater than or equal to all Jaccard indices. Every Jaccard index will be divided by this value, then the coloring will be scaling based on the divided index. This is useful if you want to compare multiple canvases of different libraries and want the brightness to be constant throughout the figures. Be sure to scale by the highest maximum of the figures you are trying to compare. Scaling values will be provided in the output along with your canvas if you want to scale future canvases to this output.', img = 'enrichr-icon.png')%}

### Input options

In [None]:
%%appyter code_eval

# Name of the uploaded gene-list file. It is compared against '' further down:
# when it is empty, the text-box value (gene_list_input) is used instead.
gene_list_filename = {{ FileField(name='gene_list_filename', label='Gene List File', default='', description='Upload your gene list as a text file (One gene per row).', section = 'section1') }}

gene_list_input = {{ TextField(name='gene_list_input', label='Gene List', default='NSUN3\nPOLRMT\nNLRX1\nSFXN5\nZC3H12C\nSLC25A39\nARSG\nDEFB29\nNDUFB6\nZFAND1\nTMEM77\n5730403B10RIK\nRP23-195K8.6\nTLCD1\nPSMC6\nSLC30A6\nLOC100047292\nLRRC40\nORC5L\nMPP7\nUNC119B\nPRKACA\nTCN2\nPSMC3IP\nPCMTD2\nACAA1A\nLRRC1\n2810432D09RIK\nSEPHS2\nSAC3D1\nTMLHE\nLOC623451\nTSR2\nPLEKHA7\nGYS2\nARHGEF12\nHIBCH\nLYRM2\nZBTB44\nENTPD5\nRAB11FIP2\nLIPT1\nINTU\nANXA13\nKLF12\nSAT2\nGAL3ST2\nVAMP8\nFKBPL\nAQP11\nTRAP1\nPMPCB\nTM7SF3\nRBM39\nBRI3\nKDR\nZFP748\nNAP1L1\nDHRS1\nLRRC56\nWDR20A\nSTXBP2\nKLF1\nUFC1\nCCDC16\n9230114K14RIK\nRWDD3\n2610528K11RIK\nACO1\nCABLES1\nLOC100047214\nYARS2\nLYPLA1\nKALRN\nGYK\nZFP787\nZFP655\nRABEPK\nZFP650\n4732466D17RIK\nEXOSC4\nWDR42A\nGPHN\n2610528J11RIK\n1110003E01RIK\nMDH1\n1200014M14RIK\nAW209491\nMUT\n1700123L14RIK\n2610036D13RIK\nCOX15\nTMEM30A\nNSMCE4A\nTM2D2\nRHBDD3\nATXN2\nNFS1\n3110001I20RIK\nBC038156\nLOC100047782\n2410012H22RIK\nRILP\nA230062G08RIK\nPTTG1IP\nRAB1\nAFAP1L1\nLYRM5\n2310026E23RIK\nC330002I19RIK\nZFYVE20\nPOLI\nTOMM70A\nSLC7A6OS\nMAT2B\n4932438A13RIK\nLRRC8A\nSMO\nNUPL2\nTRPC2\nARSK\nD630023B12RIK\nMTFR1\n5730414N17RIK\nSCP2\nZRSR1\nNOL7\nC330018D20RIK\nIFT122\nLOC100046168\nD730039F16RIK\nSCYL1\n1700023B02RIK\n1700034H14RIK\nFBXO8\nPAIP1\nTMEM186\nATPAF1\nLOC100046254\nLOC100047604\nCOQ10A\nFN3K\nSIPA1L1\nSLC25A16\nSLC25A40\nRPS6KA5\nTRIM37\nLRRC61\nABHD3\nGBE1\nPARP16\nHSD3B2\nESM1\nDNAJC18\nDOLPP1\nLASS2\nWDR34\nRFESD\nCACNB4\n2310042D19RIK\nSRR\nBPNT1\n6530415H11RIK\nCLCC1\nTFB1M\n4632404H12RIK\nD4BWG0951E\nMED14\nADHFE1\nTHTPA\nCAT\nELL3\nAKR7A5\nMTMR14\nTIMM44\nSF1\nIPP\nIAH1\nTRIM23\nWDR89\nGSTZ1\nCRADD\n2510006D16RIK\nFBXL6\nLOC100044400\nZFP106\nCD55\n0610013E23RIK\nAFMID\nTMEM86A\nALDH6A1\nDALRD3\nSMYD4\nNME7\nFARS2\nTASP1\nCLDN10\nA930005H10RIK\nSLC9A6\nADK\nRBKS\n2210016F16RIK\nVWCE\n4732435N03RIK\nZFP11\nVLDLR\n9630013D21RIK\n4933407N01RIK\nFAHD1\nMIPOL1\n1810019D21RIK\n1810049H13RIK\nTFAM\nPAICS\n1110032A03RIK\nLOC
100044139\nDNAJC19\nBC016495\nA930041I02RIK\nRQCD1\nUSP34\nZCCHC3\nH2AFJ\nPHF7\n4921508D12RIK\nKMO\nPRPF18\nMCAT\nTXNDC4\n4921530L18RIK\nVPS13B\nSCRN3\nTOR1A\nAI316807\nACBD4\nFAH\nAPOOL\nCOL4A4\nLRRC19\nGNMT\nNR3C1\nSIP1\nASCC1\nFECH\nABHD14A\nARHGAP18\n2700046G09RIK\nYME1L1\nGK5\nGLO1\nSBK1\nCISD1\n2210011C24RIK\nNXT2\nNOTUM\nANKRD42\nUBE2E1\nNDUFV1\nSLC33A1\nCEP68\nRPS6KB1\nHYI\nALDH1A3\nMYNN\n3110048L19RIK\nRDH14\nPROZ\nGORASP1\nLOC674449\nZFP775\n5430437P03RIK\nNPY\nADH5\nSYBL1\n4930432O21RIK\nNAT9\nLOC100048387\nMETTL8\nENY2\n2410018G20RIK\nPGM2\nFGFR4\nMOBKL2B\nATAD3A\n4932432K03RIK\nDHTKD1\nUBOX5\nA530050D06RIK\nZDHHC5\nMGAT1\nNUDT6\nTPMT\nWBSCR18\nLOC100041586\nCDK5RAP1\n4833426J09RIK\nMYO6\nCPT1A\nGADD45GIP1\nTMBIM4\n2010309E21RIK\nASB9\n2610019F03RIK\n7530414M10RIK\nATP6V1B2\n2310068J16RIK\nDDT\nKLHDC4\nHPN\nLIFR\nOVOL1\nNUDT12\nCDAN1\nFBXO9\nFBXL3\nHOXA7\nALDH8A1\n3110057O12RIK\nABHD11\nPSMB1\nENSMUSG00000074286\nCHPT1\nOXSM\n2310009A05RIK\n1700001L05RIK\nZFP148\n39509\nMRPL9\nTMEM80\n9030420J04RIK\nNAGLU\nPLSCR2\nAGBL3\nPEX1\nCNO\nNEO1\nASF1A\nTNFSF5IP1\nPKIG\nAI931714\nD130020L05RIK\nCNTD1\nCLEC2H\nZKSCAN1\n1810044D09RIK\nMETTL7A\nSIAE\nFBXO3\nFZD5\nTMEM166\nTMED4\nGPR155\nRNF167\nSPTLC1\nRIOK2\nTGDS\nPMS1\nPITPNC1\nPCSK7\n4933403G14RIK\nEI24\nCREBL2\nTLN1\nMRPL35\n2700038C09RIK\nUBIE\nOSGEPL1\n2410166I05RIK\nWDR24\nAP4S1\nLRRC44\nB3BP\nITFG1\nDMXL1\nC1D\n', description='Paste your gene list (One gene per row).', section = 'section1') }}

# Names of the Enrichr libraries chosen in the form. The drawing cell at the end of
# the notebook only uses the first two entries (all_libraries[0] and all_libraries[1]);
# each name is also used to build the download URL of the pre-annealed library file.
all_libraries = {{ MultiCheckboxField(name='enrichr_libraries', description='Select up to 2 Enrichr libraries you would like in your figure.', label='Enrichr libraries', default=['WikiPathways_2019_Human'], section = 'section2',choices=[
    'Transcription_Factor_PPIs',
    'TRRUST_Transcription_Factors_2019',
    'BioCarta_2016',
    'HMS_LINCS_KinomeScan',
    'HumanCyc_2016',
    'huMAP',
    'KEA_2015',
    'KEGG_2019_Human',
    'KEGG_2019_Mouse',
    'NCI-Nature_2016',
    'Panther_2016',
    'Phosphatase_Substrates_from_DEPOD',
    'SILAC_Phosphoproteomics',
    'SubCell_BarCode',
    'WikiPathways_2019_Human',
    'GO_Cellular_Component_2018',
    'GO_Molecular_Function_2018',
    'GO_Biological_Process_2018',
    'Achilles_fitness_decrease',
    'Achilles_fitness_increase',
    'ClinVar_2019',
    'dbGaP',
    'LINCS_L1000_Ligand_Perturbations_down',
    'LINCS_L1000_Ligand_Perturbations_up',
    'MSigDB_Computational',
    'MSigDB_Oncogenic_Signatures',
    'OMIM_Disease',
    'OMIM_Expanded',
    'PheWeb_2019',
    'UK_Biobank_GWAS_v1',
    'VirusMINT',
    'Tissue_Protein_Expression_from_Human_Proteome_Map',
    'Data_Acquisition_Method_Most_Popular_Genes',
    'Enrichr_Libraries_Most_Popular_Genes',
    'Pfam_InterPro_Domains',
    'Pfam_Domains_2019',
    'BioCarta_2015',
    'HumanCyc_2015']) }}    
    
color = '{{ ChoiceField(name='color', description='Select a color for your barchart.', label='Color', default='Purple', section = 'section3',choices=[              
    'Blue',
    'Green',
    'Orange',
    'Purple',
    'Red',
    'Yellow',
    'Pink']) }}'

# Number of top-ranked hexagons that receive the chosen hue. The template output is
# wrapped in quotes, so this is a string ('10' or '20'); get_color converts it with int().
num_hex_colored = '{{ ChoiceField(name='Number of Colored Hexagons', description='Select the number of hexagons you want colored.', label='Number of Colored Hexagons', default='10', section='section3', choices=[
    '10',
    '20']) }}'

# Optional color scaling factor. get_color treats '' as "use the largest index of the
# library" and otherwise converts the value with float().
scaling_factor = {{ StringField(name='scaling_factor', label='Scaling Factor', default='', description='Provide your color scaling factor here (optional).', section = 'section4') }}

### Import gene list

In [None]:
# Import the gene list from the uploaded file or, when no file was given, from the text box.
# An uploaded file takes precedence over the text box.

if gene_list_filename:
    # `with` closes the file even if reading fails part-way through.
    with open(gene_list_filename, 'r') as gene_list_file:
        raw_gene_lines = gene_list_file.readlines()
else:
    raw_gene_lines = gene_list_input.split('\n')

# Normalise every row (trim whitespace, upper-case), then drop blank rows and repeated
# genes while keeping first-seen order. Blank rows (e.g. from a trailing newline) and
# duplicates are not distinct genes and would otherwise inflate len(genes), which feeds
# the similarity calculation.
genes = list(dict.fromkeys(
    gene for gene in (row.strip().upper() for row in raw_gene_lines) if gene
))

In [None]:
%%appyter code_eval

def _jaccard_index(first_set, second_set):
    """Return the Jaccard index |A & B| / |A or B| of two sets (0.0 if both are empty)."""
    union_size = len(first_set | second_set)
    if union_size == 0:
        return 0.0
    return len(first_set & second_set) / union_size


def library_processing(library_index):
    """Download one pre-annealed Enrichr library and score it against the input genes.

    The library file is fetched from the MaayanLab GitHub repository. Per the original
    note, library data is pre-annealed so the canvas will have the most similar gene
    sets closest together; the order of the file is therefore preserved.

    Reads the module-level names `all_libraries` (library names) and `genes`
    (the user's gene list).

    Args:
        library_index: position in `all_libraries` of the library to process.

    Returns:
        A tuple `(anneal_list, x_dimension, y_dimension)` where `anneal_list` is a list
        of `(gene_set_name, gene_list, jaccard_index)` tuples (index rounded to 5
        decimals) and the two dimensions are the side length of the smallest square
        grid that can hold every gene set.

    Raises:
        IndexError: if `library_index` is outside `all_libraries`.
        urllib.error.URLError: if the library file cannot be downloaded.
    """
    # Function-scope import: a bare `import urllib` does not guarantee that the
    # `urllib.request` submodule has been loaded.
    import urllib.request

    library_name = all_libraries[library_index]
    library_url = ('https://raw.githubusercontent.com/MaayanLab/Enrichr-Viz-Appyter/master/'
                   'Enrichr-Processed-Library-Storage/Annealing/Annealed-Libraries/'
                   + library_name + '.txt')

    name = []
    gene_list = []
    with urllib.request.urlopen(library_url) as f:
        for raw_line in f.readlines():
            # Each row is "<gene set name>\t\t<gene>\t<gene>\t...".
            fields = raw_line.decode('utf-8').split("\t\t")
            if len(fields) < 2:
                # Blank or malformed row without a gene field: nothing to draw.
                continue
            name.append(fields[0])
            # The last tab-separated token is discarded, exactly as before.
            # NOTE(review): presumably that token is the line terminator left after a
            # trailing tab -- confirm against the library file format.
            gene_list.append(fields[1].split('\t')[:-1])

    # Compare as sets: membership tests are O(1) and repeated genes count once.
    # Empty strings are ignored because they are not genes.
    query_genes = {gene for gene in genes if gene}

    # True Jaccard index: intersection over union (the previous code divided by the
    # sum of both sizes, which counts shared genes twice in the denominator).
    indices = [
        round(_jaccard_index(set(library_genes), query_genes), 5)
        for library_genes in gene_list
    ]

    # determine the dimensions of the canvas (smallest square that fits every gene set)
    x_dimension = math.ceil(math.sqrt(len(indices)))
    y_dimension = x_dimension

    anneal_list = list(zip(name, gene_list, indices))

    return anneal_list, x_dimension, y_dimension

In [None]:
def unzip_list(anneal_list):
    """Transpose a list of records into a list of per-field tuples.

    For example, a list of (name, genes, index) records becomes
    [all names, all gene lists, all indices]. An empty input gives an empty list.
    """
    columns = [*zip(*anneal_list)]
    return columns

### Process color choice

In [None]:
def get_color(anneal_list, cut_off_value, scaling_factor, x_dimension, y_dimension):
    """Compute one hex color per hexagon of an x_dimension * y_dimension canvas.

    Gene sets whose index is at or above the cut-off are shaded in the hue named by the
    module-level `color`; the remaining gene sets are shaded in grey; grid positions
    beyond the end of `anneal_list` are white.

    Reads the module-level names `color`, `num_hex_colored` and `matplotlib`.

    Args:
        anneal_list: sequence of (name, gene_list, index) records.
        cut_off_value: smallest index that still gets the hue. The sentinel 2.0 means
            "derive it from the data": the index of the `num_hex_colored`-th best
            gene set is used.
        scaling_factor: value every index is divided by before shading. A blank
            string (or None) means "use the largest index in anneal_list". Numbers and
            numeric strings are both accepted.
        x_dimension: number of columns of the canvas.
        y_dimension: number of rows of the canvas.

    Returns:
        (color_list, max_index, cut_off_value): the colors, the scaling factor that was
        actually applied, and the cut-off that was actually applied.

    Raises:
        ValueError: if `scaling_factor` is a non-blank value that is not a number.
    """
    indices = [entry[2] for entry in anneal_list]

    # Deal with cut_off_value (only color the most significant 10/20 hexagons).
    # The rank is clamped so that libraries with fewer gene sets than
    # num_hex_colored no longer raise IndexError; every gene set is colored then.
    if cut_off_value == 2.0 and indices:
        ranked_indices = sorted(indices, reverse=True)
        rank = min(int(num_hex_colored), len(ranked_indices)) - 1
        cut_off_value = ranked_indices[max(rank, 0)]

    # Amount subtracted from each RGB channel at full intensity, i.e. the final color
    # is (1 - s*r, 1 - s*g, 1 - s*b). An unknown name leaves the hexagons white.
    channel_weights = {
        'Red': (0.0, 0.8, 0.8),
        'Yellow': (0.0, 0.3, 1.0),
        'Purple': (0.5, 1.0, 0.0),
        'Pink': (0.0, 1.0, 0.2),
        'Orange': (0.0, 0.45, 1.0),
        'Green': (1.0, 0.0, 1.0),
        'Blue': (1.0, 0.9, 0.0),
    }
    r_value, g_value, b_value = channel_weights.get(color, (0, 0, 0))

    # Blank (including whitespace-only) input means "scale by the largest index".
    if scaling_factor is None or str(scaling_factor).strip() == '':
        max_index = max(indices) if indices else 0
    else:
        max_index = float(scaling_factor)

    # Clamp to [0, 1]: a scaling factor smaller than the largest index (or a negative
    # one) would otherwise produce RGB components outside the range to_hex accepts.
    def _clamp(value):
        return min(max(value, 0.0), 1.0)

    if max_index != 0:
        scaled_list = [_clamp(index / max_index) for index in indices]
    else:
        scaled_list = [_clamp(float(index)) for index in indices]

    color_list = []
    for position in range(x_dimension * y_dimension):
        if position >= len(indices):
            # Padding hexagon that fills the square grid; it has no gene set.
            color_list.append("#FFFFFF")
            continue
        intensity = scaled_list[position]
        if float(indices[position]) >= cut_off_value:
            rgb = (1 - intensity * r_value, 1 - intensity * g_value, 1 - intensity * b_value)
        else:
            rgb = (1 - intensity, 1 - intensity, 1 - intensity)
        color_list.append(matplotlib.colors.to_hex(rgb))
    return color_list, max_index, cut_off_value

### Functions to create the canvas (uses JavaScript's D3 library)

In [None]:
def init_chart():
  """Emit the JavaScript that defines the hexagon-canvas drawing module.

  Displays a script tag that loads RequireJS from the notebook's static path, then a
  script that configures RequireJS for d3 and d3-hexbin and defines an AMD module under
  a freshly generated id. The module exports a function
  (figure_id, numA, numB, colorList, libraryList, indices) that draws numA columns by
  numB rows of hexagons into the svg element with the given id, fills hexagon i with
  colorList[i] and gives it a tooltip built from libraryList[i] and indices[i].

  Returns:
      The generated module id (a string starting with 'mychart-'), to be passed to
      require() by the caller.
  """
  chart_id = 'mychart-' + str(uuid.uuid4())
  display(HTML('<script src="/static/components/requirejs/require.js"></script>'))
  # NOTE: the text below is a string.Template, so a literal dollar sign inside the
  # JavaScript would have to be doubled.
  display(HTML(Template(dedent('''
  <script>
  require.config({
    paths: {
      'd3': 'https://cdnjs.cloudflare.com/ajax/libs/d3/5.16.0/d3.min',
      'd3-hexbin': 'https://d3js.org/d3-hexbin.v0.2.min',
    },
    shim: {
      'd3-hexbin': ['d3']
    }
  })

  // If we configure mychart via url, we can eliminate this define here
  define($chart_id, ['d3', 'd3-hexbin'], function(d3, d3_hexbin) {
    return function (figure_id, numA, numB, colorList, libraryList, indices) {
      var margin = {top: 50, right: 20, bottom: 20, left: 50},
        width = 1050 - margin.left - margin.right,
        height = 550 - margin.top - margin.bottom;

      // append the svg object to the body of the page
      var svG = d3.select('#' + figure_id)
                  .attr("width", width + margin.left + margin.right)
                  .attr("height", height + margin.top + margin.bottom)
                  .append("g")
                  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
      
      //The number of columns and rows of the heatmap
      var MapColumns = numA,
          MapRows = numB;

      //The maximum radius the hexagons can have to still fit the screen
      var hexRadius = d3.min([width/((MapColumns + 0.5) * Math.sqrt(3)), height/((MapRows + 1/3) * 1.5)]);

      //Calculate the center position of each hexagon
      var points = [];
      for (var i = 0; i < MapRows; i++) {
          for (var j = 0; j < MapColumns; j++) {
              var x = hexRadius * j * Math.sqrt(3)
              //Offset each uneven row by half of a "hex-width" to the right
              if(i%2 === 1) x += (hexRadius * Math.sqrt(3))/2
              var y = hexRadius * i * 1.5
              points.push([x,y])
          }
      }

      //Set the hexagon radius
      var hexbin = d3_hexbin.hexbin().radius(hexRadius);

      svG.append("g")
        .selectAll(".hexagon")
        .data(hexbin(points))
        .enter().append("path")
        .attr("class", "hexagon")
        .attr("d", function (d) {
            return "M" + d.x + "," + d.y + hexbin.hexagon();
        })
        .attr("stroke", "black")
        .attr("stroke-width", "1px")
        .style("fill", function (d,i) { return colorList[i]; })
        .on("mouseover", mover)
        .on("mouseout", mout)
        .append("svg:title")
        .text(function(d,i) {
            // The grid can hold more hexagons than there are gene sets; the padding
            // hexagons have no entry in libraryList and get an empty tooltip instead
            // of raising a TypeError on an undefined name.
            if (i >= libraryList.length) return "";
            return String(libraryList[i]).concat(" ").concat(indices[i]);
        });

      // Mouseover function
      function mover(d) {
      d3.select(this)
        .transition().duration(10)  
        .style("fill-opacity", 0.3)
      };

      // Mouseout function
      function mout(d) { 
      d3.select(this)
        .transition().duration(10)
        .style("fill-opacity", 1)
      };

  }

  })
  </script>
  ''')).substitute({ 'chart_id': repr(chart_id) })))
  return chart_id

def Canvas(numA, numB, colorList, libraryList, indices):
  """Draw one hexagonal canvas in the notebook output.

  Registers the drawing module through init_chart(), then displays an empty svg
  element with a fresh id together with a script that requires the module and calls
  it on that element. Every argument is embedded in the script via its Python repr.

  Args:
      numA: number of hexagon columns.
      numB: number of hexagon rows.
      colorList: fill color of each hexagon, in drawing order.
      libraryList: gene set name of each hexagon, in drawing order.
      indices: similarity index of each hexagon, in drawing order.
  """
  # The module must be defined (and its scripts displayed) before the figure markup.
  module_name = init_chart()
  figure_name = 'fig-' + str(uuid.uuid4())

  canvas_markup = Template(dedent('''
  <svg id=$figure_id></svg>
  <script>
  require([$chart_id], function(mychart) {
    mychart($figure_id, $numA, $numB, $colorList, $libraryList, $indices)
  })
  </script>
  '''))

  placeholder_values = dict(
      chart_id=module_name,
      figure_id=figure_name,
      numA=numA,
      numB=numB,
      colorList=colorList,
      libraryList=libraryList,
      indices=indices,
  )
  rendered_markup = canvas_markup.substitute(
      {placeholder: repr(value) for placeholder, value in placeholder_values.items()}
  )
  display(HTML(rendered_markup))

The figure(s) below are made up of hexagons, each one representing one gene set in your selected library (or libraries). Hexagons are colored by how similar their gene set is to your input gene set (the darker, the more similar). Hovering over a hexagon shows the gene set it represents as well as the Jaccard similarity index between that set and the input gene set.

In [None]:
# Draw one canvas per selected library. Only the first two selections are drawn.
libraries_to_draw = list(all_libraries)[:2]

if not libraries_to_draw:
    # Previously an empty selection produced no output at all.
    print("No Enrichr library was selected, so no canvas was created.")
else:
    # Download and score each library: (anneal_list, x_dimension, y_dimension).
    processed_libraries = [
        library_processing(position) for position in range(len(libraries_to_draw))
    ]

    # First pass: color every library with the user's scaling factor (blank means
    # "use this library's own maximum"). 2.0 asks get_color to derive the cut-off.
    colorings = [
        get_color(anneal, 2.0, scaling_factor, x_dim, y_dim)
        for anneal, x_dim, y_dim in processed_libraries
    ]

    # All canvases share the largest scaling factor so their brightness is comparable.
    # The name `scaling_factor` is reassigned on purpose, as the original cell did.
    scaling_factor = max(coloring[1] for coloring in colorings)

    # Second pass: only libraries that were scaled differently need to be recolored.
    colorings = [
        coloring if coloring[1] == scaling_factor
        else get_color(anneal, 2.0, scaling_factor, x_dim, y_dim)
        for coloring, (anneal, x_dim, y_dim) in zip(colorings, processed_libraries)
    ]

    for library_name, (anneal, x_dim, y_dim), coloring in zip(
            libraries_to_draw, processed_libraries, colorings):
        print(library_name)
        Canvas(x_dim, y_dim, coloring[0],
               [entry[0] for entry in anneal],   # gene set names
               [entry[2] for entry in anneal])   # similarity indices

    if len(libraries_to_draw) == 1:
        print("The color scaling factor for this canvas is: " + str(scaling_factor))
        print("Gene sets with a similarity index less than " + str(colorings[0][2]) + " are not colored")
    else:
        print("The color scaling factor for these canvases are: " + str(scaling_factor))
        print("For the first canvas, gene sets with a similarity index less than " + str(colorings[0][2]) + " are not colored")
        print("For the second canvas, gene sets with a similarity index less than " + str(colorings[1][2]) + " are not colored")