diff --git a/.ipynb_checkpoints/ExtendedRF-checkpoint.ipynb b/.ipynb_checkpoints/ExtendedRF-checkpoint.ipynb new file mode 100644 index 00000000..99b549d7 --- /dev/null +++ b/.ipynb_checkpoints/ExtendedRF-checkpoint.ipynb @@ -0,0 +1,1753 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Test trees" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "import dendropy\n", + "from dendropy.calculate import treecompare\n", + "import logging\n", + "from collections import defaultdict\n", + "import itertools\n", + "import random\n", + "import math\n", + "import copy\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "taxa = dendropy.TaxonNamespace()\n", + "tree_str_1 = \"[&R] ((R,((E,F)speciation,(G,H)duplication)speciation)speciation,(Q,((K,L)speciation,(J,I)speciation)speciation)duplication)duplication;\"\n", + "##tree_1 = dendropy.Tree.get(\n", + "## data=tree_str_1,\n", + "## schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_str_2 = \"[&R] ((Q,((E,F)duplication,(G,H)speciation)speciation)speciation,(R,((K,L)speciation,(J,I)speciation)speciation)duplication)duplication;\"\n", + "##tree_2 = dendropy.Tree.get(\n", + "## data=tree_str_2,\n", + "## schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "#tree_str_1 = \"[&U] ((A,B)speciation, (C,D)speciation)speciation;\"\n", + "#tree_str_2 = \"[&U] ((A,C)speciation, (B,D)speciation)speciation;\"\n", + "tree_str_3 = \"[&R] ((A,B)speciation, (C,D)speciation);\"\n", + "tree_str_4 = \"[&R] ((A,C)speciation, (B,D)speciation);\"\n", + "\n", + "tree_1 = dendropy.Tree.get(\n", + " data=tree_str_1,\n", + " schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_2 = dendropy.Tree.get(\n", + " data=tree_str_2,\n", + " schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_3 = dendropy.Tree.get(\n", + " data=tree_str_3,\n", + " schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_4 = dendropy.Tree.get(\n", + " data=tree_str_4,\n", + " schema=\"newick\", taxon_namespace = taxa)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# Run the test on a much larger dataset\n", + "taxa2 = dendropy.TaxonNamespace()\n", + "t5 = dendropy.Tree.get_from_path('p53.nhx', 'newick', taxon_namespace=taxa2)\n", + "for n in t5.internal_nodes():\n", + " if random.random() < 0.7:\n", + " n.label = 'speciation'\n", + " else:\n", + " n.label = 'duplication'\n", + "t5.seed_node.label = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Function definitions" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# make a copy and reroot\n", + "def cloneAndReroot(t):\n", + " t1 = t.clone(depth=1)\n", + " # reroot around the edge leading to the first leaf\n", + " l = next(t1.leaf_node_iter())\n", + " t1.reroot_at_edge(l.incident_edges()[0])\n", + " return(t1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def identify_labelled_bipartitions(t):\n", + " \n", + " t.encode_bipartitions()\n", + "\n", + " def recursive_f(n,bipart,bipart2nodes,numTaxa):\n", + " if n.is_leaf():\n", + " return\n", + " for c in n.child_node_iter():\n", + " if not c.is_leaf():\n", + " numTaxaC = bin(c.bipartition.split_bitmask).count('1')\n", + " numTaxaN = numTaxa - numTaxaC\n", + "\n", + " # at the start of the recursion, one subtree might contain all taxa \n", + " # minus the other one so we must ignore this trivial bipartition\n", + " if numTaxaN > 1:\n", + " bipart.append(c.bipartition)\n", + " bipart2nodes[c.bipartition] = [c,n]\n", + " recursive_f(c,bipart,bipart2nodes,numTaxa)\n", + " \n", + " numTaxa = len(t.leaf_nodes())\n", + " bipart = []\n", + " bipart2nodes = {}\n", + " recursive_f(t.seed_node,bipart,bipart2nodes,numTaxa)\n", + " return(bipart,bipart2nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def LRF(intree1,intree2):\n", + " t1 = intree1.clone()\n", + " t2 = intree2.clone()\n", + " b1,bTn1 = identify_labelled_bipartitions(t1)\n", + " b2,bTn2 = identify_labelled_bipartitions(t2)\n", + "\n", + " # FIRST PART: identifying the bipartitions unique to one or the other tree\n", + " # and collapsing the trivial edges\n", + " tocollapse1 = []\n", + " tocollapse2 = []\n", + " simple_rf = tot_rf = path_flip = 0\n", + " for k in set(b1).difference(set(b2)):\n", + " logging.info(k)\n", + " # retrieve the nodes associated with the bipartition\n", + " [c,n] = bTn1[k]\n", + " if c.label == n.label:\n", + " logging.info('collapsing node %s' % c)\n", + " c.edge.collapse()\n", + " simple_rf += 1\n", + " else:\n", + " tocollapse1.append(k)\n", + " logging.info('---')\n", + " for k in set(b2).difference(set(b1)):\n", + " logging.info(k)\n", + " # retrieve the nodes associated with the bipartition\n", + " [c,n] = bTn2[k]\n", + " if c.label == n.label:\n", + " logging.info(\"collapsing node %s\" % c)\n", + " c.edge.collapse()\n", + " simple_rf += 1\n", + " else:\n", + " tocollapse2.append(k)\n", + " logging.info('***') \n", + " print('step 1, collapse consistent edges: %d' % simple_rf)\n", + "\n", + " # PART II: identifying the connected components of edges to be \n", + " # collapsed and recording the ends of the longest path\n", + " #\n", + " # The idea of this function is to go from root to leaves,\n", + " # and then, on the way back, compute the longest path of\n", + " # each component to be collapsed.\n", + " # Within each component, we need to keep track of three potential \n", + " # cases: \n", + " # (1) the longest path spans over the edge (n.edge) in question,\n", + " # in which case its total length is still undetermined \n", + " # (2) the longest path spans over two of the children of n. In\n", + " # this case the length is known but the type needs to be\n", + " # determined\n", + " # (3) the longest path is found somewhere deeper. We need to keep\n", + " # track of its type.\n", + " #\n", + " # Note that the longest path is highlighted with ****\n", + " #\n", + " # ------\n", + " # /\n", + " # **************************\n", + " # *\n", + " # ****n.edge**** n (case 1)\n", + " # \\\n", + " # -----------------\n", + " #\n", + " # ------\n", + " # /\n", + " # **************************\n", + " # *\n", + " # ---n.edge---- n (case 2)\n", + " # *\n", + " # **********************\n", + " #\n", + " #\n", + " # ************\n", + " # *\n", + " # ------------**************\n", + " # /\n", + " # ---n.edge---- n (case 3)\n", + " # \\\n", + " # ---------------\n", + "\n", + " # a helpful datastructure to store a path\n", + " class maxpath(object):\n", + " def __init__(self, length=0, n1=None, n2=None):\n", + " self.length = length\n", + " self.n1 = n1\n", + " self.n2 = n2\n", + " def __cmp__(self, other):\n", + " return(cmp(self.length, other.length))\n", + " def __lt__(self, other):\n", + " return(self.length < other.length)\n", + " def __eq__(self, other):\n", + " return(self.length == other.length)\n", + " def __str__(self):\n", + " return('Path length: %d. Ends %s & %s' % (self.length, self.n1, self.n2))\n", + "\n", + " def recursive_f(n,tocollapse,out):\n", + " nonlocal path_flip\n", + " if n.is_leaf():\n", + " return([maxpath(),maxpath()])\n", + " case1 = []\n", + " case3 = []\n", + " for c in n.child_node_iter():\n", + " [t1,t3] = recursive_f(c,tocollapse,out)\n", + " case1.append(t1)\n", + " case3.append(t3)\n", + " case1.sort(reverse=True)\n", + " case3.sort(reverse=True)\n", + " # do we need to collapse the edge in question?\n", + " if n.bipartition in tocollapse:\n", + " # if the maximum path goes over the edge n, we need\n", + " # to increment the length in question\n", + " newcase1 = copy.deepcopy(case1[0])\n", + " if case1[0].length == 0:\n", + " newcase1.n1 = copy.deepcopy(n)\n", + " newcase1.length = newcase1.length + 1\n", + " # if not, we keep track of the longest path in the component\n", + " # seen so far (case 3)\n", + " newcase3 = copy.deepcopy(case3[0])\n", + " # if there are more than one child edge to collapse,\n", + " # there is one last scenario, which is that the longest path\n", + " # is obtained by connecting two children edges (case 2)\n", + " if case1[0].length + case1[1].length > case3[0].length:\n", + " # store the total length\n", + " newcase3.length = case1[0].length+case1[1].length\n", + " # store the end nodes\n", + " newcase3.n1 = case1[0].n1\n", + " newcase3.n2 = case1[1].n1\n", + " return([newcase1,newcase3])\n", + " else:\n", + " # if one of the children was still part of a component,\n", + " # we need to store it\n", + " if sum([c.length for c in case1]) > 0:\n", + " # we need to check what is the maximum length path\n", + " if case1[0].length > case3[0].length:\n", + " # longest path is case 1 or case 2\n", + " if case1[1].length > 0:\n", + " # longest path is case 2\n", + " bestpath = maxpath(case1[0].length + case1[1].length, case1[0].n1, case1[1].n1)\n", + " if bestpath.length % 2 == 0:\n", + " assert bestpath.n1.label == bestpath.n2.label\n", + " else:\n", + " assert bestpath.n1.label != bestpath.n2.label \n", + " else:\n", + " bestpath = case1[0]\n", + " bestpath.n2 = copy.deepcopy(n)\n", + " # longest path is case 1\n", + " if bestpath.length % 2 == 0:\n", + " assert n.label == bestpath.n1.label\n", + " else:\n", + " assert n.label != bestpath.n1.label\n", + "\n", + " else:\n", + " # longest path is case3\n", + " bestpath = case3[0]\n", + "\n", + " path_flip += math.floor((bestpath.length+1)/2)\n", + " out[n] = bestpath\n", + " # determine type\n", + " if bestpath.length % 2 == 0:\n", + " n.label = bestpath.n1.label\n", + " else:\n", + " n.label = \"arbitrary\"\n", + " # reset counts\n", + " return([maxpath(),maxpath()])\n", + " \n", + " path_flip = 0\n", + " tot_rf = simple_rf + len(tocollapse1) + len(tocollapse2)\n", + " out1 = {}\n", + " recursive_f(t1.seed_node,tocollapse1,out1)\n", + " out2 = {}\n", + " recursive_f(t2.seed_node,tocollapse2,out2)\n", + " for n in t1.internal_nodes():\n", + " if n.bipartition in tocollapse1:\n", + " n.edge.collapse()\n", + " for n in t2.internal_nodes():\n", + " if n.bipartition in tocollapse2:\n", + " n.edge.collapse()\n", + " \n", + " \n", + " logging.info(' #component t1 %d', len(out1))\n", + " logging.info(out1)\n", + " logging.info(' #component t2 %d', len(out2))\n", + " logging.info(out2)\n", + " logging.info(' #flips %d', path_flip)\n", + " logging.info(t1)\n", + " logging.info(t2)\n", + " logging.info(t1.as_ascii_plot())\n", + "\n", + " ## PART 3: flipping the nodes, if needed, and checking whether\n", + " ## arbitary vs arbitrary comparisons can save a flip by going\n", + " ## over an expansion instead of a contraction.\n", + " \n", + " def ordered_bipartitions(tree):\n", + " leaves_dic = {}\n", + " for node in tree.internal_nodes():\n", + " leaves_dic[tuple(sorted(nd.taxon.label for nd in dendropy.Tree(seed_node = node).leaf_nodes()))] = node\n", + " return leaves_dic\n", + "\n", + " ob1 = ordered_bipartitions(t1)\n", + " ob2 = ordered_bipartitions(t2)\n", + "\n", + " \n", + " def branches_at_end(path,tocollapse):\n", + " tc1 = set([t.split_bitmask for t in tocollapse])\n", + "\n", + " s1 = [t.bipartition.split_bitmask for t in path.n1.child_nodes()]\n", + " s1.append(path.n1.bipartition.split_bitmask)\n", + " s1 = set(s1)\n", + " s1 = s1.difference(tc1)\n", + "\n", + " s2 = [t.bipartition.split_bitmask for t in path.n2.child_nodes()]\n", + " s2.append(path.n2.bipartition.split_bitmask)\n", + " s2 = set(s2)\n", + " s2 = s2.difference(tc1)\n", + "\n", + " # convention is return speciation, then duplication node\n", + " if path.n1.label == 'speciation':\n", + " return([s1,s2])\n", + " else:\n", + " return([s2,s1])\n", + "\n", + " end_flip = 0\n", + " for b in ob1:\n", + " if (ob1[b].label == \"speciation\" and ob2[b].label == \"duplication\") or (\n", + " ob1[b].label == \"duplication\" and ob2[b].label == \"speciation\"):\n", + " end_flip += 1\n", + " elif (ob1[b].label == \"arbitrary\" and ob2[b].label == \"arbitrary\"):\n", + " # let's check if we can save a flip\n", + " \n", + " [s1,s2] = branches_at_end(out1[ob1[b]],tocollapse1)\n", + " [s3,s4] = branches_at_end(out2[ob2[b]],tocollapse2)\n", + " logging.info(s1,s2,s3,s4)\n", + " \n", + " # To be able to save a flip via expansion, one end has to\n", + " # contain all of the branches of its corresponding end in\n", + " # the other tree, e.g. like so: \n", + " #\n", + " # s1 s2 s3 s4\n", + " # {b1,b2,b3}S-----D{b4,b5} vs. {b1,b2}S-----D{b3,b4,b5} \n", + " # \n", + " # (s3 contained in s1, and s2 contained in s4). \n", + " # Note that the end type have to match.\n", + " if (bool(s1.difference(s3)) or bool(s3.difference(s1)) or \n", + " bool(s2.difference(s4)) or bool(s4.difference(s2))):\n", + " end_flip -= 1\n", + " logging.info('Matching islands are both of type arbitrary & can save a flip!')\n", + " print('step 3, perform remaining flips ', end_flip)\n", + " erf = tot_rf+path_flip+end_flip\n", + " print('**TOTAL** extended RF dist:', erf)\n", + " return(erf)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "## The following function definitions are used to introduce random edits\n", + "\n", + "def collapse_node(node):\n", + " add_flip = 0\n", + " if node.parent_node != None and node.label == node.parent_node.label:\n", + " node.edge.collapse()\n", + " else:\n", + " raise ValueError('this edge cannot be collapsed without a flip!')\n", + " return add_flip\n", + "def flip_node(node):\n", + " if node.label == 'speciation':\n", + " node.label = 'duplication'\n", + " elif node.label == 'duplication':\n", + " node.label = 'speciation'\n", + " else:\n", + " raise ValueError('Should not happen!!')\n", + " number = random.randint(1,2)\n", + " if number == 1:\n", + " node.label = \"speciation\"\n", + " if number == 2:\n", + " node.label = \"duplication\"\n", + " \n", + "## Note by CD: I have simplified and cleaned up the code. See my version below\n", + "def extend_node2(node):\n", + " if len(node.adjacent_nodes())<= 3:\n", + " raise ValueError(\"Insufficient degree\")\n", + " \n", + " tree_test = tree_1.clone()\n", + " tmp = node\n", + " modif = node.child_nodes()\n", + " random_number = random.randint(1, len(modif) - 2)\n", + " partition = modif[random_number:len(modif)]\n", + " subtrees_modif = []\n", + " for nd in partition:\n", + " subtrees_modif.append(dendropy.Tree(seed_node = nd).leaf_nodes())\n", + " subtrees_modif = [item for sublist in subtrees_modif for item in sublist]\n", + " subtrees_modif = [node.taxon.label for node in subtrees_modif]\n", + " tree_test.retain_taxa_with_labels(subtrees_modif)\n", + " tmp.add_child(tree_test.seed_node)\n", + " return tree_1.as_ascii_plot()\n", + "\n", + "def extend_node(node):\n", + " if len(node.adjacent_nodes())<= 3:\n", + " print(node.adjacent_nodes(),node.incident_edges())\n", + " raise ValueError(\"Insufficient degree. Should not happen\")\n", + " # store the children in a temporary array\n", + " children = node.child_nodes()\n", + " # shuffle the array\n", + " random.shuffle(children)\n", + " \n", + " \n", + " node.clear_child_nodes()\n", + " new = node.add_child(dendropy.Node(label=node.label))\n", + " # choose between 2 and n-1 children to be connected to the new node\n", + " k = random.randint(2,len(children)-1) \n", + " for i in range(0,len(children)):\n", + " # add the first k \n", + " if i < k:\n", + " new.add_child(children[i])\n", + " else:\n", + " node.add_child(children[i])\n", + "\n", + "\n", + "def mutateLabeledTree(tree, n):\n", + " e = f = c = 0\n", + " for i in range(0, n):\n", + " logging.info(i)\n", + " #potential nodes\n", + "\n", + " potential_nodes_to_collapse = []\n", + " for n in tree.internal_nodes():\n", + " #skip the root node\n", + " if n.parent_node == None:\n", + " continue\n", + " if n.label == n.parent_node.label and n.label != None:\n", + " potential_nodes_to_collapse.append(n)\n", + " potential_nodes_to_flip = [n for n in tree.internal_nodes() if n.label != None]\n", + " potential_nodes_to_expand = [n for n in tree.internal_nodes() if \n", + " len(n.incident_edges()) > 3 and n != tree.seed_node]\n", + " \n", + " \n", + " ncol = len(potential_nodes_to_collapse)\n", + " nflip = len(potential_nodes_to_flip)\n", + " nexp = len(potential_nodes_to_expand)\n", + " \n", + " # 30% chance of flipping\n", + " if random.random() < 0.3:\n", + " flip_node(potential_nodes_to_flip[random.randint(0,nflip-1)])\n", + " f += 1\n", + " else:\n", + " x = random.randint(0,ncol+nexp-1)\n", + " if x < ncol:\n", + " collapse_node(potential_nodes_to_collapse[x])\n", + " c += 1\n", + " else:\n", + " e += 1\n", + " extend_node(potential_nodes_to_expand[x-ncol])\n", + " logging.info(tree)\n", + " logging.info('#flips %s #col %s, #exp %s',f,c,e)\n", + " return(tree)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Data analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'int' object has no attribute 'append'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mt2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmutateLabeledTree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdepth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mLRF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mt2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mres_rf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtreecompare\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msymmetric_difference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mt2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mt2\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'append'" + ] + } + ], + "source": [ + "# Simulation & computation\n", + "\n", + "t = cloneAndReroot(t5)\n", + "random.seed(12)\n", + "abort = False\n", + "res = [0]*20\n", + "res_rf = [0]*20\n", + "logging.root.level = 40\n", + "for i in range(1,15):\n", + " if abort:\n", + " break\n", + " res.append([])\n", + " res_rf.append([])\n", + " for j in range(0,20):\n", + " #if i == 4 and j == 12:\n", + " # abort = True\n", + " # break\n", + " t2 = mutateLabeledTree(t.clone(depth=1), i)\n", + " \n", + " res[i].append(LRF(t,t2))\n", + " res_rf[i].append(treecompare.symmetric_difference(t,t2))\n", + " t = t2;\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "setting an array element with a sequence.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtranspose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Labelled Robinson Foulds:'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: setting an array element with a sequence." + ] + } + ], + "source": [ + "x = np.array(res)\n", + "y = x.transpose()\n", + "df = pd.DataFrame(y)\n", + "print('Labelled Robinson Foulds:')\n", + "df.plot.box()" + ] + }, + { + "cell_type": "code", + "execution_count": 1880, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Standard Robinson Foulds:\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1880, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfwAAAFkCAYAAADFZ4k9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X2QHHd95/H3l0C8O2IRV5HB8hFlrSAzs1IA7zoQTpgk\nECDnJBvZVyQsVuVyINl5gFA6qhIo7LMdO3eEFAhiyJUNzgWQWR4uOEhVBBxCDgIJUMwIiKWZSJy1\nGJDsEhiE8O46HP7dHz0r7672aXanZ3a336+qrdb0dM/31zOj/vSvpx8ipYQkSVrfHtftBkiSpPwZ\n+JIkFYCBL0lSARj4kiQVgIEvSVIBGPiSJBWAgS9JUgEY+JIkFYCBL0lSARj4kiQVQMuBHxFXRMTB\niPhWRDwaEcPTnnt8RPxpRHw1In7QnOY9EbG5vc2WJEmtWE4PfwPwZeD3gdkX4i8BzwZuBi4DrgKe\nAXx0BW2UJEkrFCu5eU5EPArsSikdXGCay4EvAD+VUvrmsotJkqRl68Rv+E8m2xPwvQ7UkiRJc3h8\nni8eERcAbwLen1L6wTzT/ATwUmAMmMyzPZIkrTM9QD/wiZTSdxaaMLfAj4jHAx8m693/3gKTvhS4\nK692SJJUANcA719oglwCf1rY/yTwwvl6901jAAcOHKBSqSyr3r59+9i/f/+y5l0pa1vb2ta29vJM\nTEwwNjY2Y9xb3vIWXve6180Y19/fT29vb+7t6dZynzgBN9zwFm655XVccslj0y1luev1Ort374Zm\nli6k7YE/Ley3Ar+YUvruIrNMAlQqFQYHB5dVc+PGjcued6WsbW1rW9vay7dz584Zjz/4wQ9yzTXX\ndKT2bN1a7lOn4H3v+yCvetU1bF7+SeyL/iTecuBHxAbg6UA0R22NiGcBDwEngb8mOzXvV4EnRMRT\nm9M9lFL6Yav1JEnFcOoU/Ou/ZsMVBN+as3kzPOMZ+S/zco7Svxw4DFTJfp9/C1AjO/f+acCvNYdf\nJtsAONUcPq8N7ZUkrVOnTsGxY9lQ7ddyDz+l9GkW3lDwcr2SJK0y6yKcR0ZGrG1ta1vb2uugNhRz\nuTtRe0VX2mtLAyIGgWq1Wu3aASqSpO6r1WBoCKpVMA6WplarMTQ0BDCUUqotNO266OFLkqSFGfiS\nJBWAgS9JUhdNTMCRI9kwTwa+JGlV6OmBgYFsWCT1OuzYkQ3zlOvNcyRJWqqBgaynq3zYw5ckqQAM\nfEmSCsDAlySpAAx8SZIKwMCXJKkADHxJkgrA0/IkSeqiSgXuvRe2bs23jj18SdKqcPQobN+eDYuk\ntzdb7t7efOsY+JKkVWFyMgv7yclut2R9MvAlSQJGR0e73YRcGfiSJGHgS5KkdcDAlySpADwtT5JU\nSKOjozN24x86dIjh4eFzj0dGRhgZGelG03Jh4EuSCml2oA8PD3Pw4MGOt+PUKbj9drjuOti8Ob86\n7tKXJK0KmzfDjTfmG3qr0alTcPPN2TBP9vAlSavC5s1w003dbsX6ZQ9fkiRYV7/Xz8XAlyQJA1+S\nJK0DBr4kSQVg4EuSVAAGviRJXdTTAwMD2TBPnpYnSVoVJibgvvtg69b87w2/mgwMwJEj+dexhy9J\nWhXqddixIxuq/Qx8SZIKwMCXJKkADHxJkgrAwJckqQAMfEmSCsDAlySpAAx8SZK66OhR2L49G+bJ\nC+9IklaFSgXuvTe78E6RTE5mYT85mW8dA1+StCr09mY9XeWj5V36EXFFRByMiG9FxKMRMTzHNH8c\nEScjYjwi/i4int6e5kqSpOVYzm/4G4AvA78PpNlPRsQfAa8GrgOeAzwMfCIifnwF7ZQkSSvQ8i79\nlNLHgY8DRETMMclrgVtSSoea0/wW8CCwC/jQ8psqSZKWq61H6UfEJcBFwN9PjUspfR/4AvC8dtaS\nJElL1+6D9i4i283/4KzxDzafkyStIuPj4zQajUWnK5fLlEqlDrSoMzq93MePw9mzcz83dXfA+e4S\n2NcH27atuAkdO0o/mOP3/un27dvHxo0bZ4wbGRlhZGQkz3ZJUqE1Gg2GhoYWna5arTI4ONiBFnVG\nJ5f7+HG49NLFp9u9e/7njh2DL31plNHR0Rnjz5w5s+R2tDvwHyAL96cys5f/FODwQjPu379/XX2Z\nJGktKJfLVKvVc4/r9Sx4DhzIzoufPl3eTp2C22+H666DzZvzrTV7uU+fho98BK6+Gi68cOZ0KzXV\ns5/9ni7F1Odx9uzcneBarbakDRdoc+CnlE5ExAPAi4CvAkTEk4DnAu9sZy1J0sqVSqUZna3Nm+HG\nG+GFL8w/dGc7dQpuvhmGh/OvPXu5azW4445sYyOvvmelkt9rL0XLgR8RG4Cnk/XkAbZGxLOAh1JK\n3wDeBlwfEV8DxoBbgG8CH21LiyVJudm8GW66qdutUB6W08O/HPgHst/kE/CW5vj3AK9MKb05IkrA\n7cCTgX8E/mNK6d/a0F5JkrQMyzkP/9MscjpfSukm4KblNUmSJLWbd8uTJKkADHxJkgrAwJckqQAM\nfEnSqtDTAwMD2bBItTulU1fakyStARMTcN99sHVrdn/6ThoYgCNHOltzNdTuFHv4kqRz6nXYsWP+\n67pr7TLwJUkqAANfkqQCMPAlSSoAA1+SpAIw8CVJKgADX5KkAjDwJUmrwtGjsH17NixS7U7xwjuS\npHMqFbj33uzCO502OZkF7uRksWp3ioEvSTqntzfr6Wr9cZe+JEkFYOBL0jxGR0e73QSpbQx8SZqH\nga/1xMCXJKkAPGhPktQxx4/D2bNzPzd1h7757tTX1wfbtq3N2quBgS9JTaOjozN24x86dIjh4eFz\nj0dGRhgZGelG09aF48fh0ksXn2737vmfO3ZsecHbzdqrhYEvSU2zA314eJiDBw92sUWdd+oU3H47\nXHcdbN7c3tee6l0fOJCd79+Kej0L4/l66Ku59mph4EuSzjl1Cm6+GYaH2x/4UyoVGBzM57VXc+1u\n86A9SZIKwMCXpHn4e73WEwNfkuZh4Gs9MfAlSSoAA1+SpAIw8CVJKgADX5J0Tk8PDAxkQ60vnocv\nSTpnYACOHOl2K5QHe/iSJBWAPXxJknIUE+NcRoPeeW7Ms5DeOlwGxEQZKK2oHQa+JEk56hlrUGMI\nFrgxz3wqQA2oj1Vh58quCWzgS5KUo8n+MoNUuWuZN+65Zjfc2V9ecTsMfEmScpR6SxxmkIkK0GIn\nfQI4DKTelbfDg/YkSSoAA1+SpAIw8CVJ5xw9Ctu3Z0OtLwa+JOmcycks7Ccnu90StVvbAz8iHhcR\nt0TEfRExHhFfi4jr211HkiQtXR5H6b8euA74LeAocDnwVxHxvZTSO3KoJ0mSFpFH4D8P+GhK6ePN\nx/dHxCuA5+RQS5IkLUEev+H/E/CiiNgGEBHPAnYCH8uhliRJWoI8evhvAp4ENCLiR2QbFW9MKX0g\nh1qSJGkJ8gj83wReAbyc7Df8ZwNvj4iTKaX35VBPklZsfHycRqOx6HTlcplSaWU3Mel27ePH4ezZ\nuZ+r12cOZ+vrg23blle3mzeRmXwoq/31u2m5/gMnVlZ7fDwb1motzzrv57AceQT+m4H/nlL6cPPx\nkYjoB94AzBv4+/btY+PGjTPGjYyMMDIykkMTJWmmRqPB0NDQotNVq1UGB1d2E5Nu1j5+HC69dPHp\ndi9wo5djx5YX+t28icyDn27WvnV5ta8E7j9dpeVr4wJT23J797Zee0pfH4yOjjI6Ojpj/JkzZ5b8\nGnkEfglIs8Y9yiLHC+zfv7/t/4kkaanK5TLVavXc49On4SMfgauvhgsvnDndWq491bM/sMwbueze\nPf/egcV08yYyV+wtczdV+vuhp+f850+cgOtvgFtvgUsuOf/5DRtgy0uWV3vXrmxYLsNcO2im3tf5\nPpOpvSrbtp3fCa7VakvaWIR8Av8Q8MaI+AZwhGxzaB/w7hxqSVJblEqlGZ2OWg3uuAOuuw7y7ot0\no3alkv9yzdbNm8hs2lLiqlvmLzpRg8M3wEVXQqXN78umTbBnz+LT5f2Z5BH4rwZuAd4JPAU4CfzP\n5jhJktQFbQ/8lNLDwH9t/kmSpFXAa+lLklQABr4kSQVg4EuSVAAGviSp8Hp6YGBg7lP21kvtPI7S\nl6Q1rwgBoMcMDMCRI+u7toEvSXMoQgCoWNylL0lSARj4kiQVgIEvSVIBGPiSJBWAgS9JUgEY+JIk\nFYCBL0kqvKNHYfv2bLheaxv4kjSHIgSAHjM5mb3fk5Prt7aBL0lzKEIAqFgMfEmSCsDAlySpAAx8\nSZIKwMCXJKkADHxJkgrA2+NKmmF8fJxGo7HodOVymVKptKZrHz8OZ8/O/Vy9PnM4W18fbNu29mrH\nxDiX0aB3ntdeSG8dLgNiogy0/v6Pj2fDWq312vO9F8s1+7t2+jRce202nN6+PL7ns23eDDfemA1z\nlVLq6h8wCKRqtZokdV+1Wk3Aon95/J/tZO1jx1KClf0dO7b2ah89UF1x8aMHlvf+v+td3Vvu2br5\nPW+nacsxmBbJW3v4kmYol8tUq9Vzj+t12L0bDhyASmXmdGu59lTvevZrL8VUu+broa/m2pP9ZQap\nctcya1+zG+7sX977v2tXNiyXYa5O83yf95SV7lWZbvZ3baHp1gsDX9IMpVKJwcHB88ZXKjDH6DVf\nuxPLtZpqp94ShxlkokK2f7UFE8BhIPUur/amTbBnz+LTdfO7tp550J4kSQVg4EuSVAAGviRJBWDg\nS5JUAAa+JEldNDEBR45kwzwZ+JIW1NMDAwPZsEi11XlF/bzrddixo/0XF5rN0/IkLWhgIOt9FK22\nOs/PO1/28CVJKgADX5KkAjDwJUkqAANfkqQCMPAlSSoAA1+SpALwtDxJkrqoUoF774WtW/OtYw9f\n0oKOHoXt27NhkWqr84r6eff2Zsvdu8zbDi+VgS9pQZOT2Qp4crJYtdV5ft75MvAlSSqAXAI/Ii6O\niPdFxLcjYjwivhIRg3nUkiRJi2t74EfEk4HPAY8ALwUqwOuA77a7liRJ7fKa17ym203IVR49/NcD\n96eU9qSUqimlr6eUPplSOpFDLUmS2uLDH/5wt5uQqzwC/9eAL0XEhyLiwYioRcSeHOpIkqQlyuM8\n/K3A7wJvAf4EeC7w5xExmVI6kEM9ad0ZHx+n0WgsOl25XKZUKq243vHjcPbs3M9N3aN7vnt19/XB\ntm1rr3ZMjHMZDXqXcQ/y3jpcBsREGWj9/e9m7fHxbFirtV673fdrn/09n+/zbtf3fLWYvdynT8NH\nPgJXXw0XXvjYdO1e7kgpte3FACLiEeCLKaUrpo17O3B5SmnnHNMPAtUXvOAFbNy4ccZzIyMjjIyM\ntLV90lpQq9UYGhpadLpqtcrg4MqOhz1+HC69dEUvwbFjywvebtau31Wjsnvx93jB1zhQpXJN6+9/\nN2u/+92wd++KSi/7PZ+tk9/zubzmNa+ZsRv/wQcf5KlPfeq5xy972cu47bbb2l53ucs9OjrK6Ojo\njGnOnDnDZz7zGYChlNKCm3F59PBPAbO3A+vA1QvNtH///lw+UGktKpfLVKvVc48X6gGs1FTv+sCB\n7IpfrajXYffu+Xvoq7n2ZH+ZQarctcza1+yGO/uX9/53s/auXdmwXIa5Oo9T7+t8n8lK9+hMN/t7\nvtB0ebjttttmBPpFF13EAw88kEut6Za73HN1gpe68QD5BP7ngGfMGvcM4Os51JLWpVKpNGMDuFaD\nO+6A666DvLaLK5X8Xns11k69JQ4zyEQFaLH2BHAYSMu8Mlo3a2/aBHuWcFRVJz6T2d/zoujWcudx\n0N5+4Oci4g0R8dMR8QpgD/COHGpJkqQlaHvgp5S+BFwFjAD/ArwReG1K6QPtriVJUru87GUv63YT\ncpXL3fJSSh8DPpbHa0uSlIc8DtBbTbw9rqRCWk2np0mdYOBLKqSp06BXcopaX1972iJ1goEvqZBW\n0+lpq0lPDwwMZEOtLwa+tAa4Em6/1XR62moyMABHjnS7FcqDgS+tAa6EJa1UHufhS5KkVcbAlySp\nAAx8SZIKwMCXJKkADHxJmoNnRmi98Sh9SZqDZ0ZovbGHL0k65+hR2L49G2p9MfClNcCVsDplcjL7\nnk1OdrslajcDX1oDXAlLWikDX5KkAjDwJUkqAANfkiRgdHS0203IlYEvSRIGviQVkmdGaL3xwjvS\nPMbHx2k0GotOVy6XKZVKKy3G/fc0ePjhuZ9+4ARcBjzwMajXz39+wwbY8pIyLKMdMTHOZTToneN1\nF9Nbz9oVE2VgbdVeTJ5nRoyPZ8NarfV55/r822nzZrjxxmyo9cXAl+bRaDQYGhpadLpqtcrg4OCK\nat1/T4MtV81fqwJcCXDDAq9xd5Utu1pvR89YgxpDsLvlWakANaA+VoWda6t2N01tR+7du/zX6Otr\nT1tm27wZbropn9debUZHR2fsxj906BDDw8PnHo+MjDAyMtKNpuXCwJfmUS6XqVar5x6fPg0f+Qhc\nfTVceOHM6VbqOxeW2UWVW2+BSy5pbd4TJ+D6G+DOC8tsWUbtyf4yg1S56wBUKq3NW6/DNbvhzv7l\nvQfdrN1Nu3Zlw/I8O2Xqddi9Gw7M87709cG2bfm2sQhmB/rw8DAHDx7sYovyZeBL8yiVSjN67rUa\n3HEHXHcdrLBDf57UW+Iwg1x0JVRafO2JGhy+AVLvympPVIBWawOHWZu1u2nTJtizZ/HpKpX2f9dU\nXB60J0lSARj4kiTBuvq9fi4GviRJGPiSVEienqb1xsCXpDlMnZ5WtMCfmIAjR7Kh1hcDX5JWmZ4e\nGBjIhp1Wr8OOHflf4Eed52l50hJ1cyWsYhkYyHrZUjsZ+NISuRKWtJa5S1+SpAIw8CVJKgADX5Kk\nAjDwJWkOnp6m9cbAl6Q5eHqa1huP0pcknVOpwL33wtat3W6J2s0eviStMkePwvbt2bDTenuz2r1r\n8LbDWpiBLy1RN1fCKpbJyex7NjnZ7ZZoPTHwpSVyJSxpLTPwJUkqgNwDPyLeEBGPRsRb864lSZLm\nlmvgR8TPAnuBr+RZR5IkLSy3wI+IJwIHgD3A9/KqI0l5mDo9rVLpdkuk9sizh/9O4FBK6VM51pCk\nXBT19LRTp+Cmm7Kh1pdcLrwTES8Hng1cnsfra307fhzOns3+PTExzthYY9F5+vvL9PaW6OuDbdva\nU3u2qSuuzXfltZXUHh/PhrVa6/Ou5SvBrablHh8fp9FY/LtWLpcplUq51j59Gq69NhtOf286Ubte\nh5tvzr7L0/du5FFbndX2wI+IpwFvA16cUvrhUufbt28fGzdunDFuZGSEkZGRNrdQq9nx43DppdPH\nNIChJcxZBQYBOHZsecF7fu257d49/3PLrT21vt27t/V5p/T1LX/ebllNy91oNBgaWvy7Vq1WGRwc\nbE/RRWrfcUf3as/+nudRW60ZHR1ldHR0xrgzZ84sef48evhDwIVANSKiOe7HgBdExKuBC1JKafZM\n+/fv98ukc73rAwey3sXERJmxseqi8/X3lxkby1ZS8/XQW63dinp9ZbV37cqG5TLM1Ymaev352rbS\nPRvdspqWu1wuU60u/l0rl8vtKWhttWiuTnCtVlvShirkE/ifBH5m1ri/AurAm+YKe2m2SgWy7b8S\nO3cubUOwXb+1Pla7czZtgj17Fp+uG23L02pa7lKp1LVOR1Frq7PaHvgppYeBGRcfjYiHge+klNbw\nr42SJK1dnbrSnr16SZK6qCO3x00pvbATdSRJ0tw6EviSVq/VdGqcpPwY+FLBraZT4yTlx8CX1oCe\nHhgYyIbttppOjZstz+WWisbAl9aAgQE4ciSf115Np8bNludyS0XTqaP0JUlSFxn4kiQVgIEvSVIB\nGPiSJBWAgS9JUgEY+JIW5Klx0vrgaXmSFuSpcdL6YA9fWgOOHoXt27NhkRR1uaU8GPjSGjA5mYXe\n5GS3W9JZRV1uKQ8GviRJBWDgS5JUAAa+JEkFYOBL0jxGR0e73QSpbQx8SZqHga/1xMCXtCBPjZPW\nBy+8ozkdPw5nz2b/npgYZ2ysseg8/f1lentL9PXBtm3LqxsT41xGg9566/P21uEyICbKQKnl+Scf\nymp//W5arv/AiZXVnm18fJxG47H3/PRpuPbabFirPTZduVymVFp5vYVq1+tZ2B8+PPP0uDxqz7Z5\nM9x4YzaUtDIGvs5z/Dhceun0MQ1gaAlzVoFBAI4dW17o94w1qDEEu1uftwLUgPpYFXYOtjz/g59u\n1r51ebWvBO4//dh7sBKNRoOhofPf8zvumPm4Wq0yOLjyekupvXvWZ5JH7dk2b4abbsq1xAyjo6Mz\nduMfOnSI4eHhc49HRkYYGRnpXIOkNjLwdZ6pnv2BA1CpwMREmbGx6qLz9feXGRvLgmHqNVo12V9m\nkCp3NWu3ol6Ha3bDnf3lZdW+Ym+Zu6nS3z/3deNPnIDrb4Bbb4FLLjn/+Q0bYMtLlld7tnK5TLW6\n+HteLren3mqp3W2zA314eJiDBw92sUVS+xj4mlelAlkHrsTOJfaYe3tXVjP1ljjMIBMVWu4oTwCH\ngbTMNmzaUuKqW+YvOlGDwzfARVdCJd+OLaVSKffe82qsLSk/HrQnSVIBGPiSNA9/r9d6YuBL0jwM\nfK0nBr4kSQVg4EtatSYm4MiRbChpZQx8aYl6emBgYO5T9pSPeh127MiGklbG0/KkJRoYyHqbkrQW\n2cOXJKkADHxJkgrAwJckqQAMfEmSCsDAlySpAAx8SZIKwNPyJK1alQrcey9s3drtlkhrnz18aYmO\nHoXt27OhOqO3N3vPV3rbZUkGvrRkk5NZ2E9OdrslktQ6A1+SpAJoe+BHxBsi4osR8f2IeDAi7o6I\nS9tdR5IkLV0ePfwrgNuA5wK/BDwBuCci/BVOkqQuaXvgp5SuTCm9L6VUTyn9C/DbwBZgqN21pCIZ\nHR3tdhMkrWGd+A3/yUACHupALWndMvAlrUSugR8RAbwN+GxKyZOZJLXk1Cm46aZsKGll8r7wzl8A\nA8DOnOusP+Pj3H9Pg4cfzh4+8sgEJ0+OLTrbxRf3c8EFvWzYAFteUoZSqeXSkw+NcxkNvn439NZb\nm/eBE3AZEBNloPXa4+PZsFbLhhMT44yNNRadr7+/zNhY6/UWbss4jcZjtU+fhmuvzYZT7QMol8uU\nlvE+63yz3/N6HW6+GbZtyy7CM8X3XGpdboEfEe8ArgSuSCktun2+b98+Nm7cOGPcyMgIIyMjObVw\ndbv/ngZbrpp52MOzW32Nu6ts2TXYcu0HP92gxhDc2vKsVMg+9PtPV4HWa0+t6/fuPTeGpR3+8Vi9\nvr6Wy87TlgZDQ+fXvuOOWZWrVQYHW1/WxYyOjs7YjX/o0CGGh4fPPV6P/z/me8937575OK/3XFrN\nZq8TAM6cObPk+SOl1O42TYX9rwM/n1K6b5FpB4Gq/4FnOvy5cV71/Aa33gKXXNJaD//kyV6uvwHu\n/GyZy3a23gv69v3j/OO7GvT3Q0/P+c+fOAHX38C5ts22kr0L3/42/M3fQLk5eys9/N7eEn19WW+w\nHWb3NufTqd7m8PAwBw8ezL1ON62291xa7Wq12tRG8lBKqbbQtG3v4UfEXwAjwDDwcEQ8tfnUmZSS\n1yhbotRb4jCDXHQlVJrbQc9e4i8jj9bg8A2Qlnki5KYtJa66Zf6Nr4nm609vW7ts2gR79kwfU2Ln\nzu5sCJZKJTdCO8z3XMpPHgft/Q7wJOD/ACen/f1GDrUkSdIStL2Hn1Lycr1SDtbb7/WSOstwltYI\nA1/SShj4kiQVgIEvSVIBGPhqWU8PDAzMfcqeJGl1yvtKe1qHBgbgyJFut0KS1Ap7+JIkFYCBL0lS\nARj4kiQVgIEvSVIBGPiSJBWAgS9JUgEY+JIkFYCBr5YdPQrbt2dDSdLaYOCrZZOTWdhPTna7JZKk\npTLwJUkqAANfkqQCMPAlSSoAA1+SpAIw8CVJKgADX5KkAnh8txuwqo2Pc/89DR5+OHv4yCMTnDw5\ntuhsF1/czwUX9LJhA2x5SRlKpeWUBqBWa3lW6vXW51m4LeM0Go1zj0+fhmuvzYbT21culyktY1kl\nSfkz8Bdw/z0Ntlw1NGPcs1t9jburbNk12HLtqXzdu7flWc/p61v+vDPb0mBoaOi88XfcMfNxtVpl\ncLD1ZZUk5c/AX8B3Liyziyq33gKXXNJaD//kyV6uvwHuvLDMlmXU3rUrG5bn2UFQr8Pu3XDgAFQq\n5z/f1wfbti2j8BzK5TLVanVJ00mSVicDfwGpt8RhBrnoSqg0O67PZueS5n20BodvgNS7vNqbNsGe\nPYtPV6lA3p3qUqlkz12S1jgP2pMkqQAMfEmSCsDAlySpAAx8SZIKwMCXJKkADPw1qqcHBgayoSRJ\ni/G0vDVqYACOHOl2KyRJa4U9fEmSCsDAlySpAAx8SZIKwMCXJKkADHxJkgrAwJckqQAMfEmSCsDA\nX6OOHoXt27OhJEmLMfDXqMnJLOwnJ7vdEknSWrAuAn90dLSQtaGYy21ta1vb2tZuXW6BHxG/HxEn\nImIiIj4fET+bV631/iEtUL17lQv6nlvb2ta29lqtnUvgR8RvAm8BbgQuA74CfCIiNuVRT5IkLSyv\nHv4+4PaU0ntTSg3gd4Bx4JU51ZMkSQtoe+BHxBOAIeDvp8allBLwSeB57a4nSZIWl8ftcTcBPwY8\nOGv8g8Az5pi+B6Ber8/5Yt89NcHhu8fOPZ6cPMs3v/nVGdN89d6v8ju/8Hszxj3tac+kp6ePpzwF\ndvxKP/T2trQQAIcPZ8O774Z6HR55ZIKTJ8dmTHPs2De59da7Zoy7+OJ+Tp7M6s2zWC2bmJhgbOyx\n2idOAHyTj33srhk1+vv76V3GsrbqzJkz1Gq13OtY29rWtra15zctO3sWmzayznf7RMRm4FvA81JK\nX5g2/s3A81NK/2HW9K8AZiamJElqxTUppfcvNEEePfxvAz8Cnjpr/FM4v9cP8AngGmAM8KxySZKW\nrgfoJ8vSBbW9hw8QEZ8HvpBSem3zcQD3A3+eUvqztheUJEkLyqOHD/BW4D0RUQW+SHbUfgn4q5zq\nSZKkBeQXq/zJAAAH7klEQVQS+CmlDzXPuf9jsl37XwZemlI6nUc9SZK0sFx26UuSpNVlXVxLX5Ik\nLczAlySpANZ04HfyBj2z6l4REQcj4lsR8WhEDHeo7hsi4osR8f2IeDAi7o6ISztU+3ci4isRcab5\n908R8cudqD1HW97QfN/f2oFaNzZrTf87mnfdafUvjoj3RcS3I2K8+RkMdqDuiTmW+9GIuK0DtR8X\nEbdExH3NZf5aRFyfd91p9Z8YEW+LiLFm/c9GxOU51Fl0PRIRfxwRJ5vt+LuIeHonakfEVRHx8Yg4\n3Xz+me2ou1jtiHh8RPxpRHw1In7QnOY9zeu75Fq7+fyNEVFv1n6o+Z4/pxO1Z017e3OaP2hH7Slr\nNvC7fIOeDWQHIv4+0MmDIK4AbgOeC/wS8ATgnojI/9J68A3gj8gumzwEfAr4aERUOlD7nOZG3V6y\nz7tT7iU7+PSi5t/zO1E0Ip4MfA54BHgpUAFeB3y3A+Uv57HlvQh4Mdl3/UMdqP164Drg94Ay8IfA\nH0bEqztQG+BO4EVk1wfZAfwd8Ml2hc40C65HIuKPgFeTvRfPAR4mW8f9eN61m89/luz/fLvXcQvV\nLgHPBm4mW69fRXaF1o92oDbAvzaf2wHsJLs+zD0R8RMdqA1AROwi+7y/1YaaM6WU1uQf8Hng7dMe\nB/BN4A873I5HgeEuvQebmvWf36X63wH+SwfrPZHsP+QLgX8A3tqBmjcCtS69v28CPt2N2nO05W3A\nsQ7VOgS8a9a4/w28twO1e4AfAr88a/yXgD/Ose556xHgJLBv2uMnARPAb+Rde9pzP9V8/pmdWu45\nprmc7GJuT+tC7b7mdL/YidrAvye7Zk0FOAH8QTvrrskefniDnilPJttSfKiTRZu7XF9OtjX+zx0s\n/U7gUErpUx2sCbCtuRvu/0bEgYj4yQ7V/TXgSxHxoeZPOLWI2NOh2uc0/79dQ9bz7YR/Al4UEdua\n9Z9F1tv6WAdqP57sXiCPzBo/QYf27ABExCVke1amr+O+D3yBYq3j4LH13Pc6WbT5vb+uWTf3PYoR\nEcB7gTenlNp0F5aZ8rrwTt5avUHPutP8crwN+GxKqSO/KUfEDrKA7wHOAlel7PbHnaj9crJdfW3/\nLXURnwd+m2zPwmbgJuAzEbEjpfRwzrW3Ar9L9tPVn5D9lPPnETGZUjqQc+3prgI2Au/pUL03kfVm\nGxHxI7KfHt+YUvpA3oVTSj+IiH8GboiIBtk65RVkIXs87/rTXEQWcnOt4y7qYDu6KiIuIPs+vD+l\n9IMO1fwV4ANkHZqTwItTSp3oVL0e+LeU0jvyKrBWA38+QWd/U++mvwAGyHo+ndIAnkW2xf2fgPdG\nxAvyDv2IeBrZxs2LU0o/zLPWbCml6denvjcivgh8HfgN4H/lXP5xwBdTSjc0H38lIraTbQR0MvBf\nCfxtSumBDtX7TbKQfTlwlGxD7+0RcTKl9L4O1N8N/CXZb6j/D6gB7wdyP1hyCQqzjouIxwMfJlve\n31tk8nb6FNl6bhPZ8UIfjojnpJS+nVfBiBgC/oDsuIXcrMld+rR+g551JSLeAVwJ/EJK6VSn6qaU\n/l9K6b6UUi2l9Eay3Vyv7UDpIeBCoBoRP4yIHwI/D7w2Iv6tubejI1JKZ4BjQFuOll7EKWD2rr06\nsKUDtQGIiC1kB4i+q1M1gTcD/yOl9OGU0pGU0l3AfuANnSieUjqRUvpFsoOsfjKl9HPAj5P9ptop\nD5CFe1HXcVNh/5PASzrVuwdIKU0013NfTCntJdvoe1XOZZ9Pto77xrR13E8Bb42I+9pVZE0GfrOX\nVyU7khY4t4v7RWS//61bzbD/dbKDSO7vcnMeB1zQgTqfBH6GrKf3rObfl8h6uc9qHr/RERHxROCn\nycI4b5/j/J+onkG2h6FTXkkWMJ34/XxKifN7sY/S4fVVc8X/YET8O7KzJP6mg7VPkIX+9HXck8h+\n1un0Oq6jexSmhf1W4EUppU6clbKQTqzn3gs8k8fWb88i+znhzWTfvbZYy7v0u3aDnojYQNbDm+pZ\nbm0eWPRQSukbOdb9C2AEGAYejoiprf8zKaVcby0cEX8C/C3Z6Xl9ZAdx/TzwkjzrAjR/K59xnEJE\nPAx8J6+DW6bV+TOyo8a/TnYE7c1kW/yjedZt2g98LiLeQHY63HOBPWS7GXPX3Ij+beCvUkqPdqJm\n0yHgjRHxDeAI2a70fcC7O1E8Il5C9n/7X4FtZCvdOm1etyxhPfI24PqI+BrZ6WG3kJ2JtOJT1Bar\n3dzI2UL2nQ+g3Pw+PJBSWtEehoVqk4XcX5Nt3P8q8IRp67mHVvqT3iK1vwO8EThItkG/iey0yIvJ\nNkBWZAmf93dnTf9Dsve7fceOtPOQ/07/kf2uM0Z2BO0/A5d3qO7Pk/U4fjTr7y9zrjtXzR8Bv9WB\nZX43cF/zvX4AuAd4YRc/+0/RmdPyRslWshNkp8u8H7ikg8t5JfBVYJws/F7Zwdovbn6/nt7hz3YD\n2Qb9CbJzz4+TbWg9vkP1XwZ8rfmZfwt4O9CXQ51F1yNkB4mebH7+n2jXZ7FYbeA/z/P8f8uzNo+d\nBjh9/NTjF+Rc+wKyjY1vND/7bwJ3A4Od+rxnTX8fbT4tz5vnSJJUAGvyN3xJktQaA1+SpAIw8CVJ\nKgADX5KkAjDwJUkqAANfkqQCMPAlSSoAA1+SpAIw8CVJKgADX5KkAjDwJUkqgP8P/NE4gB5wycEA\nAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = np.array(res_rf)\n", + "y = x.transpose()\n", + "df = pd.DataFrame(y)\n", + "print('Standard Robinson Foulds:')\n", + "df.plot.box()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Testing the code" + ] + }, + { + "cell_type": "code", + "execution_count": 1451, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:0\n", + "INFO:root:(R,((H,G,E,(Q,L)speciation)duplication,K,(I,F)speciation,J)duplication)\n", + "INFO:root:1\n", + "INFO:root:(R,((H,G,E,(Q,L)speciation)duplication,K,(I,F)speciation,J)speciation)\n", + "INFO:root:2\n", + "INFO:root:(R,((H,G,E,(Q,L)speciation)duplication,K,I,F,J)speciation)\n", + "INFO:root:3\n", + "INFO:root:(R,(((H,E,(Q,L)speciation)duplication,G)duplication,K,I,F,J)speciation)\n", + "INFO:root:#flips 1 #col 1, #exp 2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(R,(H,(Q,L)speciation,E,K,G,(I,F)speciation,J)duplication)\n", + "/---------------------------------------------------------------------------- R\n", + "| \n", + "| /--------------------------------------------------- H\n", + "| | \n", + "| | /------------------------- Q\n", + "+ |-------------------------+ \n", + "| | \\------------------------- L\n", + "| | \n", + "| |--------------------------------------------------- E\n", + "| | \n", + "\\------------------------+--------------------------------------------------- K\n", + " | \n", + " |--------------------------------------------------- G\n", + " | \n", + " | /------------------------- I\n", + " |-------------------------+ \n", + " | \\------------------------- F\n", + " | \n", + " \\--------------------------------------------------- J\n", + " \n", + " \n", + "(R,(((H,E,(Q,L)speciation)duplication,G)duplication,K,I,F,J)speciation)\n", + "/---------------------------------------------------------------------------- R\n", + "| \n", + "| /------------------------------ H\n", + "| | \n", + "| /---------------+------------------------------ E\n", + "| | | \n", + "+ | | /--------------- Q\n", + "| /--------------+ \\--------------+ \n", + "| | | \\--------------- L\n", + "| | | \n", + "| | \\---------------------------------------------- G\n", + "| | \n", + "\\--------------+------------------------------------------------------------- K\n", + " | \n", + " |------------------------------------------------------------- I\n", + " | \n", + " |------------------------------------------------------------- F\n", + " | \n", + " \\------------------------------------------------------------- J\n", + " \n", + " \n" + ] + } + ], + "source": [ + "logging.root.level = 20\n", + "print(t)\n", + "t.print_plot()\n", + "t2 = mutateLabeledTree(t.clone(depth=1), 4)\n", + "t1 = t.clone(depth=1)\n", + "print(t2)\n", + "t2.print_plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 1452, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:1000000100\n", + "INFO:root:---\n", + "INFO:root:0010111010\n", + "INFO:root:0010110010\n", + "INFO:root:collapsing node \n", + "INFO:root:***\n", + "INFO:root: #component t1 1\n", + "INFO:root:{: <__main__.LRF..maxpath object at 0x10a320048>}\n", + "INFO:root: #component t2 1\n", + "INFO:root:{: <__main__.LRF..maxpath object at 0x10a319fd0>}\n", + "INFO:root: #flips 2\n", + "INFO:root:(R,(H,(Q,L)speciation,E,K,G,I,F,J)arbitrary)\n", + "INFO:root:(R,(H,E,(Q,L)speciation,G,K,I,F,J)arbitrary)\n", + "INFO:root:/---------------------------------------------------------------------------- R\n", + "| \n", + "| /--------------------------------------------------- H\n", + "| | \n", + "| | /------------------------- Q\n", + "+ |-------------------------+ \n", + "| | \\------------------------- L\n", + "| | \n", + "| |--------------------------------------------------- E\n", + "| | \n", + "\\------------------------+--------------------------------------------------- K\n", + " | \n", + " |--------------------------------------------------- G\n", + " | \n", + " |--------------------------------------------------- I\n", + " | \n", + " |--------------------------------------------------- F\n", + " | \n", + " \\--------------------------------------------------- J\n", + " \n", + " \n", + "INFO:root:Matching islands are both of type arbitrary & can save a flip!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step 1, collapse consistent edges: 1\n", + "{('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q'): , ('L', 'Q'): , ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): }\n", + "-\n", + "{('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q'): , ('L', 'Q'): , ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): }\n", + "{512, 4} {64, 160, 2, 256, 8, 16, 1022} {64, 512, 4, 256, 1022} {16, 8, 2, 160}\n", + "step 3, perform remaining flips -1\n", + "**TOTAL** extended RF dist: 4\n" + ] + }, + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 1452, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LRF(t1,t2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Legacy code by Gabriela" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## A) Identifying" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('E', 'F', 'G', 'H') (, )\n", + "('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q') (, )\n", + "('I', 'J', 'K', 'L') (, )\n", + "('I', 'J', 'K', 'L', 'Q') (, )\n", + "('E', 'F') (, )\n", + "('G', 'H') (, )\n", + "('K', 'L') (, )\n", + "('I', 'J') (, )\n", + "\n", + "\n", + "There are 8 bipartitions.\n" + ] + } + ], + "source": [ + "# Comment by CD: the code has a mistake in that it also returns a \"trivial\" bipartition \n", + "# with one taxon on one side (R) and all others on the other side.\n", + "\n", + "def id_bipartitions_and_linkers(tree):\n", + " tree.encode_bipartitions()\n", + " linkers_bipartitions = {}\n", + " labeled_nodes = [node for node in tree if node.label]\n", + " for node in labeled_nodes:\n", + " if len(node.bipartition.leafset_taxa(taxon_namespace = tree.taxon_namespace))>1:\n", + " #if node.parent_node in labeled_nodes:\n", + " linkers_bipartitions[tuple(sorted([nd.label for nd in node.bipartition.leafset_taxa(taxon_namespace = tree.taxon_namespace)]))] = (node, node.parent_node)\n", + " return linkers_bipartitions\n", + "\n", + "# testing:\n", + "for element in id_bipartitions_and_linkers(t1):\n", + " print (element, id_bipartitions_and_linkers(t1)[element])\n", + "print (\"\\n\")\n", + "print (\"There are\", len(id_bipartitions_and_linkers(t1)), \"bipartitions.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The set of differing biparitions is: {('E', 'F', 'G', 'H', 'R'), ('E', 'F', 'G', 'H', 'Q'), ('I', 'J', 'K', 'L', 'Q'), ('I', 'J', 'K', 'L', 'R')} \n", + "\n", + "All the edges that will be collapsed in tree_1 [(, ), (, )]\n", + "All the edges that will be collapsed in tree_2 [(, ), (, )]\n", + "\n", + "\n", + "The edges that will be collapsed in this first step in tree 1 are:\n", + "(, ) \n", + "\n", + "The edges that will be collapsed in this first step in tree 2 are:\n", + "(, ) \n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# note CD: some bipartitions are counted twice (e.g. ('E', 'F', 'G', 'H', 'R') == ('I', 'J', 'K', 'L', 'Q'))\n", + "\n", + "def id_differing(tree_1, tree_2):\n", + " differing_bipartitions = set(id_bipartitions_and_linkers(tree_1).keys()).symmetric_difference(set(id_bipartitions_and_linkers(tree_2)))\n", + " return differing_bipartitions\n", + "differing_bipartitions = id_differing(tree_1,tree_2)\n", + "\n", + "tree_1_edges = [id_bipartitions_and_linkers(tree_1)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_1).keys()]\n", + "tree_2_edges = [id_bipartitions_and_linkers(tree_2)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_2).keys()]\n", + "\n", + "diff_same_tree_1 = [element for element in tree_1_edges if element[0].label == element[1].label]\n", + "diff_same_tree_2 = [element for element in tree_2_edges if element[0].label == element[1].label]\n", + "\n", + "\n", + "# testing\n", + "print (\"The set of differing biparitions is:\", differing_bipartitions, \"\\n\")\n", + "\n", + "\n", + "print (\"All the edges that will be collapsed in tree_1\", tree_1_edges)\n", + "print (\"All the edges that will be collapsed in tree_2\", tree_2_edges)\n", + "print (\"\\n\")\n", + "print (\"The edges that will be collapsed in this first step in tree 1 are:\")\n", + "for elem in range(0, len(diff_same_tree_1)):\n", + " print (diff_same_tree_1[elem], \"\\n\")\n", + "print (\"The edges that will be collapsed in this first step in tree 2 are:\")\n", + "for elem in range(0, len(diff_same_tree_2)):\n", + " print (diff_same_tree_2[elem], \"\\n\")\n", + "print (\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## B) Collapsing" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The first tree is now ((R,((E,F)speciation,(G,H)duplication)speciation)speciation,Q,((K,L)speciation,(J,I)speciation)speciation)duplication\n", + "The differing bipartitions are now only {('E', 'F', 'G', 'H', 'Q'), ('E', 'F', 'G', 'H', 'R')}\n" + ] + } + ], + "source": [ + "for element in diff_same_tree_1: element[0].edge.collapse()\n", + "for element in diff_same_tree_2: element[0].edge.collapse()\n", + "\n", + "# testing\n", + "\n", + "print (\"The first tree is now\", tree_1)\n", + "print (\"The differing bipartitions are now only\", id_differing(tree_1, tree_2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " /--------------------------------------------------------- R\n", + " | \n", + "/------------------+ /------------------- E\n", + "| | /------------------+ \n", + "| | | \\------------------- F\n", + "| \\------------------+ \n", + "| | /------------------- G\n", + "| \\------------------+ \n", + "+ \\------------------- H\n", + "| \n", + "|---------------------------------------------------------------------------- Q\n", + "| \n", + "| /------------------- K\n", + "| /------------------+ \n", + "| | \\------------------- L\n", + "\\-------------------------------------+ \n", + " | /------------------- J\n", + " \\------------------+ \n", + " \\------------------- I\n", + " \n", + " \n", + " /--------------------------------------------------------- Q\n", + " | \n", + "/------------------+ /------------------- E\n", + "| | /------------------+ \n", + "| | | \\------------------- F\n", + "| \\------------------+ \n", + "| | /------------------- G\n", + "| \\------------------+ \n", + "+ \\------------------- H\n", + "| \n", + "|---------------------------------------------------------------------------- R\n", + "| \n", + "| /------------------- K\n", + "| /------------------+ \n", + "| | \\------------------- L\n", + "\\-------------------------------------+ \n", + " | /------------------- J\n", + " \\------------------+ \n", + " \\------------------- I\n", + " \n", + " \n" + ] + } + ], + "source": [ + "tree_1.print_plot()\n", + "tree_2.print_plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 645, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 645, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.randint(2,2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'tuple' object has no attribute 'edge'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdiff_same_tree_1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'edge'" + ] + } + ], + "source": [ + "diff_same_tree_1[0].edge" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## C) How many operations to be added to extended Robinson-Foulds?" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "erf = 0\n", + "erf += len(diff_same_tree_1)+ len(diff_same_tree_2)\n", + "\n", + "#testing\n", + "\n", + "print (erf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# 2) Dealing with diff_diff bipartitions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## A) Identifying" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(, )\n", + "(, )\n", + "(, )\n", + "(, )\n", + "(, None)\n", + "(, )\n", + "(, )\n", + "(, )\n", + "\n", + "\n", + "('There are', 8, 'bipartitions in tree 1.')\n", + "('There are', 8, 'bipartitions in tree 2.')\n" + ] + } + ], + "source": [ + "for element in id_bipartitions_and_linkers(tree_1):\n", + " print (id_bipartitions_and_linkers(tree_1)[element])\n", + "print (\"\\n\")\n", + "print (\"There are\", len(id_bipartitions_and_linkers(tree_1)), \"bipartitions in tree 1.\")\n", + "print (\"There are\", len(id_bipartitions_and_linkers(tree_2)), \"bipartitions in tree 2.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set([('E', 'F', 'G', 'H', 'R'), ('E', 'F', 'G', 'H', 'Q')])\n", + "('The edges that will be collapsed in tree 1 in this step are', [(, )])\n", + "('The edges that will be collapsed in tree 2 in this step are', [(, )])\n" + ] + } + ], + "source": [ + "differing_bipartitions = id_differing(tree_1,tree_2)\n", + "\n", + "tree_1_edges = [id_bipartitions_and_linkers(tree_1)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_1).keys()]\n", + "tree_2_edges = [id_bipartitions_and_linkers(tree_2)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_2).keys()]\n", + "\n", + "diff_diff_tree_1 = [element for element in tree_1_edges if element[0].label != element[1].label]\n", + "diff_diff_tree_2 = [element for element in tree_2_edges if element[0].label != element[1].label]\n", + "\n", + "# testing\n", + "print (differing_bipartitions)\n", + "print (\"The edges that will be collapsed in tree 1 in this step are\", diff_diff_tree_1)\n", + "print (\"The edges that will be collapsed in tree 2 in this step are\", diff_diff_tree_2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## B) Organizing in subtrees" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{: [], : []}]\n", + "[{: [], : []}]\n" + ] + } + ], + "source": [ + "def adjacent_edges(node,diff):\n", + " output = []\n", + " for edge in diff:\n", + " if node in edge:\n", + " output.append(edge)\n", + " return output\n", + "\n", + "visited = {}\n", + "for edge in diff_diff_tree_1:\n", + " visited[edge] = False\n", + "\n", + "def explore(node,orig,diff, subtree = defaultdict(set)):\n", + " for edge in adjacent_edges(node,diff):\n", + " if edge == orig:\n", + " continue\n", + " else:\n", + " if edge[0] == node:\n", + " dest = edge[1]\n", + " else:\n", + " dest = edge[0]\n", + " subtree[node].add(dest)\n", + " subtree[dest].add(node)\n", + " orig = edge\n", + " explore(dest,orig,diff)\n", + " return subtree\n", + "\n", + "\n", + "subtrees_tree_1 = []\n", + "subtrees_tree_2 = []\n", + "nodes_tree_1 = set(list(itertools.chain(*diff_diff_tree_1)))\n", + "nodes_tree_2 = set(list(itertools.chain(*diff_diff_tree_2)))\n", + "\n", + "for node in nodes_tree_1:\n", + " subtree = defaultdict(set)\n", + " if explore(node, None, diff_diff_tree_1, subtree = subtree) not in subtrees_tree_1: \n", + " subtrees_tree_1.append(explore(node, None, diff_diff_tree_1, subtree = subtree))\n", + " \n", + "for node in nodes_tree_2:\n", + " subtree = defaultdict(set)\n", + " if explore(node, None, diff_diff_tree_2, subtree=subtree) not in subtrees_tree_2: \n", + " subtrees_tree_2.append(explore(node, None, diff_diff_tree_2, subtree= subtree))\n", + "\n", + "for i in range(0, len(subtrees_tree_1)): \n", + " subtrees_tree_1[i] = dict(subtrees_tree_1[i])\n", + " for j in subtrees_tree_1[i]:\n", + " subtrees_tree_1[i][j] = list(subtrees_tree_1[i][j])\n", + "for i in range(0, len(subtrees_tree_2)): \n", + " subtrees_tree_2[i] = dict(subtrees_tree_2[i])\n", + " for j in subtrees_tree_2[i]:\n", + " subtrees_tree_2[i][j] = list(subtrees_tree_2[i][j])\n", + "\n", + "print (subtrees_tree_1)\n", + "print (subtrees_tree_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## C) Longest chains for all subtrees" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{: None, : 1}\n", + "\n", + "['speciation']\n", + "['speciation']\n", + "('List of chain lengths', [1, 1])\n", + "('Total number of flips is', 2)\n" + ] + } + ], + "source": [ + "def bfs(start, adj):\n", + " distances = {}\n", + " distances[start] = None\n", + " i = 1\n", + " frontier = [start]\n", + " while len(frontier)>0:\n", + " next_array = []\n", + " for u in frontier:\n", + " for v in adj[u]:\n", + " if v not in distances:\n", + " distances[v]=i\n", + " next_array.append(v)\n", + " frontier = next_array\n", + " i+=1\n", + " return distances\n", + "\n", + "for i in range(0, len(subtrees_tree_1)):\n", + " print (bfs(list(nodes_tree_1)[0], subtrees_tree_1[i]))\n", + " \n", + "import math \n", + "\n", + "def longest_distance(distances):\n", + " values = list(distances.values())\n", + " keys = list(distances.keys())\n", + " return keys[values.index(max(values))], max(values)\n", + "\n", + "list_of_chains = []\n", + "end_types_1 = []\n", + "end_types_2 = []\n", + "\n", + "for i in range(0, len(subtrees_tree_1)):\n", + " print list(nodes_tree_1)[0]\n", + " first_node = longest_distance(bfs(list(nodes_tree_1)[0], subtrees_tree_1[i]))[0]\n", + " furthest_node = longest_distance(bfs(first_node,subtrees_tree_1[i]))[1]\n", + " list_of_chains.append(furthest_node)\n", + " if furthest_node % 2 == 1:\n", + " if (furthest_node - math.ceil(furthest_node/2))%2 == 0:\n", + " end_type = first_node.label\n", + " else:\n", + " if first_node.label == \"speciation\":\n", + " end_type = \"duplication\"\n", + " elif first_node.label == \"duplication\":\n", + " end_type = \"speciation\"\n", + " elif furthest_node % 2 == 0:\n", + " end_type = \"undetermined\"\n", + " end_types_1.append(end_type)\n", + "\n", + "for i in range(0, len(subtrees_tree_2)):\n", + " first_node = longest_distance(bfs(list(nodes_tree_2)[0], subtrees_tree_2[i]))[0]\n", + " furthest_node = longest_distance(bfs(first_node,subtrees_tree_2[i]))[1]\n", + " list_of_chains.append(furthest_node)\n", + " if furthest_node % 2 == 1:\n", + " if (furthest_node - math.ceil(furthest_node/2))%2 == 0:\n", + " end_type = first_node.label\n", + " else:\n", + " if first_node.label == \"speciation\":\n", + " end_type = \"duplication\"\n", + " elif first_node.label == \"duplication\":\n", + " end_type = \"speciation\"\n", + " elif furthest_node % 2 == 0:\n", + " end_type = \"undetermined\"\n", + " end_types_2.append(end_type)\n", + " \n", + "print (end_types_1)\n", + "print (end_types_2)\n", + "\n", + "print (\"List of chain lengths\", list_of_chains)\n", + "flips = 0\n", + "for i in list_of_chains:\n", + " if i == 1:\n", + " flips += 1\n", + " elif i > 1:\n", + " flips += math.floor(i/2)\n", + " \n", + "print (\"Total number of flips is\", int(flips))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## D) Collapsing " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def family_edges(subtrees_tree_1, diff_diff_tree_1):\n", + " family_edges_1 = []\n", + " for subtree in subtrees_tree_1:\n", + " family = subtree.keys()\n", + " family_edges = []\n", + " for edge in diff_diff_tree_1:\n", + " for key in family:\n", + " if key in edge:\n", + " family_edges.append(edge)\n", + " family_edges_1.append(list(set(family_edges)))\n", + " return family_edges_1\n", + "\n", + "\n", + "for family in range(0, len(family_edges(subtrees_tree_1, diff_diff_tree_1))):\n", + " for element in family_edges(subtrees_tree_1, diff_diff_tree_1)[family]:\n", + " element[0].edge.collapse()\n", + " final_node = element[1]\n", + " final_node.label = end_types_1[family] \n", + "\n", + "for family in range(0, len(family_edges(subtrees_tree_2, diff_diff_tree_2))):\n", + " for element in family_edges(subtrees_tree_2, diff_diff_tree_2)[family]:\n", + " element[0].edge.collapse()\n", + " final_node = element[1]\n", + " final_node.label = end_types_2[family]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "source": [ + "## E) How many operations to be added to the extended Robinson-Foulds metric?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "erf += flips + len(diff_diff_tree_1) + len(diff_diff_tree_2)\n", + "print (\"flips\", flips)\n", + "print (\"tree_1 operations\", len(diff_diff_tree_1))\n", + "print (\"tree_2 operations\", len(diff_diff_tree_2))\n", + "print (erf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# 3) Matching nodes in identical trees" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## 1) Quick check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "print (id_bipartitions_and_linkers(tree_1).keys())\n", + "print (id_bipartitions_and_linkers(tree_2).keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## 2) Checking if, for same topologies, matching nodes have same type" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{('G', 'H'): 'duplication', ('E', 'F', 'G', 'H', 'R'): 'speciation', ('K', 'L'): 'speciation', ('E', 'F'): 'speciation', ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): 'duplication', ('E', 'F', 'G', 'H'): 'speciation', ('I', 'J', 'K', 'L'): 'speciation', ('I', 'J'): 'speciation'}\n", + "{('G', 'H'): 'speciation', ('K', 'L'): 'speciation', ('E', 'F'): 'duplication', ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): 'duplication', ('E', 'F', 'G', 'H'): 'speciation', ('I', 'J'): 'speciation', ('I', 'J', 'K', 'L'): 'speciation', ('E', 'F', 'G', 'H', 'Q'): 'speciation'}\n" + ] + } + ], + "source": [ + "def matching_nodes(tree_1):\n", + " tree_transition = tree_1.clone()\n", + " internal_nodes = [node for node in tree_transition if node.is_internal()]\n", + " leaves_dic = {}\n", + " for node in internal_nodes:\n", + " leaves_dic[tuple(sorted(nd.taxon.label for nd in dendropy.Tree(seed_node = node).leaf_nodes()))] = node.label\n", + " return leaves_dic\n", + "print (matching_nodes(tree_1))\n", + "print (matching_nodes(tree_2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "diff_id = 0\n", + "for node in matching_nodes(tree_1):\n", + " if (matching_nodes(tree_1)[node] == \"speciation\" and matching_nodes(tree_2)[node] == \"duplication\") or (matching_nodes(tree_1)[node] == \"duplication\" and matching_nodes(tree_2)[node] == \"speciation\"):\n", + " diff_id += 1\n", + "print (diff_id) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## C) How many operations to be added to extended Robinson-Foulds metric?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "print (erf)\n", + "erf += diff_id\n", + "print (erf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# FINAL RESULT:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "print (\"The extended Robinson-Foulds metric between those two trees is\", erf)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/ExtendedRF.ipynb b/ExtendedRF.ipynb new file mode 100644 index 00000000..99b549d7 --- /dev/null +++ b/ExtendedRF.ipynb @@ -0,0 +1,1753 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Test trees" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "import dendropy\n", + "from dendropy.calculate import treecompare\n", + "import logging\n", + "from collections import defaultdict\n", + "import itertools\n", + "import random\n", + "import math\n", + "import copy\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "taxa = dendropy.TaxonNamespace()\n", + "tree_str_1 = \"[&R] ((R,((E,F)speciation,(G,H)duplication)speciation)speciation,(Q,((K,L)speciation,(J,I)speciation)speciation)duplication)duplication;\"\n", + "##tree_1 = dendropy.Tree.get(\n", + "## data=tree_str_1,\n", + "## schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_str_2 = \"[&R] ((Q,((E,F)duplication,(G,H)speciation)speciation)speciation,(R,((K,L)speciation,(J,I)speciation)speciation)duplication)duplication;\"\n", + "##tree_2 = dendropy.Tree.get(\n", + "## data=tree_str_2,\n", + "## schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "#tree_str_1 = \"[&U] ((A,B)speciation, (C,D)speciation)speciation;\"\n", + "#tree_str_2 = \"[&U] ((A,C)speciation, (B,D)speciation)speciation;\"\n", + "tree_str_3 = \"[&R] ((A,B)speciation, (C,D)speciation);\"\n", + "tree_str_4 = \"[&R] ((A,C)speciation, (B,D)speciation);\"\n", + "\n", + "tree_1 = dendropy.Tree.get(\n", + " data=tree_str_1,\n", + " schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_2 = dendropy.Tree.get(\n", + " data=tree_str_2,\n", + " schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_3 = dendropy.Tree.get(\n", + " data=tree_str_3,\n", + " schema=\"newick\", taxon_namespace = taxa)\n", + "\n", + "tree_4 = dendropy.Tree.get(\n", + " data=tree_str_4,\n", + " schema=\"newick\", taxon_namespace = taxa)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# Run the test on a much larger dataset\n", + "taxa2 = dendropy.TaxonNamespace()\n", + "t5 = dendropy.Tree.get_from_path('p53.nhx', 'newick', taxon_namespace=taxa2)\n", + "for n in t5.internal_nodes():\n", + " if random.random() < 0.7:\n", + " n.label = 'speciation'\n", + " else:\n", + " n.label = 'duplication'\n", + "t5.seed_node.label = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Function definitions" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# make a copy and reroot\n", + "def cloneAndReroot(t):\n", + " t1 = t.clone(depth=1)\n", + " # reroot around the edge leading to the first leaf\n", + " l = next(t1.leaf_node_iter())\n", + " t1.reroot_at_edge(l.incident_edges()[0])\n", + " return(t1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def identify_labelled_bipartitions(t):\n", + " \n", + " t.encode_bipartitions()\n", + "\n", + " def recursive_f(n,bipart,bipart2nodes,numTaxa):\n", + " if n.is_leaf():\n", + " return\n", + " for c in n.child_node_iter():\n", + " if not c.is_leaf():\n", + " numTaxaC = bin(c.bipartition.split_bitmask).count('1')\n", + " numTaxaN = numTaxa - numTaxaC\n", + "\n", + " # at the start of the recursion, one subtree might contain all taxa \n", + " # minus the other one so we must ignore this trivial bipartition\n", + " if numTaxaN > 1:\n", + " bipart.append(c.bipartition)\n", + " bipart2nodes[c.bipartition] = [c,n]\n", + " recursive_f(c,bipart,bipart2nodes,numTaxa)\n", + " \n", + " numTaxa = len(t.leaf_nodes())\n", + " bipart = []\n", + " bipart2nodes = {}\n", + " recursive_f(t.seed_node,bipart,bipart2nodes,numTaxa)\n", + " return(bipart,bipart2nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def LRF(intree1,intree2):\n", + " t1 = intree1.clone()\n", + " t2 = intree2.clone()\n", + " b1,bTn1 = identify_labelled_bipartitions(t1)\n", + " b2,bTn2 = identify_labelled_bipartitions(t2)\n", + "\n", + " # FIRST PART: identifying the bipartitions unique to one or the other tree\n", + " # and collapsing the trivial edges\n", + " tocollapse1 = []\n", + " tocollapse2 = []\n", + " simple_rf = tot_rf = path_flip = 0\n", + " for k in set(b1).difference(set(b2)):\n", + " logging.info(k)\n", + " # retrieve the nodes associated with the bipartition\n", + " [c,n] = bTn1[k]\n", + " if c.label == n.label:\n", + " logging.info('collapsing node %s' % c)\n", + " c.edge.collapse()\n", + " simple_rf += 1\n", + " else:\n", + " tocollapse1.append(k)\n", + " logging.info('---')\n", + " for k in set(b2).difference(set(b1)):\n", + " logging.info(k)\n", + " # retrieve the nodes associated with the bipartition\n", + " [c,n] = bTn2[k]\n", + " if c.label == n.label:\n", + " logging.info(\"collapsing node %s\" % c)\n", + " c.edge.collapse()\n", + " simple_rf += 1\n", + " else:\n", + " tocollapse2.append(k)\n", + " logging.info('***') \n", + " print('step 1, collapse consistent edges: %d' % simple_rf)\n", + "\n", + " # PART II: identifying the connected components of edges to be \n", + " # collapsed and recording the ends of the longest path\n", + " #\n", + " # The idea of this function is to go from root to leaves,\n", + " # and then, on the way back, compute the longest path of\n", + " # each component to be collapsed.\n", + " # Within each component, we need to keep track of three potential \n", + " # cases: \n", + " # (1) the longest path spans over the edge (n.edge) in question,\n", + " # in which case its total length is still undetermined \n", + " # (2) the longest path spans over two of the children of n. In\n", + " # this case the length is known but the type needs to be\n", + " # determined\n", + " # (3) the longest path is found somewhere deeper. We need to keep\n", + " # track of its type.\n", + " #\n", + " # Note that the longest path is highlighted with ****\n", + " #\n", + " # ------\n", + " # /\n", + " # **************************\n", + " # *\n", + " # ****n.edge**** n (case 1)\n", + " # \\\n", + " # -----------------\n", + " #\n", + " # ------\n", + " # /\n", + " # **************************\n", + " # *\n", + " # ---n.edge---- n (case 2)\n", + " # *\n", + " # **********************\n", + " #\n", + " #\n", + " # ************\n", + " # *\n", + " # ------------**************\n", + " # /\n", + " # ---n.edge---- n (case 3)\n", + " # \\\n", + " # ---------------\n", + "\n", + " # a helpful datastructure to store a path\n", + " class maxpath(object):\n", + " def __init__(self, length=0, n1=None, n2=None):\n", + " self.length = length\n", + " self.n1 = n1\n", + " self.n2 = n2\n", + " def __cmp__(self, other):\n", + " return(cmp(self.length, other.length))\n", + " def __lt__(self, other):\n", + " return(self.length < other.length)\n", + " def __eq__(self, other):\n", + " return(self.length == other.length)\n", + " def __str__(self):\n", + " return('Path length: %d. Ends %s & %s' % (self.length, self.n1, self.n2))\n", + "\n", + " def recursive_f(n,tocollapse,out):\n", + " nonlocal path_flip\n", + " if n.is_leaf():\n", + " return([maxpath(),maxpath()])\n", + " case1 = []\n", + " case3 = []\n", + " for c in n.child_node_iter():\n", + " [t1,t3] = recursive_f(c,tocollapse,out)\n", + " case1.append(t1)\n", + " case3.append(t3)\n", + " case1.sort(reverse=True)\n", + " case3.sort(reverse=True)\n", + " # do we need to collapse the edge in question?\n", + " if n.bipartition in tocollapse:\n", + " # if the maximum path goes over the edge n, we need\n", + " # to increment the length in question\n", + " newcase1 = copy.deepcopy(case1[0])\n", + " if case1[0].length == 0:\n", + " newcase1.n1 = copy.deepcopy(n)\n", + " newcase1.length = newcase1.length + 1\n", + " # if not, we keep track of the longest path in the component\n", + " # seen so far (case 3)\n", + " newcase3 = copy.deepcopy(case3[0])\n", + " # if there are more than one child edge to collapse,\n", + " # there is one last scenario, which is that the longest path\n", + " # is obtained by connecting two children edges (case 2)\n", + " if case1[0].length + case1[1].length > case3[0].length:\n", + " # store the total length\n", + " newcase3.length = case1[0].length+case1[1].length\n", + " # store the end nodes\n", + " newcase3.n1 = case1[0].n1\n", + " newcase3.n2 = case1[1].n1\n", + " return([newcase1,newcase3])\n", + " else:\n", + " # if one of the children was still part of a component,\n", + " # we need to store it\n", + " if sum([c.length for c in case1]) > 0:\n", + " # we need to check what is the maximum length path\n", + " if case1[0].length > case3[0].length:\n", + " # longest path is case 1 or case 2\n", + " if case1[1].length > 0:\n", + " # longest path is case 2\n", + " bestpath = maxpath(case1[0].length + case1[1].length, case1[0].n1, case1[1].n1)\n", + " if bestpath.length % 2 == 0:\n", + " assert bestpath.n1.label == bestpath.n2.label\n", + " else:\n", + " assert bestpath.n1.label != bestpath.n2.label \n", + " else:\n", + " bestpath = case1[0]\n", + " bestpath.n2 = copy.deepcopy(n)\n", + " # longest path is case 1\n", + " if bestpath.length % 2 == 0:\n", + " assert n.label == bestpath.n1.label\n", + " else:\n", + " assert n.label != bestpath.n1.label\n", + "\n", + " else:\n", + " # longest path is case3\n", + " bestpath = case3[0]\n", + "\n", + " path_flip += math.floor((bestpath.length+1)/2)\n", + " out[n] = bestpath\n", + " # determine type\n", + " if bestpath.length % 2 == 0:\n", + " n.label = bestpath.n1.label\n", + " else:\n", + " n.label = \"arbitrary\"\n", + " # reset counts\n", + " return([maxpath(),maxpath()])\n", + " \n", + " path_flip = 0\n", + " tot_rf = simple_rf + len(tocollapse1) + len(tocollapse2)\n", + " out1 = {}\n", + " recursive_f(t1.seed_node,tocollapse1,out1)\n", + " out2 = {}\n", + " recursive_f(t2.seed_node,tocollapse2,out2)\n", + " for n in t1.internal_nodes():\n", + " if n.bipartition in tocollapse1:\n", + " n.edge.collapse()\n", + " for n in t2.internal_nodes():\n", + " if n.bipartition in tocollapse2:\n", + " n.edge.collapse()\n", + " \n", + " \n", + " logging.info(' #component t1 %d', len(out1))\n", + " logging.info(out1)\n", + " logging.info(' #component t2 %d', len(out2))\n", + " logging.info(out2)\n", + " logging.info(' #flips %d', path_flip)\n", + " logging.info(t1)\n", + " logging.info(t2)\n", + " logging.info(t1.as_ascii_plot())\n", + "\n", + " ## PART 3: flipping the nodes, if needed, and checking whether\n", + " ## arbitary vs arbitrary comparisons can save a flip by going\n", + " ## over an expansion instead of a contraction.\n", + " \n", + " def ordered_bipartitions(tree):\n", + " leaves_dic = {}\n", + " for node in tree.internal_nodes():\n", + " leaves_dic[tuple(sorted(nd.taxon.label for nd in dendropy.Tree(seed_node = node).leaf_nodes()))] = node\n", + " return leaves_dic\n", + "\n", + " ob1 = ordered_bipartitions(t1)\n", + " ob2 = ordered_bipartitions(t2)\n", + "\n", + " \n", + " def branches_at_end(path,tocollapse):\n", + " tc1 = set([t.split_bitmask for t in tocollapse])\n", + "\n", + " s1 = [t.bipartition.split_bitmask for t in path.n1.child_nodes()]\n", + " s1.append(path.n1.bipartition.split_bitmask)\n", + " s1 = set(s1)\n", + " s1 = s1.difference(tc1)\n", + "\n", + " s2 = [t.bipartition.split_bitmask for t in path.n2.child_nodes()]\n", + " s2.append(path.n2.bipartition.split_bitmask)\n", + " s2 = set(s2)\n", + " s2 = s2.difference(tc1)\n", + "\n", + " # convention is return speciation, then duplication node\n", + " if path.n1.label == 'speciation':\n", + " return([s1,s2])\n", + " else:\n", + " return([s2,s1])\n", + "\n", + " end_flip = 0\n", + " for b in ob1:\n", + " if (ob1[b].label == \"speciation\" and ob2[b].label == \"duplication\") or (\n", + " ob1[b].label == \"duplication\" and ob2[b].label == \"speciation\"):\n", + " end_flip += 1\n", + " elif (ob1[b].label == \"arbitrary\" and ob2[b].label == \"arbitrary\"):\n", + " # let's check if we can save a flip\n", + " \n", + " [s1,s2] = branches_at_end(out1[ob1[b]],tocollapse1)\n", + " [s3,s4] = branches_at_end(out2[ob2[b]],tocollapse2)\n", + " logging.info(s1,s2,s3,s4)\n", + " \n", + " # To be able to save a flip via expansion, one end has to\n", + " # contain all of the branches of its corresponding end in\n", + " # the other tree, e.g. like so: \n", + " #\n", + " # s1 s2 s3 s4\n", + " # {b1,b2,b3}S-----D{b4,b5} vs. {b1,b2}S-----D{b3,b4,b5} \n", + " # \n", + " # (s3 contained in s1, and s2 contained in s4). \n", + " # Note that the end type have to match.\n", + " if (bool(s1.difference(s3)) or bool(s3.difference(s1)) or \n", + " bool(s2.difference(s4)) or bool(s4.difference(s2))):\n", + " end_flip -= 1\n", + " logging.info('Matching islands are both of type arbitrary & can save a flip!')\n", + " print('step 3, perform remaining flips ', end_flip)\n", + " erf = tot_rf+path_flip+end_flip\n", + " print('**TOTAL** extended RF dist:', erf)\n", + " return(erf)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "## The following function definitions are used to introduce random edits\n", + "\n", + "def collapse_node(node):\n", + " add_flip = 0\n", + " if node.parent_node != None and node.label == node.parent_node.label:\n", + " node.edge.collapse()\n", + " else:\n", + " raise ValueError('this edge cannot be collapsed without a flip!')\n", + " return add_flip\n", + "def flip_node(node):\n", + " if node.label == 'speciation':\n", + " node.label = 'duplication'\n", + " elif node.label == 'duplication':\n", + " node.label = 'speciation'\n", + " else:\n", + " raise ValueError('Should not happen!!')\n", + " number = random.randint(1,2)\n", + " if number == 1:\n", + " node.label = \"speciation\"\n", + " if number == 2:\n", + " node.label = \"duplication\"\n", + " \n", + "## Note by CD: I have simplified and cleaned up the code. See my version below\n", + "def extend_node2(node):\n", + " if len(node.adjacent_nodes())<= 3:\n", + " raise ValueError(\"Insufficient degree\")\n", + " \n", + " tree_test = tree_1.clone()\n", + " tmp = node\n", + " modif = node.child_nodes()\n", + " random_number = random.randint(1, len(modif) - 2)\n", + " partition = modif[random_number:len(modif)]\n", + " subtrees_modif = []\n", + " for nd in partition:\n", + " subtrees_modif.append(dendropy.Tree(seed_node = nd).leaf_nodes())\n", + " subtrees_modif = [item for sublist in subtrees_modif for item in sublist]\n", + " subtrees_modif = [node.taxon.label for node in subtrees_modif]\n", + " tree_test.retain_taxa_with_labels(subtrees_modif)\n", + " tmp.add_child(tree_test.seed_node)\n", + " return tree_1.as_ascii_plot()\n", + "\n", + "def extend_node(node):\n", + " if len(node.adjacent_nodes())<= 3:\n", + " print(node.adjacent_nodes(),node.incident_edges())\n", + " raise ValueError(\"Insufficient degree. Should not happen\")\n", + " # store the children in a temporary array\n", + " children = node.child_nodes()\n", + " # shuffle the array\n", + " random.shuffle(children)\n", + " \n", + " \n", + " node.clear_child_nodes()\n", + " new = node.add_child(dendropy.Node(label=node.label))\n", + " # choose between 2 and n-1 children to be connected to the new node\n", + " k = random.randint(2,len(children)-1) \n", + " for i in range(0,len(children)):\n", + " # add the first k \n", + " if i < k:\n", + " new.add_child(children[i])\n", + " else:\n", + " node.add_child(children[i])\n", + "\n", + "\n", + "def mutateLabeledTree(tree, n):\n", + " e = f = c = 0\n", + " for i in range(0, n):\n", + " logging.info(i)\n", + " #potential nodes\n", + "\n", + " potential_nodes_to_collapse = []\n", + " for n in tree.internal_nodes():\n", + " #skip the root node\n", + " if n.parent_node == None:\n", + " continue\n", + " if n.label == n.parent_node.label and n.label != None:\n", + " potential_nodes_to_collapse.append(n)\n", + " potential_nodes_to_flip = [n for n in tree.internal_nodes() if n.label != None]\n", + " potential_nodes_to_expand = [n for n in tree.internal_nodes() if \n", + " len(n.incident_edges()) > 3 and n != tree.seed_node]\n", + " \n", + " \n", + " ncol = len(potential_nodes_to_collapse)\n", + " nflip = len(potential_nodes_to_flip)\n", + " nexp = len(potential_nodes_to_expand)\n", + " \n", + " # 30% chance of flipping\n", + " if random.random() < 0.3:\n", + " flip_node(potential_nodes_to_flip[random.randint(0,nflip-1)])\n", + " f += 1\n", + " else:\n", + " x = random.randint(0,ncol+nexp-1)\n", + " if x < ncol:\n", + " collapse_node(potential_nodes_to_collapse[x])\n", + " c += 1\n", + " else:\n", + " e += 1\n", + " extend_node(potential_nodes_to_expand[x-ncol])\n", + " logging.info(tree)\n", + " logging.info('#flips %s #col %s, #exp %s',f,c,e)\n", + " return(tree)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Data analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'int' object has no attribute 'append'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mt2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmutateLabeledTree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdepth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mLRF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mt2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mres_rf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtreecompare\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msymmetric_difference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mt2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mt2\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'append'" + ] + } + ], + "source": [ + "# Simulation & computation\n", + "\n", + "t = cloneAndReroot(t5)\n", + "random.seed(12)\n", + "abort = False\n", + "res = [0]*20\n", + "res_rf = [0]*20\n", + "logging.root.level = 40\n", + "for i in range(1,15):\n", + " if abort:\n", + " break\n", + " res.append([])\n", + " res_rf.append([])\n", + " for j in range(0,20):\n", + " #if i == 4 and j == 12:\n", + " # abort = True\n", + " # break\n", + " t2 = mutateLabeledTree(t.clone(depth=1), i)\n", + " \n", + " res[i].append(LRF(t,t2))\n", + " res_rf[i].append(treecompare.symmetric_difference(t,t2))\n", + " t = t2;\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "setting an array element with a sequence.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtranspose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Labelled Robinson Foulds:'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: setting an array element with a sequence." + ] + } + ], + "source": [ + "x = np.array(res)\n", + "y = x.transpose()\n", + "df = pd.DataFrame(y)\n", + "print('Labelled Robinson Foulds:')\n", + "df.plot.box()" + ] + }, + { + "cell_type": "code", + "execution_count": 1880, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Standard Robinson Foulds:\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1880, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfwAAAFkCAYAAADFZ4k9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X2QHHd95/H3l0C8O2IRV5HB8hFlrSAzs1IA7zoQTpgk\nECDnJBvZVyQsVuVyINl5gFA6qhIo7LMdO3eEFAhiyJUNzgWQWR4uOEhVBBxCDgIJUMwIiKWZSJy1\nGJDsEhiE8O46HP7dHz0r7672aXanZ3a336+qrdb0dM/31zOj/vSvpx8ipYQkSVrfHtftBkiSpPwZ\n+JIkFYCBL0lSARj4kiQVgIEvSVIBGPiSJBWAgS9JUgEY+JIkFYCBL0lSARj4kiQVQMuBHxFXRMTB\niPhWRDwaEcPTnnt8RPxpRHw1In7QnOY9EbG5vc2WJEmtWE4PfwPwZeD3gdkX4i8BzwZuBi4DrgKe\nAXx0BW2UJEkrFCu5eU5EPArsSikdXGCay4EvAD+VUvrmsotJkqRl68Rv+E8m2xPwvQ7UkiRJc3h8\nni8eERcAbwLen1L6wTzT/ATwUmAMmMyzPZIkrTM9QD/wiZTSdxaaMLfAj4jHAx8m693/3gKTvhS4\nK692SJJUANcA719oglwCf1rY/yTwwvl6901jAAcOHKBSqSyr3r59+9i/f/+y5l0pa1vb2ta29vJM\nTEwwNjY2Y9xb3vIWXve6180Y19/fT29vb+7t6dZynzgBN9zwFm655XVccslj0y1luev1Ort374Zm\nli6k7YE/Ley3Ar+YUvruIrNMAlQqFQYHB5dVc+PGjcued6WsbW1rW9vay7dz584Zjz/4wQ9yzTXX\ndKT2bN1a7lOn4H3v+yCvetU1bF7+SeyL/iTecuBHxAbg6UA0R22NiGcBDwEngb8mOzXvV4EnRMRT\nm9M9lFL6Yav1JEnFcOoU/Ou/ZsMVBN+as3kzPOMZ+S/zco7Svxw4DFTJfp9/C1AjO/f+acCvNYdf\nJtsAONUcPq8N7ZUkrVOnTsGxY9lQ7ddyDz+l9GkW3lDwcr2SJK0y6yKcR0ZGrG1ta1vb2uugNhRz\nuTtRe0VX2mtLAyIGgWq1Wu3aASqSpO6r1WBoCKpVMA6WplarMTQ0BDCUUqotNO266OFLkqSFGfiS\nJBWAgS9JUhdNTMCRI9kwTwa+JGlV6OmBgYFsWCT1OuzYkQ3zlOvNcyRJWqqBgaynq3zYw5ckqQAM\nfEmSCsDAlySpAAx8SZIKwMCXJKkADHxJkgrA0/IkSeqiSgXuvRe2bs23jj18SdKqcPQobN+eDYuk\ntzdb7t7efOsY+JKkVWFyMgv7yclut2R9MvAlSQJGR0e73YRcGfiSJGHgS5KkdcDAlySpADwtT5JU\nSKOjozN24x86dIjh4eFzj0dGRhgZGelG03Jh4EuSCml2oA8PD3Pw4MGOt+PUKbj9drjuOti8Ob86\n7tKXJK0KmzfDjTfmG3qr0alTcPPN2TBP9vAlSavC5s1w003dbsX6ZQ9fkiRYV7/Xz8XAlyQJA1+S\nJK0DBr4kSQVg4EuSVAAGviRJXdTTAwMD2TBPnpYnSVoVJibgvvtg69b87w2/mgwMwJEj+dexhy9J\nWhXqddixIxuq/Qx8SZIKwMCXJKkADHxJkgrAwJckqQAMfEmSCsDAlySpAAx8SZK66OhR2L49G+bJ\nC+9IklaFSgXuvTe78E6RTE5mYT85mW8dA1+StCr09mY9XeWj5V36EXFFRByMiG9FxKMRMTzHNH8c\nEScjYjwi/i4int6e5kqSpOVYzm/4G4AvA78PpNlPRsQfAa8GrgOeAzwMfCIifnwF7ZQkSSvQ8i79\nlNLHgY8DRETMMclrgVtSSoea0/wW8CCwC/jQ8psqSZKWq61H6UfEJcBFwN9PjUspfR/4AvC8dtaS\nJElL1+6D9i4i283/4KzxDzafkyStIuPj4zQajUWnK5fLlEqlDrSoMzq93MePw9mzcz83dXfA+e4S\n2NcH27atuAkdO0o/mOP3/un27dvHxo0bZ4wbGRlhZGQkz3ZJUqE1Gg2GhoYWna5arTI4ONiBFnVG\nJ5f7+HG49NLFp9u9e/7njh2DL31plNHR0Rnjz5w5s+R2tDvwHyAL96cys5f/FODwQjPu379/XX2Z\nJGktKJfLVKvVc4/r9Sx4DhzIzoufPl3eTp2C22+H666DzZvzrTV7uU+fho98BK6+Gi68cOZ0KzXV\ns5/9ni7F1Odx9uzcneBarbakDRdoc+CnlE5ExAPAi4CvAkTEk4DnAu9sZy1J0sqVSqUZna3Nm+HG\nG+GFL8w/dGc7dQpuvhmGh/OvPXu5azW4445sYyOvvmelkt9rL0XLgR8RG4Cnk/XkAbZGxLOAh1JK\n3wDeBlwfEV8DxoBbgG8CH21LiyVJudm8GW66qdutUB6W08O/HPgHst/kE/CW5vj3AK9MKb05IkrA\n7cCTgX8E/mNK6d/a0F5JkrQMyzkP/9MscjpfSukm4KblNUmSJLWbd8uTJKkADHxJkgrAwJckqQAM\nfEnSqtDTAwMD2bBItTulU1fakyStARMTcN99sHVrdn/6ThoYgCNHOltzNdTuFHv4kqRz6nXYsWP+\n67pr7TLwJUkqAANfkqQCMPAlSSoAA1+SpAIw8CVJKgADX5KkAjDwJUmrwtGjsH17NixS7U7xwjuS\npHMqFbj33uzCO502OZkF7uRksWp3ioEvSTqntzfr6Wr9cZe+JEkFYOBL0jxGR0e73QSpbQx8SZqH\nga/1xMCXJKkAPGhPktQxx4/D2bNzPzd1h7757tTX1wfbtq3N2quBgS9JTaOjozN24x86dIjh4eFz\nj0dGRhgZGelG09aF48fh0ksXn2737vmfO3ZsecHbzdqrhYEvSU2zA314eJiDBw92sUWdd+oU3H47\nXHcdbN7c3tee6l0fOJCd79+Kej0L4/l66Ku59mph4EuSzjl1Cm6+GYaH2x/4UyoVGBzM57VXc+1u\n86A9SZIKwMCXpHn4e73WEwNfkuZh4Gs9MfAlSSoAA1+SpAIw8CVJKgADX5J0Tk8PDAxkQ60vnocv\nSTpnYACOHOl2K5QHe/iSJBWAPXxJknIUE+NcRoPeeW7Ms5DeOlwGxEQZKK2oHQa+JEk56hlrUGMI\nFrgxz3wqQA2oj1Vh58quCWzgS5KUo8n+MoNUuWuZN+65Zjfc2V9ecTsMfEmScpR6SxxmkIkK0GIn\nfQI4DKTelbfDg/YkSSoAA1+SpAIw8CVJ5xw9Ctu3Z0OtLwa+JOmcycks7Ccnu90StVvbAz8iHhcR\nt0TEfRExHhFfi4jr211HkiQtXR5H6b8euA74LeAocDnwVxHxvZTSO3KoJ0mSFpFH4D8P+GhK6ePN\nx/dHxCuA5+RQS5IkLUEev+H/E/CiiNgGEBHPAnYCH8uhliRJWoI8evhvAp4ENCLiR2QbFW9MKX0g\nh1qSJGkJ8gj83wReAbyc7Df8ZwNvj4iTKaX35VBPklZsfHycRqOx6HTlcplSaWU3Mel27ePH4ezZ\nuZ+r12cOZ+vrg23blle3mzeRmXwoq/31u2m5/gMnVlZ7fDwb1motzzrv57AceQT+m4H/nlL6cPPx\nkYjoB94AzBv4+/btY+PGjTPGjYyMMDIykkMTJWmmRqPB0NDQotNVq1UGB1d2E5Nu1j5+HC69dPHp\ndi9wo5djx5YX+t28icyDn27WvnV5ta8E7j9dpeVr4wJT23J797Zee0pfH4yOjjI6Ojpj/JkzZ5b8\nGnkEfglIs8Y9yiLHC+zfv7/t/4kkaanK5TLVavXc49On4SMfgauvhgsvnDndWq491bM/sMwbueze\nPf/egcV08yYyV+wtczdV+vuhp+f850+cgOtvgFtvgUsuOf/5DRtgy0uWV3vXrmxYLsNcO2im3tf5\nPpOpvSrbtp3fCa7VakvaWIR8Av8Q8MaI+AZwhGxzaB/w7hxqSVJblEqlGZ2OWg3uuAOuuw7y7ot0\no3alkv9yzdbNm8hs2lLiqlvmLzpRg8M3wEVXQqXN78umTbBnz+LT5f2Z5BH4rwZuAd4JPAU4CfzP\n5jhJktQFbQ/8lNLDwH9t/kmSpFXAa+lLklQABr4kSQVg4EuSVAAGviSp8Hp6YGBg7lP21kvtPI7S\nl6Q1rwgBoMcMDMCRI+u7toEvSXMoQgCoWNylL0lSARj4kiQVgIEvSVIBGPiSJBWAgS9JUgEY+JIk\nFYCBL0kqvKNHYfv2bLheaxv4kjSHIgSAHjM5mb3fk5Prt7aBL0lzKEIAqFgMfEmSCsDAlySpAAx8\nSZIKwMCXJKkADHxJkgrA2+NKmmF8fJxGo7HodOVymVKptKZrHz8OZ8/O/Vy9PnM4W18fbNu29mrH\nxDiX0aB3ntdeSG8dLgNiogy0/v6Pj2fDWq312vO9F8s1+7t2+jRce202nN6+PL7ns23eDDfemA1z\nlVLq6h8wCKRqtZokdV+1Wk3Aon95/J/tZO1jx1KClf0dO7b2ah89UF1x8aMHlvf+v+td3Vvu2br5\nPW+nacsxmBbJW3v4kmYol8tUq9Vzj+t12L0bDhyASmXmdGu59lTvevZrL8VUu+broa/m2pP9ZQap\nctcya1+zG+7sX977v2tXNiyXYa5O83yf95SV7lWZbvZ3baHp1gsDX9IMpVKJwcHB88ZXKjDH6DVf\nuxPLtZpqp94ShxlkokK2f7UFE8BhIPUur/amTbBnz+LTdfO7tp550J4kSQVg4EuSVAAGviRJBWDg\nS5JUAAa+JEldNDEBR45kwzwZ+JIW1NMDAwPZsEi11XlF/bzrddixo/0XF5rN0/IkLWhgIOt9FK22\nOs/PO1/28CVJKgADX5KkAjDwJUkqAANfkqQCMPAlSSoAA1+SpALwtDxJkrqoUoF774WtW/OtYw9f\n0oKOHoXt27NhkWqr84r6eff2Zsvdu8zbDi+VgS9pQZOT2Qp4crJYtdV5ft75MvAlSSqAXAI/Ii6O\niPdFxLcjYjwivhIRg3nUkiRJi2t74EfEk4HPAY8ALwUqwOuA77a7liRJ7fKa17ym203IVR49/NcD\n96eU9qSUqimlr6eUPplSOpFDLUmS2uLDH/5wt5uQqzwC/9eAL0XEhyLiwYioRcSeHOpIkqQlyuM8\n/K3A7wJvAf4EeC7w5xExmVI6kEM9ad0ZHx+n0WgsOl25XKZUKq243vHjcPbs3M9N3aN7vnt19/XB\ntm1rr3ZMjHMZDXqXcQ/y3jpcBsREGWj9/e9m7fHxbFirtV673fdrn/09n+/zbtf3fLWYvdynT8NH\nPgJXXw0XXvjYdO1e7kgpte3FACLiEeCLKaUrpo17O3B5SmnnHNMPAtUXvOAFbNy4ccZzIyMjjIyM\ntLV90lpQq9UYGhpadLpqtcrg4MqOhz1+HC69dEUvwbFjywvebtau31Wjsnvx93jB1zhQpXJN6+9/\nN2u/+92wd++KSi/7PZ+tk9/zubzmNa+ZsRv/wQcf5KlPfeq5xy972cu47bbb2l53ucs9OjrK6Ojo\njGnOnDnDZz7zGYChlNKCm3F59PBPAbO3A+vA1QvNtH///lw+UGktKpfLVKvVc48X6gGs1FTv+sCB\n7IpfrajXYffu+Xvoq7n2ZH+ZQarctcza1+yGO/uX9/53s/auXdmwXIa5Oo9T7+t8n8lK9+hMN/t7\nvtB0ebjttttmBPpFF13EAw88kEut6Za73HN1gpe68QD5BP7ngGfMGvcM4Os51JLWpVKpNGMDuFaD\nO+6A666DvLaLK5X8Xns11k69JQ4zyEQFaLH2BHAYSMu8Mlo3a2/aBHuWcFRVJz6T2d/zoujWcudx\n0N5+4Oci4g0R8dMR8QpgD/COHGpJkqQlaHvgp5S+BFwFjAD/ArwReG1K6QPtriVJUru87GUv63YT\ncpXL3fJSSh8DPpbHa0uSlIc8DtBbTbw9rqRCWk2np0mdYOBLKqSp06BXcopaX1972iJ1goEvqZBW\n0+lpq0lPDwwMZEOtLwa+tAa4Em6/1XR62moyMABHjnS7FcqDgS+tAa6EJa1UHufhS5KkVcbAlySp\nAAx8SZIKwMCXJKkADHxJmoNnRmi98Sh9SZqDZ0ZovbGHL0k65+hR2L49G2p9MfClNcCVsDplcjL7\nnk1OdrslajcDX1oDXAlLWikDX5KkAjDwJUkqAANfkiRgdHS0203IlYEvSRIGviQVkmdGaL3xwjvS\nPMbHx2k0GotOVy6XKZVKKy3G/fc0ePjhuZ9+4ARcBjzwMajXz39+wwbY8pIyLKMdMTHOZTToneN1\nF9Nbz9oVE2VgbdVeTJ5nRoyPZ8NarfV55/r822nzZrjxxmyo9cXAl+bRaDQYGhpadLpqtcrg4OCK\nat1/T4MtV81fqwJcCXDDAq9xd5Utu1pvR89YgxpDsLvlWakANaA+VoWda6t2N01tR+7du/zX6Otr\nT1tm27wZbropn9debUZHR2fsxj906BDDw8PnHo+MjDAyMtKNpuXCwJfmUS6XqVar5x6fPg0f+Qhc\nfTVceOHM6VbqOxeW2UWVW2+BSy5pbd4TJ+D6G+DOC8tsWUbtyf4yg1S56wBUKq3NW6/DNbvhzv7l\nvQfdrN1Nu3Zlw/I8O2Xqddi9Gw7M87709cG2bfm2sQhmB/rw8DAHDx7sYovyZeBL8yiVSjN67rUa\n3HEHXHcdrLBDf57UW+Iwg1x0JVRafO2JGhy+AVLvympPVIBWawOHWZu1u2nTJtizZ/HpKpX2f9dU\nXB60J0lSARj4kiTBuvq9fi4GviRJGPiSVEienqb1xsCXpDlMnZ5WtMCfmIAjR7Kh1hcDX5JWmZ4e\nGBjIhp1Wr8OOHflf4Eed52l50hJ1cyWsYhkYyHrZUjsZ+NISuRKWtJa5S1+SpAIw8CVJKgADX5Kk\nAjDwJWkOnp6m9cbAl6Q5eHqa1huP0pcknVOpwL33wtat3W6J2s0eviStMkePwvbt2bDTenuz2r1r\n8LbDWpiBLy1RN1fCKpbJyex7NjnZ7ZZoPTHwpSVyJSxpLTPwJUkqgNwDPyLeEBGPRsRb864lSZLm\nlmvgR8TPAnuBr+RZR5IkLSy3wI+IJwIHgD3A9/KqI0l5mDo9rVLpdkuk9sizh/9O4FBK6VM51pCk\nXBT19LRTp+Cmm7Kh1pdcLrwTES8Hng1cnsfra307fhzOns3+PTExzthYY9F5+vvL9PaW6OuDbdva\nU3u2qSuuzXfltZXUHh/PhrVa6/Ou5SvBrablHh8fp9FY/LtWLpcplUq51j59Gq69NhtOf286Ubte\nh5tvzr7L0/du5FFbndX2wI+IpwFvA16cUvrhUufbt28fGzdunDFuZGSEkZGRNrdQq9nx43DppdPH\nNIChJcxZBQYBOHZsecF7fu257d49/3PLrT21vt27t/V5p/T1LX/ebllNy91oNBgaWvy7Vq1WGRwc\nbE/RRWrfcUf3as/+nudRW60ZHR1ldHR0xrgzZ84sef48evhDwIVANSKiOe7HgBdExKuBC1JKafZM\n+/fv98ukc73rAwey3sXERJmxseqi8/X3lxkby1ZS8/XQW63dinp9ZbV37cqG5TLM1Ymaev352rbS\nPRvdspqWu1wuU60u/l0rl8vtKWhttWiuTnCtVlvShirkE/ifBH5m1ri/AurAm+YKe2m2SgWy7b8S\nO3cubUOwXb+1Pla7czZtgj17Fp+uG23L02pa7lKp1LVOR1Frq7PaHvgppYeBGRcfjYiHge+klNbw\nr42SJK1dnbrSnr16SZK6qCO3x00pvbATdSRJ0tw6EviSVq/VdGqcpPwY+FLBraZT4yTlx8CX1oCe\nHhgYyIbttppOjZstz+WWisbAl9aAgQE4ciSf115Np8bNludyS0XTqaP0JUlSFxn4kiQVgIEvSVIB\nGPiSJBWAgS9JUgEY+JIW5Klx0vrgaXmSFuSpcdL6YA9fWgOOHoXt27NhkRR1uaU8GPjSGjA5mYXe\n5GS3W9JZRV1uKQ8GviRJBWDgS5JUAAa+JEkFYOBL0jxGR0e73QSpbQx8SZqHga/1xMCXtCBPjZPW\nBy+8ozkdPw5nz2b/npgYZ2ysseg8/f1lentL9PXBtm3LqxsT41xGg9566/P21uEyICbKQKnl+Scf\nymp//W5arv/AiZXVnm18fJxG47H3/PRpuPbabFirPTZduVymVFp5vYVq1+tZ2B8+PPP0uDxqz7Z5\nM9x4YzaUtDIGvs5z/Dhceun0MQ1gaAlzVoFBAI4dW17o94w1qDEEu1uftwLUgPpYFXYOtjz/g59u\n1r51ebWvBO4//dh7sBKNRoOhofPf8zvumPm4Wq0yOLjyekupvXvWZ5JH7dk2b4abbsq1xAyjo6Mz\nduMfOnSI4eHhc49HRkYYGRnpXIOkNjLwdZ6pnv2BA1CpwMREmbGx6qLz9feXGRvLgmHqNVo12V9m\nkCp3NWu3ol6Ha3bDnf3lZdW+Ym+Zu6nS3z/3deNPnIDrb4Bbb4FLLjn/+Q0bYMtLlld7tnK5TLW6\n+HteLren3mqp3W2zA314eJiDBw92sUVS+xj4mlelAlkHrsTOJfaYe3tXVjP1ljjMIBMVWu4oTwCH\ngbTMNmzaUuKqW+YvOlGDwzfARVdCJd+OLaVSKffe82qsLSk/HrQnSVIBGPiSNA9/r9d6YuBL0jwM\nfK0nBr4kSQVg4EtatSYm4MiRbChpZQx8aYl6emBgYO5T9pSPeh127MiGklbG0/KkJRoYyHqbkrQW\n2cOXJKkADHxJkgrAwJckqQAMfEmSCsDAlySpAAx8SZIKwNPyJK1alQrcey9s3drtlkhrnz18aYmO\nHoXt27OhOqO3N3vPV3rbZUkGvrRkk5NZ2E9OdrslktQ6A1+SpAJoe+BHxBsi4osR8f2IeDAi7o6I\nS9tdR5IkLV0ePfwrgNuA5wK/BDwBuCci/BVOkqQuaXvgp5SuTCm9L6VUTyn9C/DbwBZgqN21pCIZ\nHR3tdhMkrWGd+A3/yUACHupALWndMvAlrUSugR8RAbwN+GxKyZOZJLXk1Cm46aZsKGll8r7wzl8A\nA8DOnOusP+Pj3H9Pg4cfzh4+8sgEJ0+OLTrbxRf3c8EFvWzYAFteUoZSqeXSkw+NcxkNvn439NZb\nm/eBE3AZEBNloPXa4+PZsFbLhhMT44yNNRadr7+/zNhY6/UWbss4jcZjtU+fhmuvzYZT7QMol8uU\nlvE+63yz3/N6HW6+GbZtyy7CM8X3XGpdboEfEe8ArgSuSCktun2+b98+Nm7cOGPcyMgIIyMjObVw\ndbv/ngZbrpp52MOzW32Nu6ts2TXYcu0HP92gxhDc2vKsVMg+9PtPV4HWa0+t6/fuPTeGpR3+8Vi9\nvr6Wy87TlgZDQ+fXvuOOWZWrVQYHW1/WxYyOjs7YjX/o0CGGh4fPPV6P/z/me8937575OK/3XFrN\nZq8TAM6cObPk+SOl1O42TYX9rwM/n1K6b5FpB4Gq/4FnOvy5cV71/Aa33gKXXNJaD//kyV6uvwHu\n/GyZy3a23gv69v3j/OO7GvT3Q0/P+c+fOAHX38C5ts22kr0L3/42/M3fQLk5eys9/N7eEn19WW+w\nHWb3NufTqd7m8PAwBw8ezL1ON62291xa7Wq12tRG8lBKqbbQtG3v4UfEXwAjwDDwcEQ8tfnUmZSS\n1yhbotRb4jCDXHQlVJrbQc9e4i8jj9bg8A2Qlnki5KYtJa66Zf6Nr4nm609vW7ts2gR79kwfU2Ln\nzu5sCJZKJTdCO8z3XMpPHgft/Q7wJOD/ACen/f1GDrUkSdIStL2Hn1Lycr1SDtbb7/WSOstwltYI\nA1/SShj4kiQVgIEvSVIBGPhqWU8PDAzMfcqeJGl1yvtKe1qHBgbgyJFut0KS1Ap7+JIkFYCBL0lS\nARj4kiQVgIEvSVIBGPiSJBWAgS9JUgEY+JIkFYCBr5YdPQrbt2dDSdLaYOCrZZOTWdhPTna7JZKk\npTLwJUkqAANfkqQCMPAlSSoAA1+SpAIw8CVJKgADX5KkAnh8txuwqo2Pc/89DR5+OHv4yCMTnDw5\ntuhsF1/czwUX9LJhA2x5SRlKpeWUBqBWa3lW6vXW51m4LeM0Go1zj0+fhmuvzYbT21culyktY1kl\nSfkz8Bdw/z0Ntlw1NGPcs1t9jburbNk12HLtqXzdu7flWc/p61v+vDPb0mBoaOi88XfcMfNxtVpl\ncLD1ZZUk5c/AX8B3Liyziyq33gKXXNJaD//kyV6uvwHuvLDMlmXU3rUrG5bn2UFQr8Pu3XDgAFQq\n5z/f1wfbti2j8BzK5TLVanVJ00mSVicDfwGpt8RhBrnoSqg0O67PZueS5n20BodvgNS7vNqbNsGe\nPYtPV6lA3p3qUqlkz12S1jgP2pMkqQAMfEmSCsDAlySpAAx8SZIKwMCXJKkADPw1qqcHBgayoSRJ\ni/G0vDVqYACOHOl2KyRJa4U9fEmSCsDAlySpAAx8SZIKwMCXJKkADHxJkgrAwJckqQAMfEmSCsDA\nX6OOHoXt27OhJEmLMfDXqMnJLOwnJ7vdEknSWrAuAn90dLSQtaGYy21ta1vb2tZuXW6BHxG/HxEn\nImIiIj4fET+bV631/iEtUL17lQv6nlvb2ta29lqtnUvgR8RvAm8BbgQuA74CfCIiNuVRT5IkLSyv\nHv4+4PaU0ntTSg3gd4Bx4JU51ZMkSQtoe+BHxBOAIeDvp8allBLwSeB57a4nSZIWl8ftcTcBPwY8\nOGv8g8Az5pi+B6Ber8/5Yt89NcHhu8fOPZ6cPMs3v/nVGdN89d6v8ju/8Hszxj3tac+kp6ePpzwF\ndvxKP/T2trQQAIcPZ8O774Z6HR55ZIKTJ8dmTHPs2De59da7Zoy7+OJ+Tp7M6s2zWC2bmJhgbOyx\n2idOAHyTj33srhk1+vv76V3GsrbqzJkz1Gq13OtY29rWtra15zctO3sWmzayznf7RMRm4FvA81JK\nX5g2/s3A81NK/2HW9K8AZiamJElqxTUppfcvNEEePfxvAz8Cnjpr/FM4v9cP8AngGmAM8KxySZKW\nrgfoJ8vSBbW9hw8QEZ8HvpBSem3zcQD3A3+eUvqztheUJEkLyqOHD/BW4D0RUQW+SHbUfgn4q5zq\nSZKkBeQXq/zJAAAH7klEQVQS+CmlDzXPuf9jsl37XwZemlI6nUc9SZK0sFx26UuSpNVlXVxLX5Ik\nLczAlySpANZ04HfyBj2z6l4REQcj4lsR8WhEDHeo7hsi4osR8f2IeDAi7o6ISztU+3ci4isRcab5\n908R8cudqD1HW97QfN/f2oFaNzZrTf87mnfdafUvjoj3RcS3I2K8+RkMdqDuiTmW+9GIuK0DtR8X\nEbdExH3NZf5aRFyfd91p9Z8YEW+LiLFm/c9GxOU51Fl0PRIRfxwRJ5vt+LuIeHonakfEVRHx8Yg4\n3Xz+me2ou1jtiHh8RPxpRHw1In7QnOY9zeu75Fq7+fyNEVFv1n6o+Z4/pxO1Z017e3OaP2hH7Slr\nNvC7fIOeDWQHIv4+0MmDIK4AbgOeC/wS8ATgnojI/9J68A3gj8gumzwEfAr4aERUOlD7nOZG3V6y\nz7tT7iU7+PSi5t/zO1E0Ip4MfA54BHgpUAFeB3y3A+Uv57HlvQh4Mdl3/UMdqP164Drg94Ay8IfA\nH0bEqztQG+BO4EVk1wfZAfwd8Ml2hc40C65HIuKPgFeTvRfPAR4mW8f9eN61m89/luz/fLvXcQvV\nLgHPBm4mW69fRXaF1o92oDbAvzaf2wHsJLs+zD0R8RMdqA1AROwi+7y/1YaaM6WU1uQf8Hng7dMe\nB/BN4A873I5HgeEuvQebmvWf36X63wH+SwfrPZHsP+QLgX8A3tqBmjcCtS69v28CPt2N2nO05W3A\nsQ7VOgS8a9a4/w28twO1e4AfAr88a/yXgD/Ose556xHgJLBv2uMnARPAb+Rde9pzP9V8/pmdWu45\nprmc7GJuT+tC7b7mdL/YidrAvye7Zk0FOAH8QTvrrskefniDnilPJttSfKiTRZu7XF9OtjX+zx0s\n/U7gUErpUx2sCbCtuRvu/0bEgYj4yQ7V/TXgSxHxoeZPOLWI2NOh2uc0/79dQ9bz7YR/Al4UEdua\n9Z9F1tv6WAdqP57sXiCPzBo/QYf27ABExCVke1amr+O+D3yBYq3j4LH13Pc6WbT5vb+uWTf3PYoR\nEcB7gTenlNp0F5aZ8rrwTt5avUHPutP8crwN+GxKqSO/KUfEDrKA7wHOAlel7PbHnaj9crJdfW3/\nLXURnwd+m2zPwmbgJuAzEbEjpfRwzrW3Ar9L9tPVn5D9lPPnETGZUjqQc+3prgI2Au/pUL03kfVm\nGxHxI7KfHt+YUvpA3oVTSj+IiH8GboiIBtk65RVkIXs87/rTXEQWcnOt4y7qYDu6KiIuIPs+vD+l\n9IMO1fwV4ANkHZqTwItTSp3oVL0e+LeU0jvyKrBWA38+QWd/U++mvwAGyHo+ndIAnkW2xf2fgPdG\nxAvyDv2IeBrZxs2LU0o/zLPWbCml6denvjcivgh8HfgN4H/lXP5xwBdTSjc0H38lIraTbQR0MvBf\nCfxtSumBDtX7TbKQfTlwlGxD7+0RcTKl9L4O1N8N/CXZb6j/D6gB7wdyP1hyCQqzjouIxwMfJlve\n31tk8nb6FNl6bhPZ8UIfjojnpJS+nVfBiBgC/oDsuIXcrMld+rR+g551JSLeAVwJ/EJK6VSn6qaU\n/l9K6b6UUi2l9Eay3Vyv7UDpIeBCoBoRP4yIHwI/D7w2Iv6tubejI1JKZ4BjQFuOll7EKWD2rr06\nsKUDtQGIiC1kB4i+q1M1gTcD/yOl9OGU0pGU0l3AfuANnSieUjqRUvpFsoOsfjKl9HPAj5P9ptop\nD5CFe1HXcVNh/5PASzrVuwdIKU0013NfTCntJdvoe1XOZZ9Pto77xrR13E8Bb42I+9pVZE0GfrOX\nVyU7khY4t4v7RWS//61bzbD/dbKDSO7vcnMeB1zQgTqfBH6GrKf3rObfl8h6uc9qHr/RERHxROCn\nycI4b5/j/J+onkG2h6FTXkkWMJ34/XxKifN7sY/S4fVVc8X/YET8O7KzJP6mg7VPkIX+9HXck8h+\n1un0Oq6jexSmhf1W4EUppU6clbKQTqzn3gs8k8fWb88i+znhzWTfvbZYy7v0u3aDnojYQNbDm+pZ\nbm0eWPRQSukbOdb9C2AEGAYejoiprf8zKaVcby0cEX8C/C3Z6Xl9ZAdx/TzwkjzrAjR/K59xnEJE\nPAx8J6+DW6bV+TOyo8a/TnYE7c1kW/yjedZt2g98LiLeQHY63HOBPWS7GXPX3Ij+beCvUkqPdqJm\n0yHgjRHxDeAI2a70fcC7O1E8Il5C9n/7X4FtZCvdOm1etyxhPfI24PqI+BrZ6WG3kJ2JtOJT1Bar\n3dzI2UL2nQ+g3Pw+PJBSWtEehoVqk4XcX5Nt3P8q8IRp67mHVvqT3iK1vwO8EThItkG/iey0yIvJ\nNkBWZAmf93dnTf9Dsve7fceOtPOQ/07/kf2uM0Z2BO0/A5d3qO7Pk/U4fjTr7y9zrjtXzR8Bv9WB\nZX43cF/zvX4AuAd4YRc/+0/RmdPyRslWshNkp8u8H7ikg8t5JfBVYJws/F7Zwdovbn6/nt7hz3YD\n2Qb9CbJzz4+TbWg9vkP1XwZ8rfmZfwt4O9CXQ51F1yNkB4mebH7+n2jXZ7FYbeA/z/P8f8uzNo+d\nBjh9/NTjF+Rc+wKyjY1vND/7bwJ3A4Od+rxnTX8fbT4tz5vnSJJUAGvyN3xJktQaA1+SpAIw8CVJ\nKgADX5KkAjDwJUkqAANfkqQCMPAlSSoAA1+SpAIw8CVJKgADX5KkAjDwJUkqgP8P/NE4gB5wycEA\nAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = np.array(res_rf)\n", + "y = x.transpose()\n", + "df = pd.DataFrame(y)\n", + "print('Standard Robinson Foulds:')\n", + "df.plot.box()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Testing the code" + ] + }, + { + "cell_type": "code", + "execution_count": 1451, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:0\n", + "INFO:root:(R,((H,G,E,(Q,L)speciation)duplication,K,(I,F)speciation,J)duplication)\n", + "INFO:root:1\n", + "INFO:root:(R,((H,G,E,(Q,L)speciation)duplication,K,(I,F)speciation,J)speciation)\n", + "INFO:root:2\n", + "INFO:root:(R,((H,G,E,(Q,L)speciation)duplication,K,I,F,J)speciation)\n", + "INFO:root:3\n", + "INFO:root:(R,(((H,E,(Q,L)speciation)duplication,G)duplication,K,I,F,J)speciation)\n", + "INFO:root:#flips 1 #col 1, #exp 2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(R,(H,(Q,L)speciation,E,K,G,(I,F)speciation,J)duplication)\n", + "/---------------------------------------------------------------------------- R\n", + "| \n", + "| /--------------------------------------------------- H\n", + "| | \n", + "| | /------------------------- Q\n", + "+ |-------------------------+ \n", + "| | \\------------------------- L\n", + "| | \n", + "| |--------------------------------------------------- E\n", + "| | \n", + "\\------------------------+--------------------------------------------------- K\n", + " | \n", + " |--------------------------------------------------- G\n", + " | \n", + " | /------------------------- I\n", + " |-------------------------+ \n", + " | \\------------------------- F\n", + " | \n", + " \\--------------------------------------------------- J\n", + " \n", + " \n", + "(R,(((H,E,(Q,L)speciation)duplication,G)duplication,K,I,F,J)speciation)\n", + "/---------------------------------------------------------------------------- R\n", + "| \n", + "| /------------------------------ H\n", + "| | \n", + "| /---------------+------------------------------ E\n", + "| | | \n", + "+ | | /--------------- Q\n", + "| /--------------+ \\--------------+ \n", + "| | | \\--------------- L\n", + "| | | \n", + "| | \\---------------------------------------------- G\n", + "| | \n", + "\\--------------+------------------------------------------------------------- K\n", + " | \n", + " |------------------------------------------------------------- I\n", + " | \n", + " |------------------------------------------------------------- F\n", + " | \n", + " \\------------------------------------------------------------- J\n", + " \n", + " \n" + ] + } + ], + "source": [ + "logging.root.level = 20\n", + "print(t)\n", + "t.print_plot()\n", + "t2 = mutateLabeledTree(t.clone(depth=1), 4)\n", + "t1 = t.clone(depth=1)\n", + "print(t2)\n", + "t2.print_plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 1452, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:1000000100\n", + "INFO:root:---\n", + "INFO:root:0010111010\n", + "INFO:root:0010110010\n", + "INFO:root:collapsing node \n", + "INFO:root:***\n", + "INFO:root: #component t1 1\n", + "INFO:root:{: <__main__.LRF..maxpath object at 0x10a320048>}\n", + "INFO:root: #component t2 1\n", + "INFO:root:{: <__main__.LRF..maxpath object at 0x10a319fd0>}\n", + "INFO:root: #flips 2\n", + "INFO:root:(R,(H,(Q,L)speciation,E,K,G,I,F,J)arbitrary)\n", + "INFO:root:(R,(H,E,(Q,L)speciation,G,K,I,F,J)arbitrary)\n", + "INFO:root:/---------------------------------------------------------------------------- R\n", + "| \n", + "| /--------------------------------------------------- H\n", + "| | \n", + "| | /------------------------- Q\n", + "+ |-------------------------+ \n", + "| | \\------------------------- L\n", + "| | \n", + "| |--------------------------------------------------- E\n", + "| | \n", + "\\------------------------+--------------------------------------------------- K\n", + " | \n", + " |--------------------------------------------------- G\n", + " | \n", + " |--------------------------------------------------- I\n", + " | \n", + " |--------------------------------------------------- F\n", + " | \n", + " \\--------------------------------------------------- J\n", + " \n", + " \n", + "INFO:root:Matching islands are both of type arbitrary & can save a flip!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step 1, collapse consistent edges: 1\n", + "{('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q'): , ('L', 'Q'): , ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): }\n", + "-\n", + "{('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q'): , ('L', 'Q'): , ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): }\n", + "{512, 4} {64, 160, 2, 256, 8, 16, 1022} {64, 512, 4, 256, 1022} {16, 8, 2, 160}\n", + "step 3, perform remaining flips -1\n", + "**TOTAL** extended RF dist: 4\n" + ] + }, + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 1452, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LRF(t1,t2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Legacy code by Gabriela" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## A) Identifying" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('E', 'F', 'G', 'H') (, )\n", + "('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q') (, )\n", + "('I', 'J', 'K', 'L') (, )\n", + "('I', 'J', 'K', 'L', 'Q') (, )\n", + "('E', 'F') (, )\n", + "('G', 'H') (, )\n", + "('K', 'L') (, )\n", + "('I', 'J') (, )\n", + "\n", + "\n", + "There are 8 bipartitions.\n" + ] + } + ], + "source": [ + "# Comment by CD: the code has a mistake in that it also returns a \"trivial\" bipartition \n", + "# with one taxon on one side (R) and all others on the other side.\n", + "\n", + "def id_bipartitions_and_linkers(tree):\n", + " tree.encode_bipartitions()\n", + " linkers_bipartitions = {}\n", + " labeled_nodes = [node for node in tree if node.label]\n", + " for node in labeled_nodes:\n", + " if len(node.bipartition.leafset_taxa(taxon_namespace = tree.taxon_namespace))>1:\n", + " #if node.parent_node in labeled_nodes:\n", + " linkers_bipartitions[tuple(sorted([nd.label for nd in node.bipartition.leafset_taxa(taxon_namespace = tree.taxon_namespace)]))] = (node, node.parent_node)\n", + " return linkers_bipartitions\n", + "\n", + "# testing:\n", + "for element in id_bipartitions_and_linkers(t1):\n", + " print (element, id_bipartitions_and_linkers(t1)[element])\n", + "print (\"\\n\")\n", + "print (\"There are\", len(id_bipartitions_and_linkers(t1)), \"bipartitions.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The set of differing biparitions is: {('E', 'F', 'G', 'H', 'R'), ('E', 'F', 'G', 'H', 'Q'), ('I', 'J', 'K', 'L', 'Q'), ('I', 'J', 'K', 'L', 'R')} \n", + "\n", + "All the edges that will be collapsed in tree_1 [(, ), (, )]\n", + "All the edges that will be collapsed in tree_2 [(, ), (, )]\n", + "\n", + "\n", + "The edges that will be collapsed in this first step in tree 1 are:\n", + "(, ) \n", + "\n", + "The edges that will be collapsed in this first step in tree 2 are:\n", + "(, ) \n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# note CD: some bipartitions are counted twice (e.g. ('E', 'F', 'G', 'H', 'R') == ('I', 'J', 'K', 'L', 'Q'))\n", + "\n", + "def id_differing(tree_1, tree_2):\n", + " differing_bipartitions = set(id_bipartitions_and_linkers(tree_1).keys()).symmetric_difference(set(id_bipartitions_and_linkers(tree_2)))\n", + " return differing_bipartitions\n", + "differing_bipartitions = id_differing(tree_1,tree_2)\n", + "\n", + "tree_1_edges = [id_bipartitions_and_linkers(tree_1)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_1).keys()]\n", + "tree_2_edges = [id_bipartitions_and_linkers(tree_2)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_2).keys()]\n", + "\n", + "diff_same_tree_1 = [element for element in tree_1_edges if element[0].label == element[1].label]\n", + "diff_same_tree_2 = [element for element in tree_2_edges if element[0].label == element[1].label]\n", + "\n", + "\n", + "# testing\n", + "print (\"The set of differing biparitions is:\", differing_bipartitions, \"\\n\")\n", + "\n", + "\n", + "print (\"All the edges that will be collapsed in tree_1\", tree_1_edges)\n", + "print (\"All the edges that will be collapsed in tree_2\", tree_2_edges)\n", + "print (\"\\n\")\n", + "print (\"The edges that will be collapsed in this first step in tree 1 are:\")\n", + "for elem in range(0, len(diff_same_tree_1)):\n", + " print (diff_same_tree_1[elem], \"\\n\")\n", + "print (\"The edges that will be collapsed in this first step in tree 2 are:\")\n", + "for elem in range(0, len(diff_same_tree_2)):\n", + " print (diff_same_tree_2[elem], \"\\n\")\n", + "print (\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## B) Collapsing" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The first tree is now ((R,((E,F)speciation,(G,H)duplication)speciation)speciation,Q,((K,L)speciation,(J,I)speciation)speciation)duplication\n", + "The differing bipartitions are now only {('E', 'F', 'G', 'H', 'Q'), ('E', 'F', 'G', 'H', 'R')}\n" + ] + } + ], + "source": [ + "for element in diff_same_tree_1: element[0].edge.collapse()\n", + "for element in diff_same_tree_2: element[0].edge.collapse()\n", + "\n", + "# testing\n", + "\n", + "print (\"The first tree is now\", tree_1)\n", + "print (\"The differing bipartitions are now only\", id_differing(tree_1, tree_2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " /--------------------------------------------------------- R\n", + " | \n", + "/------------------+ /------------------- E\n", + "| | /------------------+ \n", + "| | | \\------------------- F\n", + "| \\------------------+ \n", + "| | /------------------- G\n", + "| \\------------------+ \n", + "+ \\------------------- H\n", + "| \n", + "|---------------------------------------------------------------------------- Q\n", + "| \n", + "| /------------------- K\n", + "| /------------------+ \n", + "| | \\------------------- L\n", + "\\-------------------------------------+ \n", + " | /------------------- J\n", + " \\------------------+ \n", + " \\------------------- I\n", + " \n", + " \n", + " /--------------------------------------------------------- Q\n", + " | \n", + "/------------------+ /------------------- E\n", + "| | /------------------+ \n", + "| | | \\------------------- F\n", + "| \\------------------+ \n", + "| | /------------------- G\n", + "| \\------------------+ \n", + "+ \\------------------- H\n", + "| \n", + "|---------------------------------------------------------------------------- R\n", + "| \n", + "| /------------------- K\n", + "| /------------------+ \n", + "| | \\------------------- L\n", + "\\-------------------------------------+ \n", + " | /------------------- J\n", + " \\------------------+ \n", + " \\------------------- I\n", + " \n", + " \n" + ] + } + ], + "source": [ + "tree_1.print_plot()\n", + "tree_2.print_plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 645, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 645, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.randint(2,2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'tuple' object has no attribute 'edge'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdiff_same_tree_1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'edge'" + ] + } + ], + "source": [ + "diff_same_tree_1[0].edge" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## C) How many operations to be added to extended Robinson-Foulds?" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "erf = 0\n", + "erf += len(diff_same_tree_1)+ len(diff_same_tree_2)\n", + "\n", + "#testing\n", + "\n", + "print (erf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# 2) Dealing with diff_diff bipartitions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## A) Identifying" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(, )\n", + "(, )\n", + "(, )\n", + "(, )\n", + "(, None)\n", + "(, )\n", + "(, )\n", + "(, )\n", + "\n", + "\n", + "('There are', 8, 'bipartitions in tree 1.')\n", + "('There are', 8, 'bipartitions in tree 2.')\n" + ] + } + ], + "source": [ + "for element in id_bipartitions_and_linkers(tree_1):\n", + " print (id_bipartitions_and_linkers(tree_1)[element])\n", + "print (\"\\n\")\n", + "print (\"There are\", len(id_bipartitions_and_linkers(tree_1)), \"bipartitions in tree 1.\")\n", + "print (\"There are\", len(id_bipartitions_and_linkers(tree_2)), \"bipartitions in tree 2.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set([('E', 'F', 'G', 'H', 'R'), ('E', 'F', 'G', 'H', 'Q')])\n", + "('The edges that will be collapsed in tree 1 in this step are', [(, )])\n", + "('The edges that will be collapsed in tree 2 in this step are', [(, )])\n" + ] + } + ], + "source": [ + "differing_bipartitions = id_differing(tree_1,tree_2)\n", + "\n", + "tree_1_edges = [id_bipartitions_and_linkers(tree_1)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_1).keys()]\n", + "tree_2_edges = [id_bipartitions_and_linkers(tree_2)[element] for element in differing_bipartitions if element in id_bipartitions_and_linkers(tree_2).keys()]\n", + "\n", + "diff_diff_tree_1 = [element for element in tree_1_edges if element[0].label != element[1].label]\n", + "diff_diff_tree_2 = [element for element in tree_2_edges if element[0].label != element[1].label]\n", + "\n", + "# testing\n", + "print (differing_bipartitions)\n", + "print (\"The edges that will be collapsed in tree 1 in this step are\", diff_diff_tree_1)\n", + "print (\"The edges that will be collapsed in tree 2 in this step are\", diff_diff_tree_2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## B) Organizing in subtrees" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{: [], : []}]\n", + "[{: [], : []}]\n" + ] + } + ], + "source": [ + "def adjacent_edges(node,diff):\n", + " output = []\n", + " for edge in diff:\n", + " if node in edge:\n", + " output.append(edge)\n", + " return output\n", + "\n", + "visited = {}\n", + "for edge in diff_diff_tree_1:\n", + " visited[edge] = False\n", + "\n", + "def explore(node,orig,diff, subtree = defaultdict(set)):\n", + " for edge in adjacent_edges(node,diff):\n", + " if edge == orig:\n", + " continue\n", + " else:\n", + " if edge[0] == node:\n", + " dest = edge[1]\n", + " else:\n", + " dest = edge[0]\n", + " subtree[node].add(dest)\n", + " subtree[dest].add(node)\n", + " orig = edge\n", + " explore(dest,orig,diff)\n", + " return subtree\n", + "\n", + "\n", + "subtrees_tree_1 = []\n", + "subtrees_tree_2 = []\n", + "nodes_tree_1 = set(list(itertools.chain(*diff_diff_tree_1)))\n", + "nodes_tree_2 = set(list(itertools.chain(*diff_diff_tree_2)))\n", + "\n", + "for node in nodes_tree_1:\n", + " subtree = defaultdict(set)\n", + " if explore(node, None, diff_diff_tree_1, subtree = subtree) not in subtrees_tree_1: \n", + " subtrees_tree_1.append(explore(node, None, diff_diff_tree_1, subtree = subtree))\n", + " \n", + "for node in nodes_tree_2:\n", + " subtree = defaultdict(set)\n", + " if explore(node, None, diff_diff_tree_2, subtree=subtree) not in subtrees_tree_2: \n", + " subtrees_tree_2.append(explore(node, None, diff_diff_tree_2, subtree= subtree))\n", + "\n", + "for i in range(0, len(subtrees_tree_1)): \n", + " subtrees_tree_1[i] = dict(subtrees_tree_1[i])\n", + " for j in subtrees_tree_1[i]:\n", + " subtrees_tree_1[i][j] = list(subtrees_tree_1[i][j])\n", + "for i in range(0, len(subtrees_tree_2)): \n", + " subtrees_tree_2[i] = dict(subtrees_tree_2[i])\n", + " for j in subtrees_tree_2[i]:\n", + " subtrees_tree_2[i][j] = list(subtrees_tree_2[i][j])\n", + "\n", + "print (subtrees_tree_1)\n", + "print (subtrees_tree_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## C) Longest chains for all subtrees" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{: None, : 1}\n", + "\n", + "['speciation']\n", + "['speciation']\n", + "('List of chain lengths', [1, 1])\n", + "('Total number of flips is', 2)\n" + ] + } + ], + "source": [ + "def bfs(start, adj):\n", + " distances = {}\n", + " distances[start] = None\n", + " i = 1\n", + " frontier = [start]\n", + " while len(frontier)>0:\n", + " next_array = []\n", + " for u in frontier:\n", + " for v in adj[u]:\n", + " if v not in distances:\n", + " distances[v]=i\n", + " next_array.append(v)\n", + " frontier = next_array\n", + " i+=1\n", + " return distances\n", + "\n", + "for i in range(0, len(subtrees_tree_1)):\n", + " print (bfs(list(nodes_tree_1)[0], subtrees_tree_1[i]))\n", + " \n", + "import math \n", + "\n", + "def longest_distance(distances):\n", + " values = list(distances.values())\n", + " keys = list(distances.keys())\n", + " return keys[values.index(max(values))], max(values)\n", + "\n", + "list_of_chains = []\n", + "end_types_1 = []\n", + "end_types_2 = []\n", + "\n", + "for i in range(0, len(subtrees_tree_1)):\n", + " print list(nodes_tree_1)[0]\n", + " first_node = longest_distance(bfs(list(nodes_tree_1)[0], subtrees_tree_1[i]))[0]\n", + " furthest_node = longest_distance(bfs(first_node,subtrees_tree_1[i]))[1]\n", + " list_of_chains.append(furthest_node)\n", + " if furthest_node % 2 == 1:\n", + " if (furthest_node - math.ceil(furthest_node/2))%2 == 0:\n", + " end_type = first_node.label\n", + " else:\n", + " if first_node.label == \"speciation\":\n", + " end_type = \"duplication\"\n", + " elif first_node.label == \"duplication\":\n", + " end_type = \"speciation\"\n", + " elif furthest_node % 2 == 0:\n", + " end_type = \"undetermined\"\n", + " end_types_1.append(end_type)\n", + "\n", + "for i in range(0, len(subtrees_tree_2)):\n", + " first_node = longest_distance(bfs(list(nodes_tree_2)[0], subtrees_tree_2[i]))[0]\n", + " furthest_node = longest_distance(bfs(first_node,subtrees_tree_2[i]))[1]\n", + " list_of_chains.append(furthest_node)\n", + " if furthest_node % 2 == 1:\n", + " if (furthest_node - math.ceil(furthest_node/2))%2 == 0:\n", + " end_type = first_node.label\n", + " else:\n", + " if first_node.label == \"speciation\":\n", + " end_type = \"duplication\"\n", + " elif first_node.label == \"duplication\":\n", + " end_type = \"speciation\"\n", + " elif furthest_node % 2 == 0:\n", + " end_type = \"undetermined\"\n", + " end_types_2.append(end_type)\n", + " \n", + "print (end_types_1)\n", + "print (end_types_2)\n", + "\n", + "print (\"List of chain lengths\", list_of_chains)\n", + "flips = 0\n", + "for i in list_of_chains:\n", + " if i == 1:\n", + " flips += 1\n", + " elif i > 1:\n", + " flips += math.floor(i/2)\n", + " \n", + "print (\"Total number of flips is\", int(flips))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## D) Collapsing " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def family_edges(subtrees_tree_1, diff_diff_tree_1):\n", + " family_edges_1 = []\n", + " for subtree in subtrees_tree_1:\n", + " family = subtree.keys()\n", + " family_edges = []\n", + " for edge in diff_diff_tree_1:\n", + " for key in family:\n", + " if key in edge:\n", + " family_edges.append(edge)\n", + " family_edges_1.append(list(set(family_edges)))\n", + " return family_edges_1\n", + "\n", + "\n", + "for family in range(0, len(family_edges(subtrees_tree_1, diff_diff_tree_1))):\n", + " for element in family_edges(subtrees_tree_1, diff_diff_tree_1)[family]:\n", + " element[0].edge.collapse()\n", + " final_node = element[1]\n", + " final_node.label = end_types_1[family] \n", + "\n", + "for family in range(0, len(family_edges(subtrees_tree_2, diff_diff_tree_2))):\n", + " for element in family_edges(subtrees_tree_2, diff_diff_tree_2)[family]:\n", + " element[0].edge.collapse()\n", + " final_node = element[1]\n", + " final_node.label = end_types_2[family]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "source": [ + "## E) How many operations to be added to the extended Robinson-Foulds metric?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "erf += flips + len(diff_diff_tree_1) + len(diff_diff_tree_2)\n", + "print (\"flips\", flips)\n", + "print (\"tree_1 operations\", len(diff_diff_tree_1))\n", + "print (\"tree_2 operations\", len(diff_diff_tree_2))\n", + "print (erf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# 3) Matching nodes in identical trees" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## 1) Quick check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "print (id_bipartitions_and_linkers(tree_1).keys())\n", + "print (id_bipartitions_and_linkers(tree_2).keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## 2) Checking if, for same topologies, matching nodes have same type" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{('G', 'H'): 'duplication', ('E', 'F', 'G', 'H', 'R'): 'speciation', ('K', 'L'): 'speciation', ('E', 'F'): 'speciation', ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): 'duplication', ('E', 'F', 'G', 'H'): 'speciation', ('I', 'J', 'K', 'L'): 'speciation', ('I', 'J'): 'speciation'}\n", + "{('G', 'H'): 'speciation', ('K', 'L'): 'speciation', ('E', 'F'): 'duplication', ('E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'Q', 'R'): 'duplication', ('E', 'F', 'G', 'H'): 'speciation', ('I', 'J'): 'speciation', ('I', 'J', 'K', 'L'): 'speciation', ('E', 'F', 'G', 'H', 'Q'): 'speciation'}\n" + ] + } + ], + "source": [ + "def matching_nodes(tree_1):\n", + " tree_transition = tree_1.clone()\n", + " internal_nodes = [node for node in tree_transition if node.is_internal()]\n", + " leaves_dic = {}\n", + " for node in internal_nodes:\n", + " leaves_dic[tuple(sorted(nd.taxon.label for nd in dendropy.Tree(seed_node = node).leaf_nodes()))] = node.label\n", + " return leaves_dic\n", + "print (matching_nodes(tree_1))\n", + "print (matching_nodes(tree_2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "diff_id = 0\n", + "for node in matching_nodes(tree_1):\n", + " if (matching_nodes(tree_1)[node] == \"speciation\" and matching_nodes(tree_2)[node] == \"duplication\") or (matching_nodes(tree_1)[node] == \"duplication\" and matching_nodes(tree_2)[node] == \"speciation\"):\n", + " diff_id += 1\n", + "print (diff_id) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## C) How many operations to be added to extended Robinson-Foulds metric?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "print (erf)\n", + "erf += diff_id\n", + "print (erf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# FINAL RESULT:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "print (\"The extended Robinson-Foulds metric between those two trees is\", erf)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/www/index.html b/www/index.html index a9c3c622..9ac4e887 100755 --- a/www/index.html +++ b/www/index.html @@ -900,8 +900,6 @@ }); - - /*------ / / SETTINGS @@ -914,7 +912,6 @@ }); } - var settingsShown = false; $("#settings").click(function (e) {