In [1]:
def de_bruijn_ize(st, k):
    """ Build the De Bruijn graph of the k-mers of st.

        Every k-mer (length-k window of st, taken left to right)
        contributes one directed edge from its left k-1-mer to its
        right k-1-mer.

        Returns a pair (nodes, edges): nodes is the set of distinct
        k-1-mers seen, edges is a list of (left, right) pairs in order
        of occurrence, with repeated k-mers giving repeated edges. """
    # One (prefix, suffix) pair per window start, duplicates kept.
    edges = [(st[pos:pos + k - 1], st[pos + 1:pos + k])
             for pos in range(len(st) - k + 1)]
    # Every endpoint of every edge is a node; the set removes repeats.
    nodes = {kmer for pair in edges for kmer in pair}
    return nodes, edges

In [2]:
# Build the De Bruijn graph of the 3-mers of a short example sequence:
# nodes are the distinct 2-mers, edges hold one (left, right) pair per 3-mer.
nodes, edges = de_bruijn_ize("ACGCGTCG", 3)

In [3]:
nodes

{'AC', 'CG', 'GC', 'GT', 'TC'}

In [4]:
edges

[('AC', 'CG'),
 ('CG', 'GC'),
 ('GC', 'CG'),
 ('CG', 'GT'),
 ('GT', 'TC'),
 ('TC', 'CG')]

In [5]:
def visualize_de_bruijn(st, k):
    """ Return Graphviz DOT source for the De Bruijn multigraph of st.

        The graph comes from de_bruijn_ize(st, k): one DOT node per
        distinct k-1-mer and one DOT edge per k-mer, so repeated k-mers
        show up as parallel edges.

        Node identifiers are written as double-quoted DOT strings.
        Bare identifiers are only valid in DOT when they look like
        plain words or numerals and are not keywords (node, edge,
        graph, ...), so quoting keeps the output valid for arbitrary
        input text, not just DNA alphabets.

        This function only builds the string; rendering is left to the
        caller (e.g. a Graphviz magic). """
    def quote(name):
        # Within a double-quoted DOT ID the quote character itself is
        # the only thing that has to be escaped.
        return '"%s"' % name.replace('"', '\\"')

    nodes, edges = de_bruijn_ize(st, k)
    # Collect the pieces and join once instead of growing a string.
    parts = ['digraph "DeBruijn graph" {\n']
    for node in nodes:
        quoted = quote(node)
        parts.append('  %s [label=%s] ;\n' % (quoted, quoted))
    for src, dst in edges:
        parts.append('  %s -> %s ;\n' % (quote(src), quote(dst)))
    parts.append('}\n')
    return ''.join(parts)

In [None]:
# Load (or reload) the gvmagic IPython extension in this kernel.
# The extension has to be installed before this line can succeed; this
# notebook installs it from PyPI with `pip install gvmagic` in another cell.
# Legacy alternative for old IPython versions
# (NOTE(review): %install_ext looks unavailable in current IPython - confirm):
# %install_ext https://raw.github.com/cjdrake/ipython-magic/master/gvmagic.py
%reload_ext gvmagic


In [13]:
pip install gvmagic

Collecting gvmagic
  Downloading https://files.pythonhosted.org/packages/f7/f8/725b48babca08b58e863bd6f2335641578874c795c5936f8aba1802b74a9/gvmagic-0.5.tar.gz
Building wheels for collected packages: gvmagic
  Building wheel for gvmagic (setup.py) ... [?25l[?25hdone
  Created wheel for gvmagic: filename=gvmagic-0.5-cp27-none-any.whl size=10376 sha256=390eed89ff5965d795798c3e1969d69b50ed8b49b9e502ee517130252b0e04cc
  Stored in directory: /root/.cache/pip/wheels/f6/67/1f/64d62350d4cffe0a94e39af37828a6a5bfba1b9b9d6d95dbf8
Successfully built gvmagic
Installing collected packages: gvmagic
Successfully installed gvmagic-0.5


In [18]:
%dotstr visualize_de_bruijn("ACGCGTCG", 3)

UsageError: Line magic function `%dotstr` not found.


In [19]:
# Same construction on a text with a repeated substring, using 5-mers:
# the repeats yield duplicate entries in the edge list (a multigraph), while
# the node set keeps each 4-mer only once.
nodes, edges = de_bruijn_ize('a_long_long_long_time',5)

In [20]:
nodes

{'_lon',
 '_tim',
 'a_lo',
 'g_lo',
 'g_ti',
 'long',
 'ng_l',
 'ng_t',
 'ong_',
 'time'}

In [10]:
edges

[('a_lo', '_lon'),
 ('_lon', 'long'),
 ('long', 'ong_'),
 ('ong_', 'ng_l'),
 ('ng_l', 'g_lo'),
 ('g_lo', '_lon'),
 ('_lon', 'long'),
 ('long', 'ong_'),
 ('ong_', 'ng_l'),
 ('ng_l', 'g_lo'),
 ('g_lo', '_lon'),
 ('_lon', 'long'),
 ('long', 'ong_'),
 ('ong_', 'ng_t'),
 ('ng_t', 'g_ti'),
 ('g_ti', '_tim'),
 ('_tim', 'time')]