MaximalNonBranchingPaths(Graph)

        Paths ← empty list

        for each node v in Graph

            if v is not a 1-in-1-out node

                if out(v) > 0

                    for each outgoing edge (v, w) from v

                        NonBranchingPath ← the path consisting of the single edge (v, w)

                        while w is a 1-in-1-out node

                            extend NonBranchingPath by the outgoing edge (w, u) from w 

                            w ← u

                        add NonBranchingPath to the set Paths

        for each isolated cycle Cycle in Graph

            add Cycle to Paths
            
        return Paths

In [1]:
def DefineGraphDict(graph):
  graph_dict = {}
  for node_adjacent_nodes in graph:
    node_adjacent_nodes = node_adjacent_nodes.split(' -> ')
    graph_dict.setdefault(int(node_adjacent_nodes[0]),[])
    node_adjacent_nodes[1] = node_adjacent_nodes[1].split(',')
    for node in node_adjacent_nodes[1]:
      graph_dict[int(node_adjacent_nodes[0])].append(int(node))
  return graph_dict

In [2]:
def NodeInDegree(graph_dict,node):
  node_in_degree = 0
  for adjacent_nodes in graph_dict.values():
    node_in_degree = node_in_degree + adjacent_nodes.count(node)
  return node_in_degree

In [3]:
def NodeOutDegree(graph_dict,node):
  if node in graph_dict.keys():
    return len(graph_dict[node])
  else:
    return 0

In [4]:
def GraphNodes(graph_dict):
  graph_nodes = set()
  for key,adjacent_nodes in graph_dict.items():
    graph_nodes.update([key] + adjacent_nodes)
  return graph_nodes

A node v in a directed graph Graph is called a 1-in-1-out node if its indegree and outdegree are both equal to 1, i.e., in(v) = out(v) = 1.  We can rephrase the definition of a "maximal non-branching path" from the main text as a path whose internal nodes are 1-in-1-out nodes and whose initial and final nodes are not 1-in-1-out nodes.  Also, note that the definition from the main text does not handle the special case when Graph has a connected component that is an isolated cycle, in which all nodes are 1-in-1-out nodes.

In [20]:
def OneInOneOutNode(graph_dict,node):
  if NodeInDegree(graph_dict,node) == 1 and NodeOutDegree(graph_dict,node) == 1:
    return True
  else:
    return False

In [21]:
def NonBranchingPaths(graph_dict):
  paths = []
  for node in GraphNodes(graph_dict):
    if not OneInOneOutNode(graph_dict,node): #not 1-in-1-out node
      if NodeOutDegree(graph_dict,node) > 0:
        for adjacent_node in graph_dict[node]:
          non_branching_path = [node,adjacent_node]
          while NodeInDegree(graph_dict,adjacent_node) == 1 and NodeOutDegree(graph_dict,adjacent_node) == 1:
            non_branching_path.append(graph_dict[adjacent_node][0])
            adjacent_node = graph_dict[adjacent_node][0]
          paths.append(non_branching_path)
  for node in GraphNodes(graph_dict):
      #definition from the main text does not handle the special case when Graph has a connected component that is an isolated cycle, in which all nodes are 1-in-1-out nodes.
      if OneInOneOutNode(graph_dict,node): #1-in-1-out node
        for adjacent_node in graph_dict[node]:
          non_branching_path = [node,adjacent_node]
          while NodeInDegree(graph_dict,adjacent_node) == 1 and NodeOutDegree(graph_dict,adjacent_node) == 1 and adjacent_node != node:
            non_branching_path.append(graph_dict[adjacent_node][0])
            adjacent_node = graph_dict[adjacent_node][0]
          paths.append(non_branching_path)
  return paths

In [22]:
def FindAllKmers(non_branching_path,k):
  kmers_list = []
  i = 0
  while i + k - 1 <= len(non_branching_path) - 1:
    kmers_list.append(non_branching_path[i:i+k])
    i = i + 1
  return kmers_list

A maximal non-branching path is a non-branching path that cannot be extended into a longer non-branching path --> if we have a list of non-branching paths and if we sort the list by increasing non-branching paths length, certain non-branching path can only be contained in non-branching paths with higher length

In [23]:
from numpy import delete

In [24]:
def MaximalNonBranchingPaths(paths):
  indices_to_delete = []
  for i in range(len(paths)):
    for j in range(len(paths)):
      if len(paths[i]) < len(paths[j]):
        if paths[i] != paths[j] and paths[i] in FindAllKmers(paths[j],len(paths[i])):
          indices_to_delete.append(i)
      elif len(paths[i]) == len(paths[j]):
        if paths[i] != paths[j] and set(paths[i]) == set(paths[j]) and j not in indices_to_delete:
          indices_to_delete.append(i)
  paths = list(delete(paths,indices_to_delete))
  return paths

In [25]:
def PrintResult(paths):
  for path in paths:
    string_to_print = ''
    for node in path:
      string_to_print = string_to_print + str(node) + ' -> '
    print(string_to_print[0:len(string_to_print)-4])

In [26]:
def PrintResultToFile(paths):
  f = open("task_result.txt","w")
  for path in paths:
    string_to_print = ''
    for node in path:
      string_to_print = string_to_print + str(node) + ' -> '
    f.write((string_to_print[0:len(string_to_print)-4] + '\n'))
  f.close()

In [27]:
graph = [
'1 -> 2',
'2 -> 3',
'3 -> 4,5',
'6 -> 7',
'7 -> 6']

In [28]:
graph_dict = DefineGraphDict(graph)
graph_dict

{1: [2], 2: [3], 3: [4, 5], 6: [7], 7: [6]}

In [29]:
MaximalNonBranchingPaths(NonBranchingPaths(graph_dict))

  arr = asarray(arr)


[[1, 2, 3], [3, 4], [3, 5], [7, 6, 7]]

In [30]:
PrintResult(MaximalNonBranchingPaths(NonBranchingPaths(graph_dict)))

1 -> 2 -> 3
3 -> 4
3 -> 5
7 -> 6 -> 7


  arr = asarray(arr)


In [34]:
with open('/content/rosalind_ba3m.txt') as task_file:
  graph = [line.rstrip() for line in task_file]

In [35]:
graph_dict = DefineGraphDict(graph)

In [36]:
PrintResultToFile(MaximalNonBranchingPaths(NonBranchingPaths(graph_dict)))

  arr = asarray(arr)
