# Generate All Maximal Non-Branching Paths in a Graph

Reference : [Here](http://rosalind.info/problems/ba3m/)

#### Input  : The adjacency list of a graph whose nodes are integers.
#### Output : The collection of all maximal non-branching paths in the graph.

In [2]:
import pandas as pd
import numpy as np
import os

## Download the sample dataset from this [link](http://bioinformaticsalgorithms.com/data/extradatasets/assembly/maximal_nonbranching_paths.txt).
## Loading the Input file

In [3]:
dataset = 'sample_maximal_nonbranching_paths'
# Build the path with os.path.join instead of hard-coded "\\" separators so the
# cell also works outside Windows. The trailing "" keeps a trailing separator
# on dataset_folder, so code that concatenates onto it keeps working.
dataset_folder = os.path.join(os.getcwd(), "TestData", "")
with open(os.path.join(dataset_folder, dataset + '.txt')) as file:
    data = [line.rstrip() for line in file]
# print(*data, sep="\n")

# The first line is skipped; everything up to the 'Output' marker line is the
# input adjacency list, and everything after it is the expected output.
output_marker = data.index('Output')
inp = data[1:output_marker]
out = data[output_marker + 1:]
# print(*out, sep="\n")
data = inp

## Custom Inputs

In [4]:
# Alternative custom input (uncomment to use): a branching path
# 1 -> 2 -> 3 -> 4/5 plus the two-node cycle 6 -> 7 -> 6.
# data = [
#     '1 -> 2',
#     '2 -> 3',
#     '3 -> 4,5',
#     '6 -> 7',
#     '7 -> 6'
# ]

# Active custom input. This assignment replaces whatever `data` held before
# this cell ran. It contains a branching path (1 -> 2 -> 3 -> 4/5) and two
# cycles (6 -> 7 -> 8 -> 9 -> 6 and 10 -> 11 -> 10).
data = [
    '1 -> 2',
    '2 -> 3',
    '3 -> 4,5',
    '6 -> 7',
    '7 -> 8',
    '8 -> 9',
    '10 -> 11',
    '9 -> 6',
    '11 -> 10'
]

## Pseudocode

    MaximalNonBranchingPaths(Graph)
        Paths ← empty list
        for each node v in Graph
            if v is not a 1-in-1-out node
                if out(v) > 0
                    for each outgoing edge (v, w) from v
                        NonBranchingPath ← the path consisting of the single edge (v, w)
                        while w is a 1-in-1-out node
                            extend NonBranchingPath by the outgoing edge (w, u) from w 
                            w ← u
                        add NonBranchingPath to the set Paths
        for each isolated cycle Cycle in Graph
            add Cycle to Paths
        return Paths 

## Generating the adjacency list corresponding to the input

In [5]:
def convert_to_graph_edges(raw_data):
    """Parse adjacency-list lines into a dict of node -> list of successors.

    Each line has the form ``"src -> dst1,dst2,..."``. Lines that share the
    same source node are merged, keeping successors in order of appearance.
    Blank lines are ignored and whitespace around node labels is stripped.

    Args:
        raw_data: Iterable of adjacency-list strings.

    Returns:
        dict mapping each source label (str) to a list of successor labels.

    Raises:
        ValueError: If a non-blank line contains no ``->`` separator.
    """
    graph = {}
    for line in raw_data:
        if not line.strip():
            # Tolerate empty/whitespace-only lines (e.g. a trailing newline).
            continue
        parts = line.split("->")
        if len(parts) < 2:
            raise ValueError(f"Malformed adjacency line: {line!r}")
        source = parts[0].strip()
        # Drop empty labels produced by stray commas or padding.
        targets = [t.strip() for t in parts[1].split(',') if t.strip()]
        graph.setdefault(source, []).extend(targets)

    return graph

## Generating the adjacency matrix from the adjacency list

In [6]:
def load_adj_mat(graph_edges):
    """Build a 0/1 adjacency matrix (DataFrame) from an adjacency dict.

    Rows are source nodes and columns are target nodes. Labels are the
    string forms of the node integers, arranged in ascending numeric order.
    A cell is 1 when the edge row -> column is present, otherwise 0.
    """
    # Gather every node that appears as a source or as a target.
    nodes = set(graph_edges)
    for targets in graph_edges.values():
        nodes.update(targets)

    # Sort numerically (not as strings), then turn back into string labels.
    numeric = np.sort([int(node) for node in nodes])
    labels = [str(value) for value in numeric]

    size = len(labels)
    adj_mat = pd.DataFrame(
        np.zeros((size, size), dtype=int),
        index=list(labels),
        columns=list(labels),
    )

    # Mark each edge source -> target.
    for source, targets in graph_edges.items():
        for target in targets:
            adj_mat.loc[source, target] = 1

    return adj_mat

## Performing DFS for linear and cyclic paths

In [7]:
def do_DFS(matrix, start, List):
    """Depth-first traversal over the adjacency matrix, in visiting order.

    `start` is appended to `List`, then every successor of `start` (a column
    holding 1 in its row) that is not yet in `List` is explored recursively.
    `List` is mutated in place and also returned.
    """
    List.append(start)

    has_edge = matrix.loc[start].isin([1])
    for neighbour in matrix.index[has_edge].tolist():
        if neighbour in List:
            continue
        List = do_DFS(matrix, neighbour, List)  # explore unvisited successor

    return List

#For isolated cycle detection
def i_dfs(matrix,u,ipaths,status,parent):
    """Colour-marking DFS from `u` that records cycles through `add_cycle`.

    Implemented with an explicit stack rather than recursion so that long
    paths/cycles do not hit Python's recursion limit. Visiting order and
    side effects match the straightforward recursive formulation.

    Args:
        matrix: Adjacency DataFrame; a 1 in row r, column c is the edge r -> c.
        u: Node label to start from.
        ipaths: List that `add_cycle` appends discovered cycle strings to.
        status: dict node -> 'W' (unvisited), 'G' (in progress), 'B' (done).
            Updated in place.
        parent: dict node -> node it was reached from. Updated in place.
    """
    def successors(node):
        # Snapshot the successor labels once, when the node is first entered.
        return iter(matrix.columns[matrix.loc[node].isin([1])].tolist())

    status[u] = 'G'
    stack = [(u, successors(u))]
    while stack:
        node, pending = stack[-1]
        descended = False
        for v in pending:
            if status[v] == 'G':
                # Edge back to an in-progress node closes a cycle; stop
                # scanning this node's remaining successors.
                parent[v] = node
                add_cycle(node,parent,ipaths)
                break
            if status[v] == 'W':
                parent[v] = node
                status[v] = 'G'
                stack.append((v, successors(v)))
                descended = True
                break
        if not descended:
            # All successors handled (or a cycle was closed): node is done.
            status[node] = 'B'
            stack.pop()

## Adding the cyclic paths to the final output

In [8]:
def add_cycle(u, parent, ipaths):
    """Append to `ipaths` the cycle found by following `parent` links from `u`.

    The walk starts at `u`, repeatedly steps to the parent of the current
    node, and stops once it arrives back at `u`. The visited nodes are joined
    with ' -> ', so the string both starts and ends with `u`.
    """
    trail = [u, parent[u]]
    while trail[-1] != u:
        trail.append(parent[trail[-1]])
    ipaths.append(' -> '.join(f'{node}' for node in trail))

## Finding the Maximal Non-Branching Paths

In [9]:
def MaximalNonBranchingPaths(graph):
    """Collect all maximal non-branching paths of `graph`.

    Args:
        graph: dict mapping a node label (str) to its list of successor labels.

    Returns:
        A tuple ``(Paths, matrix, icycle_paths)`` where
        * ``Paths`` is a list of ' -> '-joined path strings: first the paths
          that start at nodes which are not 1-in-1-out, then the cycles
          made up only of 1-in-1-out nodes;
        * ``matrix`` is the adjacency DataFrame built by `load_adj_mat`;
        * ``icycle_paths`` is the raw cycle list as produced by `i_dfs`
          (node order reversed relative to the entries in ``Paths``).
    """
    matrix = load_adj_mat(graph)

    # Degrees never change while paths are traced, so compute them once
    # instead of re-summing a matrix row/column for every test.
    out_degree = matrix.sum(axis=1).to_dict()
    in_degree = matrix.sum(axis=0).to_dict()

    def is_one_in_one_out(node):
        return out_degree[node] == 1 and in_degree[node] == 1

    def successors(node):
        return matrix.index[matrix.loc[node].isin([1])].tolist()

    Paths = []
    visited = set()

    for key in graph:
        if is_one_in_one_out(key) or out_degree[key] <= 0:
            continue
        visited.add(key)
        for outnode in successors(key):
            trail = [key]
            # Extend through 1-in-1-out nodes until a branching/end node.
            while is_one_in_one_out(outnode):
                visited.add(outnode)
                trail.append(outnode)
                outnode = successors(outnode)[0]
            visited.add(outnode)
            trail.append(outnode)
            Paths.append(" -> ".join(trail))

    # Source nodes never touched above can only lie on isolated cycles.
    icycle_nodes = sorted(key for key in graph if key not in visited)
    status = {node: 'W' for node in icycle_nodes}
    parent = {node: None for node in icycle_nodes}
    icycle_paths = []

    for node in icycle_nodes:
        # Nodes already finished by an earlier DFS cannot yield a new cycle.
        if status[node] == 'W':
            i_dfs(matrix, node, icycle_paths, status, parent)

    # i_dfs records cycles by walking parent links (i.e. against the edge
    # direction); reverse each one so it reads along the edges.
    Paths += [' -> '.join(reversed(cycle.split(' -> '))) for cycle in icycle_paths]
    return (Paths, matrix, icycle_paths)

In [10]:
# Build the graph from the input lines and compute the paths.
graph = convert_to_graph_edges(data)
Paths, Matrix, ic = MaximalNonBranchingPaths(graph)

# Save the result, one path per line.
with open('output.txt', 'w') as f:
    f.writelines(path + '\n' for path in Paths)

# Report the parsed graph, its adjacency matrix and the resulting paths.
print('Input Graph Edges :', graph)
print('\nAdjacent Matrix :')
display(Matrix)

print('Generated Maximal Non-Branching Paths')
print('\n'.join(Paths))

Input Graph Edges : {'1': ['2'], '2': ['3'], '3': ['4', '5'], '6': ['7'], '7': ['8'], '8': ['9'], '10': ['11'], '9': ['6'], '11': ['10']}

Adjacent Matrix :


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11
1,0,1,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,0
3,0,0,0,1,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,1,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0,0
8,0,0,0,0,0,0,0,0,1,0,0
9,0,0,0,0,0,1,0,0,0,0,0
10,0,0,0,0,0,0,0,0,0,0,1


Generated Maximal Non-Branching Paths
1 -> 2 -> 3
3 -> 4
3 -> 5
11 -> 10 -> 11
9 -> 6 -> 7 -> 8 -> 9


In [11]:
graph

{'1': ['2'],
 '2': ['3'],
 '3': ['4', '5'],
 '6': ['7'],
 '7': ['8'],
 '8': ['9'],
 '10': ['11'],
 '9': ['6'],
 '11': ['10']}