In [1]:
import re

def parse_graph_input(input_str):
    """Extract the edge list from its textual form.

    Every occurrence of ``(a, b)`` in ``input_str`` -- two runs of decimal
    digits separated by a comma and optional whitespace -- becomes one
    ``(int, int)`` tuple. Text that does not match this shape is ignored.

    Returns:
        The edges as a list of integer pairs, in order of appearance.
    """
    edge_pattern = re.compile(r'\((\d+),\s*(\d+)\)')
    return [
        tuple(map(int, match.groups()))
        for match in edge_pattern.finditer(input_str)
    ]

def build_adjacency_list(edges):
    """Index the edges by endpoint.

    Each edge ``(u, v)`` is recorded in both directions, so the resulting
    mapping answers "which node is joined to this one?" for either end.
    If a node appears in more than one edge, the last edge listed wins.

    Returns:
        A ``(mapping, nodes)`` tuple: the node -> partner dictionary and the
        list of all distinct endpoints in ascending order.
    """
    partner_of = {}
    for left, right in edges:
        # Edges are undirected: store the link from both ends.
        partner_of[left] = right
        partner_of[right] = left
    # The dictionary keys are exactly the set of endpoints seen.
    return partner_of, sorted(partner_of)

def find_ordered_cycles(adj_list, all_nodes):
    """Walk the graph and return the node sequence of every cycle.

    Starting from each not-yet-visited node of ``all_nodes`` (taken in the
    given order), the walk alternates between two kinds of steps:

    * a "black edge" step to the node's pair -- an even node ``n`` pairs with
      ``n - 1`` and an odd node pairs with ``n + 1``;
    * a colored-edge step looked up in ``adj_list``.

    The walk stops as soon as the colored edge leads to a node that has
    already been visited.

    Args:
        adj_list: mapping from a node to the node at the other end of its
            colored edge.
        all_nodes: iterable of nodes to try as starting points.

    Returns:
        A list of cycles; each cycle is a flat list of nodes in which
        consecutive positions ``(0, 1), (2, 3), ...`` are black-edge pairs.

    Raises:
        KeyError: if a paired node has no entry in ``adj_list``.
    """
    seen = set()
    found = []

    for origin in all_nodes:
        if origin in seen:
            continue

        path = []
        node = origin
        while node not in seen:
            # Cross the black edge: odd -> next node, even -> previous node.
            partner = node + 1 if node % 2 else node - 1
            seen.update((node, partner))
            path.extend((node, partner))

            # Follow the colored edge out of the partner.
            node = adj_list[partner]

        found.append(path)

    return found

def cycle_to_chromosome(ordered_cycle_nodes):
    """Turn a flat cycle of nodes into a list of signed block numbers.

    The nodes are consumed two at a time (positions ``0-1``, ``2-3``, ...).
    For a pair ``(a, b)``:

    * if ``a < b`` the block is ``+ (b // 2)``;
    * otherwise the block is ``- (a // 2)``.

    Args:
        ordered_cycle_nodes: sequence of nodes with an even number of entries.

    Returns:
        The signed blocks, one per pair, in traversal order.

    Raises:
        IndexError: if the sequence has an odd number of entries.
    """
    blocks = []
    for start in range(0, len(ordered_cycle_nodes), 2):
        head, tail = ordered_cycle_nodes[start], ordered_cycle_nodes[start + 1]
        # Ascending pair -> positive block, descending pair -> negative block.
        blocks.append(tail // 2 if head < tail else -(head // 2))
    return blocks

def format_genome_output(genome):
    """Render a genome as text, e.g. ``(+1 -2 -3)(+4 +6 -5)``.

    Each chromosome is wrapped in parentheses with its blocks separated by
    single spaces; chromosomes are concatenated with nothing in between.
    Strictly positive blocks get an explicit ``+`` prefix, all other values
    are printed as ``str`` renders them.
    """
    def signed(block):
        text = str(block)
        return "+" + text if block > 0 else text

    return "".join(
        "(" + " ".join(signed(block) for block in chrom) + ")"
        for chrom in genome
    )

def graph_to_genome(edges):
    """Convert a list of colored edges into a genome.

    The edges are indexed with ``build_adjacency_list``, the cycles are
    recovered with ``find_ordered_cycles``, and every cycle is translated to
    a chromosome with ``cycle_to_chromosome``.

    Returns:
        A list of chromosomes, one per cycle, each a list of signed blocks.
    """
    partner_of, nodes = build_adjacency_list(edges)
    node_cycles = find_ordered_cycles(partner_of, nodes)
    return [cycle_to_chromosome(cycle) for cycle in node_cycles]

if __name__ == "__main__":
    # Script entry point: read the edge list from the data file, rebuild the
    # genome and print it in parenthesised, signed form.
    file_path = "../data/rosalind_ba6i.txt"

    # Only the first line of the file is read.
    with open(file_path, 'r') as handle:
        first_line = handle.readline().strip()

    genome = graph_to_genome(parse_graph_input(first_line))
    print(format_genome_output(genome))

(+1 -2 +3 +4 -5 +6 +7 +8 -9 +10 +11 -12 +13 +14 -15 -16 +17 -18 +19 +20 -21 +22 +23 -24 -25 +26 +27)(+28 -29 +30 -31 +32 +33 +34 -35 -36 -37 -38 -39 -40 -41 +42 -43 +44 -45 -46 -47 -48 -49 -50)(+51 -72 -71 -70 -69 -68 -67 +66 -65 -64 +63 -62 +61 -60 -59 +58 +57 -56 +55 +54 -53 -52)(+73 -74 -75 +76 +77 +78 +79 -80 -81 +82 -83 -84 +85 -86 +87 +88 +89 -90 -91 +92 -93 -94)(+95 -124 -123 +122 -121 +120 -119 +118 +117 -116 -115 -114 +113 +112 +111 +110 +109 +108 +107 +106 -105 +104 +103 +102 +101 -100 -99 -98 +97 +96)(+125 +151 -150 -149 +148 -147 +146 -145 +144 +143 -142 -141 +140 +139 +138 -137 +136 +135 +134 +133 -132 -131 +130 -129 +128 -127 -126)(+152 +153 +154 +155 +156 -157 +158 +159 +160 -161 -162 +163 +164 -165 -166 +167 +168 +169 -170 -171 +172 +173 -174 +175 +176)
