## Eulerian Cycle Problem

In [23]:
from collections import defaultdict

def build_graph(text):
    """Parse adjacency-list lines into a directed graph.

    Each non-blank line has the form ``'<node> -> <t1>,<t2>,...'``. Node
    labels are kept as strings.

    Args:
        text: Iterable of lines (for example the result of
            ``file.readlines()``). Surrounding whitespace, including the
            trailing newline, is stripped; blank lines are ignored.

    Returns:
        A ``defaultdict(list)`` mapping every source node to the list of its
        successor nodes, in the order they are listed on the line. If a source
        node appears on several lines, the last line wins.
    """
    graph = defaultdict(list)
    for raw_line in text:
        line = raw_line.strip()
        if not line:
            # Data files commonly end with an empty line; without this guard
            # the split below yields a single field and indexing [1] fails.
            continue
        fields = line.split(' -> ')
        graph[fields[0]] = fields[1].split(',')
    return graph

In [24]:
from copy import deepcopy
from random import choice

def random_walk(edges_, start_from=None):
    """Follow unused edges from a start node until a node has none left.

    Despite the name, the walk is deterministic: at every node the last
    successor in that node's list is taken.

    Args:
        edges_: Mapping ``node -> list of successor nodes``. It is not
            modified; the walk consumes a deep copy.
        start_from: Node to start from. ``None`` (the default) starts at the
            first key of ``edges_``.

    Returns:
        A tuple ``(walk, remaining)`` where ``walk`` is the list of visited
        nodes (at least two entries) and ``remaining`` is the copied mapping
        with every traversed edge removed; nodes without unused edges are
        dropped from it entirely.
    """
    edges = deepcopy(edges_)
    # Compare with None explicitly: a truthiness test would silently replace
    # legitimate falsy labels such as 0 or '' by the first key.
    node = start_from if start_from is not None else list(edges.keys())[0]
    walk = [node]
    while True:
        successors = edges[node]
        if len(successors) > 1:
            node = successors.pop()
        else:
            # Last unused edge of this node: remove the node from the mapping.
            node = edges.pop(node)[0]
        walk.append(node)
        if node not in edges:
            # Stuck: the current node has no unused outgoing edges.
            break
    return walk, edges

In [25]:
from collections import deque

def eulerian_cycle(edges):
    """Build an Eulerian cycle by repeatedly extending a closed walk.

    A first walk is taken with ``random_walk``. While unused edges remain,
    the current cycle is rotated so that it starts at a node that still has
    unused edges, and a new walk from that node is appended.

    Args:
        edges: Mapping ``node -> list of successor nodes``.

    Returns:
        List of nodes; for a graph that has an Eulerian cycle the first and
        last entries are the same node.

    Raises:
        ValueError: If edges remain but none of them starts at a node of the
            current cycle (the graph is not connected), so the cycle cannot
            be extended.
    """
    cycle, remaining_edges = random_walk(edges)
    while remaining_edges:
        # Drop the repeated closing node before rotating the cycle.
        open_cycle = cycle[:-1]
        for index, node in enumerate(open_cycle):
            if node in remaining_edges:
                break
        else:
            # Previously this case silently spliced in an unrelated walk and
            # returned a sequence that is not a walk in the graph.
            raise ValueError(
                'graph has no Eulerian cycle: unused edges are not reachable '
                'from the cycle built so far'
            )
        rotated = open_cycle[index:] + open_cycle[:index]
        new_cycle, remaining_edges = random_walk(remaining_edges, start_from=node)
        # `rotated` ends just before `node`, and `new_cycle` starts at `node`.
        cycle = rotated + new_cycle
    return cycle

def display_cycle(cycle):
    """Render a sequence of nodes as one ``a->b->c`` string."""
    labels = [str(node) for node in cycle]
    return '->'.join(labels)

In [26]:
# Sample graph for the Eulerian cycle problem, one 'node -> successors'
# adjacency line per entry (the format build_graph parses).
sample_input = [
    '0 -> 3',
    '1 -> 0',
    '2 -> 1,6',
    '3 -> 2',
    '4 -> 2',
    '5 -> 4',
    '6 -> 5,8',
    '7 -> 9',
    '8 -> 7',
    '9 -> 6',
]

In [27]:
# Solve the sample graph and check that the result is a closed walk.
graph = build_graph(sample_input)
cycle = eulerian_cycle(graph)
assert cycle[0] == cycle[-1]  # the walk must return to its starting node
print(display_cycle(cycle))

0->3->2->6->8->7->9->6->5->4->2->1->0


In [32]:
def is_path(cycle, graph):
    """Check that a walk uses exactly the edges of a graph.

    Args:
        cycle: Sequence of nodes; consecutive nodes are read as directed edges.
        graph: Mapping ``node -> list of successor nodes``.

    Returns:
        True when the edges traversed by ``cycle`` and the edges of ``graph``
        are the same collection (multiplicities included), False otherwise.
    """
    graph_edges = []
    for source, targets in graph.items():
        for target in targets:
            graph_edges.append((source, target))
    # zip stops at the shorter argument, so this pairs each node with its successor.
    walked_edges = list(zip(cycle, cycle[1:]))
    graph_edges.sort()
    walked_edges.sort()
    return graph_edges == walked_edges

In [33]:
def is_eulerian(cycle):
    """Check that a walk never traverses the same directed edge twice.

    Only repetition is checked here; whether every edge of a graph is used is
    not examined by this function.

    Args:
        cycle: Sequence of hashable nodes; consecutive nodes are read as
            directed edges.

    Returns:
        True when all consecutive ``(node, next_node)`` pairs are distinct.
    """
    traversed = list(zip(cycle[:-1], cycle[1:]))
    # Comparing sizes detects duplicates without sorting, so node labels only
    # need to be hashable, not mutually orderable.
    return len(set(traversed)) == len(traversed)

In [34]:
# Read the raw lines of the Eulerian cycle dataset; the path is relative to
# the current working directory.
input_filename = 'dataset_203_2'
with open(f'data/{input_filename}.txt', 'r') as input_file:
    test_input = input_file.readlines()

In [35]:
# Solve the dataset and validate the answer before it is written out.
graph = build_graph(test_input)
cycle = eulerian_cycle(graph)
assert cycle[0] == cycle[-1]  # closed walk
assert is_eulerian(cycle)  # no directed edge is traversed twice
assert is_path(cycle, graph)  # traversed edges are exactly the graph's edges

In [36]:
# Derive the output name by replacing the first '_'-separated token of the
# input name: 'dataset_203_2' -> 'submission_203_2'.
output_filename = 'submission_' + '_'.join(input_filename.split('_')[1:])
with open(f'data/{output_filename}.txt', 'w') as output_file:
    output_file.write(display_cycle(cycle))

---
## Eulerian Path Problem

In [37]:
# Sample graph for the Eulerian path problem, one 'node -> successors'
# adjacency line per entry.
sample_input = [
    '0 -> 2',
    '1 -> 3',
    '2 -> 1',
    '3 -> 0,4',
    '6 -> 3,7',
    '7 -> 8',
    '8 -> 9',
    '9 -> 6',
]

# Degree bookkeeping for selected nodes:
# out_deg(4)=0, in_deg(4)=1  -> one surplus incoming edge
# out_deg(3)=2, in_deg(3)=2  -> balanced
# out_deg(6)=2, in_deg(6)=1  -> one surplus outgoing edge

In [38]:
from functools import reduce
from operator import add
from collections import Counter

def eulerian_path(edges):
    """Find an Eulerian path by closing the graph into a cycle and cutting it.

    The node with more outgoing than incoming edges is taken as the path's
    start and the node with more incoming than outgoing edges as its end. An
    artificial ``end -> start`` edge is added, an Eulerian cycle of the
    augmented graph is built with ``eulerian_cycle``, and the cycle is cut
    open at that artificial edge.

    Args:
        edges: Mapping ``node -> list of successor nodes``. It is not
            modified.

    Returns:
        List of nodes along the path, from start to end. An empty list is
        returned when no start/end pair of unbalanced nodes exists (this
        includes graphs in which every node is balanced) or when the
        artificial edge cannot be located in the cycle. If several nodes
        qualify as start or end, an arbitrary one of them is used.
    """
    # Degrees are computed from the `edges` argument, not from any notebook
    # global, so the function works for whatever graph it is given.
    out_degrees = Counter({node: len(targets) for node, targets in edges.items()})
    in_degrees = Counter(
        target for targets in edges.values() for target in targets
    )

    start = end = None
    for node in set(out_degrees) | set(in_degrees):
        difference = out_degrees[node] - in_degrees[node]
        if difference > 0:
            start = node
        elif difference < 0:
            end = node
    # Test against None so that falsy labels such as 0 or '' are accepted.
    if start is None or end is None:
        return []

    # Append the artificial edge to the end node's own successors instead of
    # letting the original entry overwrite it. `end` stays the first key, so
    # the walk still begins there.
    augmented_edges = {end: [*edges.get(end, ()), start]}
    for node, targets in edges.items():
        if node != end:
            augmented_edges[node] = list(targets)

    cycle = eulerian_cycle(augmented_edges)

    # Cut at an actual end -> start step. The end node may occur several
    # times in the cycle, and only the occurrence followed by `start`
    # corresponds to the artificial edge.
    for index in range(len(cycle) - 1):
        if cycle[index] == end and cycle[index + 1] == start:
            return cycle[index + 1:-1] + cycle[:index + 1]
    return []

In [39]:
# Solve the sample graph; dict() is used so the printout shows a plain
# mapping rather than the defaultdict repr.
graph = build_graph(sample_input)
print('Original: ', dict(graph))
path = eulerian_path(graph)  
print(display_cycle(path))

Original:  {'0': ['2'], '1': ['3'], '2': ['1'], '3': ['0', '4'], '6': ['3', '7'], '7': ['8'], '8': ['9'], '9': ['6']}
6->7->8->9->6->3->0->2->1->3->4


In [40]:
# Read the raw lines of the Eulerian path dataset; the path is relative to
# the current working directory.
input_filename = 'dataset_203_6'
with open(f'data/{input_filename}.txt', 'r') as input_file:
    test_input = input_file.readlines()

In [41]:
# Solve the dataset. Unlike the cycle section, the result is not validated
# by any assertion before it is written out.
graph = build_graph(test_input)
path = eulerian_path(graph)

In [42]:
# 'dataset_203_6' -> 'submission_203_6'; the path is written in the same
# 'a->b->c' format used for cycles.
output_filename = 'submission_' + '_'.join(input_filename.split('_')[1:])
with open(f'data/{output_filename}.txt', 'w') as output_file:
    output_file.write(display_cycle(path))

---
## String Reconstruction Problem

In [44]:
import import_ipynb  

In [57]:
from Week1 import debruijn_graph_from_kmers, path_to_genome;

In [64]:
# Sample k-mers (k = 4) for the string reconstruction problem.
sample_input = [
    'CTTA',
    'ACCA',
    'TACC',
    'GGCT',
    'GCTT',
    'TTAC',
]

In [65]:
# Expected reconstruction for the sample k-mers; checked by an assert below.
sample_output = 'GGCTTACCA'

In [66]:
# Build the graph with the helper imported from the Week1 notebook.
# NOTE(review): its result is passed straight to eulerian_path, so it is
# presumably a node -> successors mapping like build_graph's — confirm.
graph = debruijn_graph_from_kmers(sample_input)

In [67]:
# Find an Eulerian path and spell it out with the Week1 helper.
path = eulerian_path(graph)
genome = path_to_genome(path)

In [72]:
# Recomputes the spelling from `path` (rather than reusing `genome`) and
# compares it with the expected sample string.
assert path_to_genome(path) == sample_output

In [83]:
# Read the raw lines of the string reconstruction dataset; the path is
# relative to the current working directory.
input_filename = 'dataset_203_7'
with open(f'data/{input_filename}.txt', 'r') as input_file:
    test_input = input_file.readlines()

In [84]:
# The first line of the file is skipped and the rest are treated as k-mers.
# NOTE(review): presumably the first line holds k — confirm against the
# dataset format.
graph = debruijn_graph_from_kmers([line.strip() for line in test_input[1:]])

In [85]:
# Reconstruct the string for the dataset: Eulerian path, then its spelling.
path = eulerian_path(graph)
genome = path_to_genome(path)

In [86]:
# 'dataset_203_7' -> 'submission_203_7'; the reconstructed string is
# written as-is.
output_filename = 'submission_' + '_'.join(input_filename.split('_')[1:])
with open(f'data/{output_filename}.txt', 'w') as output_file:
    output_file.write(genome)

---
## k-Universal Circular String Problem

In [238]:
from Week1 import is_universal

In [228]:
from itertools import product

In [229]:
def binary_strings(k):
    """Return every string of length ``k`` over the alphabet ``'01'``.

    Args:
        k: Length of the strings.

    Returns:
        Sorted list with ``2 ** k`` strings.
    """
    strings = []
    for bits in product('01', repeat=k):
        strings.append(''.join(bits))
    strings.sort()
    return strings

In [259]:
def universal_circular_string(k):
    """Construct a k-universal circular binary string.

    All binary strings of length ``k`` are turned into a graph with
    ``debruijn_graph_from_kmers``, an Eulerian cycle of that graph is found,
    and the cycle minus its last ``k - 1`` nodes is spelled out with
    ``path_to_genome``.

    Args:
        k: Length of the binary k-mers.

    Returns:
        Whatever ``path_to_genome`` produces for the trimmed cycle; the
        sample cell below asserts a length of ``2 ** k`` for k = 4.
    """
    debruijn = debruijn_graph_from_kmers(binary_strings(k))
    closed_walk = eulerian_cycle(debruijn)
    # NOTE(review): for k == 1 the stop index is -0, i.e. 0, so the slice is
    # empty — confirm whether k == 1 has to be supported.
    trimmed_walk = closed_walk[:-(k - 1)]
    return path_to_genome(trimmed_walk)

In [260]:
def is_universal_circular(binary_string, k):
    """Apply ``is_universal`` to a circular string.

    The first ``k - 1`` characters are appended to the end so that k-mers
    spanning the wrap-around are present in the linear string that is handed
    to ``is_universal``.

    Args:
        binary_string: The circular string, written linearly.
        k: The k-mer length.

    Returns:
        The result of ``is_universal`` for the unrolled string and ``k``.
    """
    wrap_around = binary_string[:k - 1]
    unrolled = binary_string + wrap_around
    return is_universal(unrolled, k)

In [269]:
# Sample check for k = 4. `sample_output` is recorded for reference but is not
# compared with the result; only length and universality are asserted.
sample_k = 4
sample_output = '0000110010111101'
sample_result = universal_circular_string(sample_k)
assert len(sample_result) == 2 ** sample_k
assert is_universal_circular(sample_result, sample_k)

In [262]:
# Read the dataset and parse its first line as the integer k.
input_filename = 'dataset_203_11'
with open(f'data/{input_filename}.txt', 'r') as input_file:
    test_input = input_file.readlines()
    test_k = int(test_input[0].strip())

In [264]:
# Build the universal circular string for the dataset's k.
result = universal_circular_string(test_k)

In [265]:
# 'dataset_203_11' -> 'submission_203_11'; the string is written as-is.
output_filename = 'submission_' + '_'.join(input_filename.split('_')[1:])
with open(f'data/{output_filename}.txt', 'w') as output_file:
    output_file.write(result)

---
## ?