In [None]:
def composition(k, text):
    """
    Return every k-mer of text, in order of position, keeping repeats.

    k is the window width (an integer) and text is the nucleotide string.
    One k-mer is produced per start position 0 .. len(text) - k; if text is
    shorter than k the result is an empty list.

    >>> composition(5, "CAATCCAAC")
    ['CAATC', 'AATCC', 'ATCCA', 'TCCAA', 'CCAAC']
    """
    window_count = len(text) - k + 1
    return [text[start:start + k] for start in range(window_count)]

In [None]:
def PathToGenome(path):
    """
    Assemble a genome string from a path of k-mers.

    path is a whitespace-separated sequence of k-mers in which each k-mer is
    assumed to overlap the previous one by k-1 characters. The result is the
    first k-mer followed by the last character of every later k-mer. The
    overlaps themselves are not checked.

    An empty (or whitespace-only) path yields the empty string.

    >>> PathToGenome("ACCGA CCGAA CGAAG GAAGC AAGCT")
    'ACCGAAGCT'
    >>> PathToGenome("")
    ''
    """
    kmers = path.split()
    if not kmers:
        # Nothing to assemble; indexing kmers[0] would raise IndexError.
        return ""
    first, *rest = kmers
    # Each subsequent k-mer contributes only its one non-overlapping character.
    return first + "".join(kmer[-1] for kmer in rest)

In [None]:
def Overlap(patterns):
    """
    Build the overlap graph of a collection of k-mers as an adjacency list.

    patterns is a whitespace-separated string of k-mers. There is an edge
    from k-mer a to k-mer b when the suffix of a (all but its first
    character) equals the prefix of b (all but its last character).

    Returns a dict mapping each k-mer that has at least one successor to the
    list of its successors; k-mers without successors are omitted. Successors
    are drawn from the distinct input k-mers in order of first appearance. If
    a k-mer occurs several times in the input, its successor list is extended
    once per occurrence.

    >>> Overlap("ATGCG GCATG CATGC AGGCA GGCAT GGCAC") == {
    ...     'GCATG': ['CATGC'],
    ...     'CATGC': ['ATGCG'],
    ...     'AGGCA': ['GGCAT', 'GGCAC'],
    ...     'GGCAT': ['GCATG'],
    ... }
    True
    """
    kmers = patterns.split()

    # Index the distinct k-mers by prefix so each suffix lookup is a single
    # dict access instead of a scan over every k-mer.
    # dict.fromkeys de-duplicates while keeping first-appearance order.
    by_prefix = {}
    for kmer in dict.fromkeys(kmers):
        by_prefix.setdefault(kmer[:-1], []).append(kmer)

    adjacency = {}
    for kmer in kmers:
        successors = by_prefix.get(kmer[1:])
        if successors:
            # Only k-mers with at least one successor get a key.
            adjacency.setdefault(kmer, []).extend(successors)
    return adjacency

In [None]:
def DeBruijn_text(k, text):
    """
    Build the de Bruijn graph of text for k-mers, as an adjacency list.

    k is assumed to be an integer greater than 0 and text a string of length
    at least k. Nodes are the (k-1)-mers of text, with identically labelled
    nodes glued together; every pair of consecutive (k-1)-mers contributes
    one edge, so repeated edges appear repeatedly in a node's list. Nodes
    with no outgoing edge are left out of the result.

    >>> DeBruijn_text(4, "AAGATTCTCTAAGA") == {
    ...     'AAG': ['AGA', 'AGA'],
    ...     'AGA': ['GAT'],
    ...     'GAT': ['ATT'],
    ...     'ATT': ['TTC'],
    ...     'TTC': ['TCT'],
    ...     'TCT': ['CTC', 'CTA'],
    ...     'CTC': ['TCT'],
    ...     'CTA': ['TAA'],
    ...     'TAA': ['AAG'],
    ... }
    True
    """
    width = k - 1
    nodes = [text[start:start + width] for start in range(len(text) - width + 1)]

    graph = {}
    # Walk consecutive node pairs; each pair is one edge source -> target.
    for source, target in zip(nodes, nodes[1:]):
        graph.setdefault(source, []).append(target)
    return graph

In [69]:
def DeBruijn_pattern(patterns):
    """
    Build the de Bruijn graph of a collection of k-mers, as an adjacency list.

    patterns is a whitespace-separated string of k-mers. Every k-mer
    contributes one edge from its prefix (all but the last character) to its
    suffix (all but the first character); identically labelled nodes are
    glued together, so a k-mer that occurs more than once produces a repeated
    edge. Only nodes with at least one outgoing edge appear as keys.

    Returns a dict mapping each prefix to the list of suffixes it points to,
    in input order. An empty input yields an empty dict.

    >>> DeBruijn_pattern("GAGG CAGG GGGG GGGA CAGG AGGG GGAG") == {
    ...     'GAG': ['AGG'],
    ...     'CAG': ['AGG', 'AGG'],
    ...     'GGG': ['GGG', 'GGA'],
    ...     'AGG': ['GGG'],
    ...     'GGA': ['GAG'],
    ... }
    True
    """
    adjacency = {}
    for kmer in patterns.split():
        # One edge per k-mer occurrence: prefix node -> suffix node.
        adjacency.setdefault(kmer[:-1], []).append(kmer[1:])
    return adjacency
    

#DeBruijn_pattern("GAGG CAGG GGGG GGGA CAGG AGGG GGAG")