In [55]:
def profile_matrix_with_pseudocounts(motifs):
    """Build a per-position nucleotide frequency profile with +1 pseudocounts.

    Args:
        motifs: Non-empty sequence of strings over 'A', 'C', 'G', 'T'. The
            length of the first string fixes the profile width.

    Returns:
        A dict keyed 'A', 'C', 'G', 'T' (in that order). Each value is a list
        with one entry per position: (occurrences + 1) / (len(motifs) + 4).

    Raises:
        KeyError: If a motif contains a symbol other than A, C, G or T.
        IndexError: If ``motifs`` is empty or a motif is shorter than the
            first one.
    """
    width = len(motifs[0])
    # Four pseudo-observations (one per nucleotide) are added to each column.
    denominator = len(motifs) + 4

    # Every count starts at 1: that is the pseudocount.
    counts = {base: [1] * width for base in 'ACGT'}
    for sequence in motifs:
        for pos in range(width):
            counts[sequence[pos]][pos] += 1

    return {
        base: [count / denominator for count in per_position]
        for base, per_position in counts.items()
    }

In [56]:
def profile_most_probable_kmer(text, k, profile):
    """Return the k-mer of ``text`` with the highest probability under ``profile``.

    Args:
        text: String to scan.
        k: Window length.
        profile: Mapping from symbol to a list of per-position probabilities,
            indexed as ``profile[symbol][position]``.

    Returns:
        The highest-scoring window of length ``k``. On ties the earliest
        window wins. If ``text`` has no full window, ``text[:k]`` is returned.
    """
    best_kmer = text[:k]
    best_score = -1  # below any product of probabilities, so the first window always wins

    for start in range(len(text) - k + 1):
        window = text[start:start + k]

        likelihood = 1
        for pos, base in enumerate(window):
            likelihood *= profile[base][pos]

        # Strict comparison keeps the earliest window on ties.
        if likelihood > best_score:
            best_score, best_kmer = likelihood, window

    return best_kmer

In [57]:
def greedy_motif_search_with_pseudocounts(dna, k, t):
    """Greedy motif search seeded from every k-mer of the first sequence.

    For each k-mer of ``dna[0]`` a motif collection is grown one sequence at
    a time: a pseudocount profile is built from the motifs chosen so far and
    the profile-most-probable k-mer of the next sequence is appended. The
    collection with the lowest ``score_motifs`` value is returned.

    Args:
        dna: Sequence of DNA strings.
        k: Motif length.
        t: Number of sequences from ``dna`` to use (``dna[0]`` .. ``dna[t-1]``).

    Returns:
        A list with one k-mer per sequence used. The first k-mers of those
        sequences are the fallback, so they are returned when no candidate
        scores strictly lower or when ``dna[0]`` is shorter than ``k``.

    Raises:
        IndexError: If ``dna`` is empty or ``t`` exceeds ``len(dna)``.
    """
    # A candidate always holds the seed from dna[0] plus one k-mer for each of
    # dna[1] .. dna[t-1]. The fallback must cover exactly the same sequences,
    # otherwise the scores compare collections of different sizes and the
    # function could return more motifs than requested.
    used = max(t, 1)
    best_motifs = [string[:k] for string in dna[:used]]

    last_start = len(dna[0]) - k
    if last_start < 0:
        # dna[0] has no full k-mer to seed from; nothing to compare against.
        return best_motifs

    # Track the best score instead of rescoring best_motifs on every pass.
    best_score = score_motifs(best_motifs)

    for i in range(last_start + 1):
        motifs = [dna[0][i:i + k]]

        for j in range(1, t):
            profile = profile_matrix_with_pseudocounts(motifs)
            motifs.append(profile_most_probable_kmer(dna[j], k, profile))

        candidate_score = score_motifs(motifs)
        # Strict comparison: on ties the earlier collection is kept.
        if candidate_score < best_score:
            best_motifs = motifs
            best_score = candidate_score

    return best_motifs

In [58]:
def score_motifs(motifs):
    """Count the symbols that differ from the per-column majority symbol.

    Args:
        motifs: Non-empty sequence of strings. The length of the first one
            sets how many columns are scored.

    Returns:
        The sum, over all columns, of the number of entries that are not the
        most frequent symbol of that column.
    """
    width = len(motifs[0])
    total = 0

    for pos in range(width):
        column = [motif[pos] for motif in motifs]
        # Mismatches = column size minus the count of its most frequent
        # symbol; which symbol wins a tie does not change the number.
        majority_count = max(column.count(symbol) for symbol in set(column))
        total += len(column) - majority_count

    return total

In [59]:
# Sample input: five space-separated sequences, searched for 4-mers.
Dna = 'GGCGTTCAGGCA AAGAATCAGTCA CAAGGAGTTCGC CACGTCAATCAC CAATAATATTCG'
dna_list = Dna.split(' ')
k, t = 4, 5  # motif length, number of sequences in dna_list
result = greedy_motif_search_with_pseudocounts(dna_list, k, t)
# Print the chosen motifs on one line, separated by single spaces.
print(*result, sep=" ")

GTTC AATC GTTC AATC ATTC
