In [7]:
def FormProfileWithPseudocounts(motifs, k, pseudocount=1.):
    """Build a per-position nucleotide profile from ``motifs`` with pseudocounts.

    Each of the four symbols starts every column at ``pseudocount``; every
    motif then adds 1 to the entry of the symbol it has at that position.
    Columns are normalised by their true total,
    ``len(motifs) + 4 * pseudocount``, so the four entries of a column sum
    to 1.

    Args:
        motifs: sequence of strings over 'A', 'C', 'G', 'T', each indexable
            up to position ``k - 1``.
        k: number of columns (motif length) in the profile.
        pseudocount: value added to every symbol count in every column.

    Returns:
        dict mapping 'A', 'G', 'C', 'T' to a list of ``k`` floats.

    Raises:
        KeyError: if a motif contains a symbol other than A, C, G, T.
        IndexError: if a motif is shorter than ``k``.
        ZeroDivisionError: if ``motifs`` is empty and ``pseudocount`` is 0
            (with ``k > 0``).
    """
    symbols = ['A', 'G', 'C', 'T']
    profile = {x: [float(pseudocount)] * k for x in symbols}
    # The pseudocount is added once per symbol, so it must be part of the
    # denominator as well; dividing by len(motifs) alone leaves columns that
    # sum to more than 1.
    div = len(motifs) + len(symbols) * float(pseudocount)
    for i in range(k):
        for motif in motifs:
            profile[motif[i]][i] += 1
        for symbol in profile:
            profile[symbol][i] /= div
    return profile

In [8]:
def Score(motifs, k):
    """Count the symbols that disagree with the column-wise majority symbol.

    For each of the first ``k`` positions, the number of motifs whose symbol
    differs from the most frequent symbol in that column is added to the
    total. A lower score means the motifs agree more closely.

    Args:
        motifs: sequence of strings over 'A', 'C', 'G', 'T', each indexable
            up to position ``k - 1``.
        k: number of columns to score.

    Returns:
        Non-negative int: total number of non-majority symbols.

    Raises:
        KeyError: if a motif contains a symbol other than A, C, G, T.
        IndexError: if a motif is shorter than ``k``.
    """
    # A column holds one symbol per motif, so mismatches are counted against
    # the number of motifs, not against the motif length k.
    rows = len(motifs)
    score = 0
    for i in range(k):
        count = {x: 0 for x in ['A', 'G', 'C', 'T']}
        for motif in motifs:
            count[motif[i]] += 1
        score += rows - max(count.values())
    return score

In [9]:
def ProfileMostProbableKmer(text, k, profile):
    """Return the k-mer of ``text`` with the highest probability under ``profile``.

    The probability of a k-mer is the product of ``profile[symbol][position]``
    over its symbols. The first k-mer reaching the highest probability wins;
    if no k-mer has a probability above 0 (or ``text`` has no full k-mer),
    ``text[:k]`` is returned.

    Args:
        text: string to scan.
        k: window length.
        profile: mapping from symbol to a per-position list of probabilities.

    Returns:
        The selected substring of ``text``.
    """
    best_kmer = text[:k]
    best_prob = 0
    last_start = len(text) - k
    for start in range(last_start + 1):
        candidate = text[start:start + k]
        likelihood = 1
        for position in range(len(candidate)):
            likelihood *= profile[candidate[position]][position]
        # Strict comparison keeps the earliest window on ties.
        if likelihood > best_prob:
            best_prob, best_kmer = likelihood, candidate
    return best_kmer

In [10]:
def FormMotifs(profile, dna, k):
    """Pick the profile-most-probable k-mer from every string in ``dna``.

    Args:
        profile: mapping from symbol to a per-position list of probabilities.
        dna: iterable of strings to scan.
        k: k-mer length.

    Returns:
        List with one k-mer per string in ``dna``, in the same order.
    """
    motifs = []
    for sequence in dna:
        motifs.append(ProfileMostProbableKmer(sequence, k, profile))
    return motifs

In [11]:
import random


def RandomizedMotifSearch(dna, k, t):
    """Run one randomized motif search from a random starting set of k-mers.

    A random k-mer is chosen from each of the first ``t`` strings of ``dna``.
    The search then alternates between building a pseudocount profile from
    the current motifs and re-selecting the profile-most-probable k-mer of
    every string in ``dna``, for as long as the score strictly improves.

    Args:
        dna: list of DNA strings.
        k: motif length.
        t: number of strings used for the initial random selection.

    Returns:
        List of the best-scoring motifs found in this run.

    Raises:
        ValueError: if one of the first ``t`` strings is shorter than ``k``.
    """
    # One independent draw per string, bounded by that string's own length.
    # random.sample would force all start positions to be distinct and fail
    # when t exceeds the number of available positions.
    motifs = []
    for string in dna[:t]:
        start = random.randrange(len(string) - k + 1)
        motifs.append(string[start: start + k])
    best_motifs = motifs[:]
    best_score = Score(best_motifs, k)
    while True:
        profile = FormProfileWithPseudocounts(motifs, k)
        motifs = FormMotifs(profile, dna, k)
        current_score = Score(motifs, k)
        if current_score < best_score:
            best_motifs, best_score = motifs[:], current_score
        else:
            # No strict improvement: this run has converged.
            return best_motifs

In [12]:
# Read the dataset: the first line holds "k t", the remaining lines are DNA strings.
with open('/content/drive/My Drive/Colab Notebooks/Rosalind/rosalind_ba2f.txt') as data:
    lines = data.read().splitlines()

# Parsing and the search run after the file is closed, so the handle is not
# held open for the whole computation.
k, t = map(int, lines[0].split())
# Skip blank lines (e.g. trailing newlines); an empty DNA string would yield
# an empty motif and break scoring.
dna = [line.strip() for line in lines[1:] if line.strip()]

best_motifs = RandomizedMotifSearch(dna, k, t)
best_score = Score(best_motifs, k)
for _ in range(1000):  # 1000 further random restarts to raise the chance of finding the best motifs
    new = RandomizedMotifSearch(dna, k, t)
    new_score = Score(new, k)
    if new_score < best_score:
        best_motifs, best_score = new[:], new_score
print(
    'RandomizedMotifSearch Implementation -',
    *best_motifs,
    sep='\n'
)

RandomizedMotifSearch Implementation -
CGACGATTTACGTCC
CCTCACTGTCCGACC
CCTACATGTGGTACC
CAGCCATGTCCGACC
CCTACCCTTCCGACC
CCTACATGTCGATCC
CCTACATGATTGACC
CCTAATAGTCCGACC
CCTACCATTCCGACC
CCTAAGGGTCCGACC
CCTACAGTGCCGACC
ACTACATGTCCGAAA
CCTGTCTGTCCGACC
ATTACATGTCCGACT
CCTACATCATCGACC
CCTACACCACCGACC
CCACAATGTCCGACC
GAAACATGTCCGACC
CCTACATGTCCAGGC
CCTACATGTCCGTAG
