In [60]:
# Complement lookup: each base in tab_input pairs with the base at the same
# index of tab_output.
tab_input, tab_output = 'ATGC', 'TACG'

In [61]:
# Sample strand used to try out the reverse / translate steps interactively.
strand = 'AGTCGCATAGT'

In [62]:
# Reverse the strand (a slice with step -1 walks it back to front).
rev_strand = strand[::-1]

In [63]:
# Translation table: the i-th character of tab_input maps to the i-th
# character of tab_output.
trans_tab = str.maketrans(tab_input, tab_output)

In [64]:
# Swap every base of the reversed strand through the table; the cell output
# is the resulting string.
rev_strand.translate(trans_tab)

'ACTATGCGACT'

In [65]:
# Sample strands sharing the prefix 'AGTCGCATA' and differing only in their
# last two characters.
see = ['AGTCGCATAGT', 'AGTCGCATAGG', 'AGTCGCATAGC', 'AGTCGCATAGA', 'AGTCGCATATT']

In [66]:
# For every sample strand: reverse it, then swap each base via trans_tab.
rev_see = [seq[::-1].translate(trans_tab) for seq in see]

In [67]:
# Display the transformed strands, in the same order as `see`.
rev_see

['ACTATGCGACT', 'CCTATGCGACT', 'GCTATGCGACT', 'TCTATGCGACT', 'AATATGCGACT']

In [68]:
# Pair each original strand with its transformed counterpart.
{original: transformed for original, transformed in zip(see, rev_see)}

{'AGTCGCATAGA': 'TCTATGCGACT',
 'AGTCGCATAGC': 'GCTATGCGACT',
 'AGTCGCATAGG': 'CCTATGCGACT',
 'AGTCGCATAGT': 'ACTATGCGACT',
 'AGTCGCATATT': 'AATATGCGACT'}

In [69]:
import numpy as np
import itertools as it
from collections import defaultdict

In [70]:
alphabet = list('ATGC')
def d_mismatch_generator(pattern, d):
    """Build every string obtained by overwriting ``d`` positions of ``pattern``.

    Each combination of ``d`` distinct positions is paired with each length-``d``
    tuple of letters drawn from ``alphabet``, and the chosen positions are
    overwritten with those letters.

    A position may be overwritten with the letter it already holds, so the
    result also contains strings that differ from ``pattern`` in fewer than
    ``d`` places, and the same string can appear more than once.

    Parameters
    ----------
    pattern : str
        The string whose positions are overwritten.
    d : int
        Number of positions overwritten at once.

    Returns
    -------
    list of str
        One string per (positions, letters) pair, positions varying slowest.
        Empty when ``d`` exceeds ``len(pattern)``.
    """
    positions = range(len(pattern))

    variants = []
    for chosen in it.combinations(positions, d):
        for replacement in it.product(alphabet, repeat=d):
            # Work on a fresh character list so earlier variants are untouched.
            chars = list(pattern)
            for pos, letter in zip(chosen, replacement):
                chars[pos] = letter
            variants.append(''.join(chars))

    return variants

In [71]:
def get_mismatches(pattern, d):
    """Return the sorted, de-duplicated strings within ``d`` substitutions of ``pattern``.

    The result is the union of ``d_mismatch_generator(pattern, n)`` for every
    ``n`` from 1 to ``d``, plus ``pattern`` itself (distance zero), so that
    ``d == 0`` yields ``[pattern]`` rather than an empty list.

    Parameters
    ----------
    pattern : str
        The k-mer to expand.
    d : int
        Maximum number of substituted positions; must be non-negative.

    Returns
    -------
    list of str
        Unique neighbours in ascending order.

    Raises
    ------
    ValueError
        If ``d`` is negative.
    """
    if d < 0:
        raise ValueError("d must be non-negative, got {}".format(d))

    # The pattern is always within distance 0 of itself.
    neighbours = {pattern}

    # Substituting more positions than the pattern has yields nothing, so
    # levels above len(pattern) are skipped.
    for n_subs in range(1, min(d, len(pattern)) + 1):
        neighbours.update(d_mismatch_generator(pattern, n_subs))

    return sorted(neighbours)

In [72]:
# Complement lookup: the i-th character of tab_input maps to the i-th
# character of tab_output.
tab_input = 'ATGC'
tab_output = 'TACG'
trans_tab = str.maketrans(tab_input, tab_output)

def reversed_complement(strands):
    """Map each strand to its reverse complement.

    Each strand is reversed and then every character found in ``tab_input``
    is replaced by its counterpart in ``tab_output``; other characters are
    left as they are.

    Parameters
    ----------
    strands : iterable of str
        Strands to convert. Any iterable works, including one-shot iterators.

    Returns
    -------
    dict
        ``{strand: reverse_complement}`` in iteration order.
    """
    # Build the mapping in a single pass: iterating `strands` twice would
    # find a generator already exhausted the second time and return {}.
    return {strand: strand[::-1].translate(trans_tab) for strand in strands}

In [73]:
def most_freq_mismatches_with_rc(text, k, d):
    """Find the k-mers counted most often across ``text`` and its reverse complements.

    For every length-``k`` window of ``text``, each string returned by
    ``get_mismatches(window, d)`` gets one count, and so does each string
    returned by ``get_mismatches`` for the window's reverse complement.
    The strings that reach the highest total are returned.

    Parameters
    ----------
    text : str
        Sequence to scan.
    k : int
        Window length.
    d : int
        Mismatch budget forwarded to ``get_mismatches``.

    Returns
    -------
    list of str
        All strings tied for the maximum count, in first-seen order.
        Empty when nothing was counted (e.g. ``text`` is shorter than ``k``).
    """
    freqmap = defaultdict(int)

    for start in range(len(text) - k + 1):
        window = text[start:start + k]
        rc_window = window[::-1].translate(trans_tab)

        # Count the neighbourhood of the window first, then that of its
        # reverse complement, so first-seen order is stable.
        for source in (window, rc_window):
            for kmer in get_mismatches(source, d):
                freqmap[kmer] += 1

    # max() on an empty collection raises; no windows means no answer.
    if not freqmap:
        return []

    max_count = max(freqmap.values())
    return [kmer for kmer, count in freqmap.items() if count == max_count]

In [74]:
# Input sequence for the worked example run in the following cells.
text = 'CTTGCCGGCGCCGATTATACGATCGCGGCCGCTTGCCTTCTTTATAATGCATCGGCGCCGCGATCTTGCTATATACGTACGCTTCGCTTGCATCTTGCGCGCATTACGTACTTATCGATTACTTATCTTCGATGCCGGCCGGCATATGCCGCTTTAGCATCGATCGATCGTACTTTACGCGTATAGCCGCTTCGCTTGCCGTACGCGATGCTAGCATATGCTAGCGCTAATTACTTAT'

In [75]:
# Example parameters: window length and mismatch budget.
k = 9
d = 3

In [76]:
# Run the search on the example input; the cell output lists the k-mers tied
# for the highest count.
most_freq_mismatches_with_rc(text, k, d)

['AGCGGCGCT', 'AGCGCCGCT']

In [81]:
def main(path='rosalind_ba1j.txt'):
    """Read a problem file, run the search, and print the result.

    The first line of the file is the sequence; the second line holds two
    whitespace-separated integers, ``k`` and ``d``. The resulting k-mers are
    printed on one line, separated by single spaces.

    Parameters
    ----------
    path : str, optional
        Location of the input file. Defaults to ``'rosalind_ba1j.txt'`` in
        the current working directory.
    """
    # The context manager closes the file even if reading or parsing fails.
    with open(path, 'r') as handle:
        text = next(handle).strip()
        k, d = map(int, next(handle).split())

    print(" ".join(most_freq_mismatches_with_rc(text, k, d)))

In [82]:
# Entry point: call main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()

TTTAAA
