In [13]:
from tools import read_fasta_file, reverse_complement

In [59]:
def find_reverse_palindromes(text, lengths=(4, 6, 8, 10, 12)):
    """
    Find all reverse palindromes (genomic strings that are equal to their
    reverse complements) and their positions in a genome.

    Parameters:
        text: the genome string to scan.
        lengths: iterable of window lengths to test. Defaults to the even
            lengths from 4 to 12. Odd lengths are accepted but never match,
            because is_reverse_palindrome rejects odd-length patterns.

    Returns:
        A list of (position, substring) tuples, where position is the 1-based
        start index of the substring in text. Results are grouped by length
        (in the order given by `lengths`), then by increasing position.

    Running time: O(N * sum(lengths)), since each of the O(N) windows of a
    given length is checked in time proportional to that length.
    """
    n = len(text)
    found = []
    for length in lengths:
        # When length > n the range is empty, so short inputs yield nothing.
        for start in range(n - length + 1):
            candidate = text[start:start + length]
            if is_reverse_palindrome(candidate):
                # Positions are reported 1-based.
                found.append((start + 1, candidate))
    return found

In [56]:
# Watson-Crick complement of each (uppercase) DNA nucleotide. Built once at
# import time instead of on every call.
_COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}


def is_reverse_palindrome(pattern):
    """
    Tell whether a DNA string is equal to its own reverse complement.

    Compares the first half of the pattern to the complement of the mirrored
    character in the second half, so the full reverse complement never has to
    be built and compared against the original string.

    Parameters:
        pattern: the string to test. The comparison is case-sensitive and
            only the uppercase letters A, C, G and T have a complement.

    Returns:
        True if pattern is a reverse palindrome, False otherwise. Odd-length
        patterns and patterns containing any other character (e.g. 'N')
        return False. The empty string returns True.

    Running time: O(len(pattern))
    """
    k = len(pattern)
    # If odd length, the central character can never be its own complement.
    if k % 2 == 1:
        return False
    # Pairs compared for k = 6: 0-5, 1-4, 2-3.
    # .get() yields None for unknown characters, which never equals a
    # character of the pattern, so such patterns are rejected instead of
    # raising KeyError.
    return all(
        pattern[i] == _COMPLEMENT.get(pattern[k - i - 1])
        for i in range(k // 2)
    )

In [57]:
def solve(path):
    """
    Print the reverse palindromes of every sequence in a FASTA file.

    Reads the file at `path` with read_fasta_file, then for each entry prints
    one line per reverse palindrome: its 1-based position followed by its
    length, ordered by position.
    """
    records = read_fasta_file(path)
    for name in records:
        hits = find_reverse_palindromes(records[name])
        hits.sort()
        for position, palindrome in hits:
            print(position, len(palindrome))

In [60]:
# Run the solver on ./txt/rosalind_revp_test.txt (presumably the small sample
# dataset -- confirm). Each printed line is "position length".
solve('./txt/rosalind_revp_test.txt')

4 6
5 4
6 6
7 4
17 4
18 4
20 6
21 4


In [61]:
# Run the solver on ./txt/rosalind_revp.txt (presumably the full problem
# dataset -- confirm). Each printed line is "position length".
solve('./txt/rosalind_revp.txt')

1 4
7 6
8 4
23 6
24 4
44 6
45 4
57 6
58 4
65 4
73 4
95 4
104 4
108 8
109 6
110 4
139 4
153 6
154 4
163 6
164 4
173 8
174 6
175 4
229 4
234 4
239 4
244 4
246 4
269 6
270 4
271 6
272 4
281 4
284 4
290 4
299 6
300 4
306 4
321 4
372 6
373 4
380 4
389 4
400 4
408 4
429 4
437 6
438 4
440 4
448 6
449 4
460 4
463 4
484 4
484 12
485 10
486 8
487 6
488 4
492 4
502 6
503 4
527 4
530 4
534 6
535 4
542 4
548 4
557 4
580 4
581 4
597 4
598 4
647 4
647 6
648 4
648 6
649 4
650 4
670 6
671 4
711 4
714 4
730 4
743 4
759 4
774 6
775 4
776 6
777 4
799 4
834 4
852 4
854 4
860 4
887 6
888 4
890 4
903 6
904 4
952 4
955 4
972 4
972 6
973 4
973 6
974 4
975 4
