In [50]:
'''
Created June 25, 2020
@author Carl J. Raymond
'''

# Allowed bonding partners for each RNA base: the Watson-Crick pairs A-U and
# C-G, plus the "wobble" pair U-G.
baseComp = { 'A' : ['U'], 'C' : ['G'], 'G' : ['C', 'U'], 'U' : ['A', 'G'] }

# Memo table for perfectMatchCount, keyed by the (sub)sequence string.
cache = { }
# Number of perfectMatchCount calls that were answered from the cache.
cacheHits = 0

# Smallest allowed index difference between two bonded positions.
_minBondGap = 4


def perfectMatchCount(seq):
    """Count the non-crossing matchings of the bases in seq.

    Despite the name, the matchings counted here need not be perfect: any
    base may stay unbonded, so the matching with no bonds at all is always
    included.  A bond may join positions i < j only when seq[j] is listed in
    baseComp[seq[i]] and j - i >= 4.

    Results are memoized in the module-level ``cache`` (keyed by the sequence
    itself), and each call answered from the cache increments the
    module-level ``cacheHits``.

    Args:
        seq: RNA string whose characters are keys of baseComp (A, C, G, U).

    Returns:
        The number of matchings, as an int (1 for an empty sequence).

    Raises:
        KeyError: if a character that is followed by at least four more
            characters is not a key of baseComp.

    Note:
        The recursion descends one level per character of seq, so inputs
        whose length approaches the interpreter's recursion limit will raise
        RecursionError.
    """
    # Only cacheHits is rebound here; cache is mutated in place, so it needs
    # no global declaration.
    global cacheHits

    # An empty sequence has exactly one matching: the one with no bonds.
    if not seq:
        return 1

    # Cached counts are always >= 1, so None unambiguously means "missing".
    cached = cache.get(seq)
    if cached is not None:
        cacheHits += 1
        return cached

    # Position 0 is the anchor.  Start with the matchings that leave the
    # anchor unbonded, i.e. every matching of the rest of the sequence.
    total = perfectMatchCount(seq[1:])

    # Then bond the anchor to each legal partner in turn.
    anchorBase = seq[0]
    for matchPos in range(_minBondGap, len(seq)):
        if seq[matchPos] in baseComp[anchorBase]:
            # Bonds may not cross, so the bases enclosed by the bond and the
            # bases after it are matched independently; multiply the counts.
            enclosed = perfectMatchCount(seq[1:matchPos])
            trailing = perfectMatchCount(seq[matchPos + 1:])
            total += enclosed * trailing

    cache[seq] = total
    return total


# Driver: take the RNA string from the first line of the dataset file, count
# its matchings, and report the result together with the cache statistics.
with open("data/rosalind_rnas.txt") as datasetFile:
    firstLine = datasetFile.readline()
seq = firstLine.strip()

# Shorter inputs for manual experiments; uncomment one to override the file.
#seq = "AUGCUAGUACGGAGCGAGUCUAGCGAGCGAUGUCGUGAGUACUAUAUAUGCGCAUAAGCCACGU"
#seq = "AUGCUAGUACGGAGCGAGUCUAGCGAGCGAUGUCGUG"
#seq = "CGAUGCUAG"
print("Seq:", seq)

matchCount = perfectMatchCount(seq)
print("Perfect match count: ", matchCount)

# Memoization statistics gathered while computing the count above.
print("Cache size: ", len(cache))
print("Cache hits: ", cacheHits)

Seq: UCAACUGUGUCACGCGCUUAUAUAAGAGAGUGUGGUUUAUCUAAGCAAUCUGGAAAAACAUAGAUGUCAGCUCCACGACGGAUUGCGACUCAAUUCACUGGUCUUACACUUAUGAGCUUCUGCUCCUAUUUGUCCCGACCGGGGUUGUACGUGACACAGCGUUCAUACGGGGAGCUAAAACCAGUCGAGUCGUUCAUGAU
Perfect match count:  48674310291083016301722281551379316098112576401994
Cache size:  18952
Cache hits:  927055
