In [33]:
'''
Created June 25, 2020
@author Carl J. Raymond
'''

from IPython.core.debugger import set_trace

# Base complements, including the "wobble" base pair U-G.
baseComp = {
    'A': ['U'],
    'C': ['G'],
    'G': ['C', 'U'],
    'U': ['A', 'G'],
}

# Minimum index distance between two paired positions: position i may only be
# paired with a position k >= i + 4.
_minPairDistance = 4

# The cache stores computed values of perfectMatchCount for ONE sequence at a
# time, using a key which is a tuple (start, stop). _cacheSeq remembers which
# sequence the entries were computed for, so that they are never reused for a
# different sequence. cacheHits is cumulative over all calls.
cache = {}
cacheHits = 0
_cacheSeq = None


def perfectMatchCount(seq, start, stop):
    """Count the matchings in seq from position start (incl.) to stop (excl.).

    A matching is a set of non-crossing pairs (i, k) such that seq[k] is a
    complement of seq[i] according to baseComp and k >= i + 4. Positions may
    be left unpaired, so despite the function's name the count includes
    matchings that are not perfect, and the empty matching is always counted.

    Results are memoized in the module-level ``cache``. The cache is emptied
    whenever the function is called with a sequence that differs from the
    one of the previous call.

    Args:
        seq: Indexable sequence of bases; every position that is used as an
            anchor must be a key of baseComp.
        start: First position of the interval (inclusive).
        stop: End of the interval (exclusive).

    Returns:
        The number of matchings; 1 when the interval is too short to hold
        any pair.

    Raises:
        KeyError: If an anchor base is not a key of baseComp.

    Note:
        The recursion depth grows linearly with stop - start.
    """
    global _cacheSeq

    # Entries are keyed by position only, so they are valid for a single
    # sequence. Drop them (in place, so outside references to ``cache`` stay
    # valid) as soon as a different sequence shows up.
    if _cacheSeq is None or seq != _cacheSeq:
        cache.clear()
        _cacheSeq = seq

    return _countMatchings(seq, start, stop)


def _countMatchings(seq, start, stop):
    """Memoized recursion behind perfectMatchCount; same arguments and result."""
    global cacheHits

    key = (start, stop)
    if key in cache:
        cacheHits += 1
        return cache[key]

    # An interval too short to contain a pair has exactly one matching: the
    # one consisting of no edges.
    if start + _minPairDistance >= stop:
        return 1

    # Use the start position as the anchor. Either the anchor stays unpaired,
    # or it is paired with one of the admissible positions; the two cases are
    # disjoint, so their counts add up.
    partners = baseComp[seq[start]]

    # Case 1: the anchor is not matched with any other position.
    total = _countMatchings(seq, start + 1, stop)

    # Case 2: the anchor is matched with matchPos. The edge splits the
    # interval into the part enclosed by the edge and the part after it;
    # these are matched independently, so their counts multiply.
    for matchPos in range(start + _minPairDistance, stop):
        if seq[matchPos] in partners:
            inside = _countMatchings(seq, start + 1, matchPos)
            outside = _countMatchings(seq, matchPos + 1, stop)
            total += inside * outside

    cache[key] = total
    return total


# Input sequence. The short sample below is active so that the cell runs on a
# fresh kernel. To use a dataset instead, enable the file read; the trailing
# newline must be stripped, otherwise the base tally below fails on '\n'.
#with open("data/rosalind_rnas.txt") as spec:
#    seq = spec.readline().strip()
#seq = "AUGCUAGUACGGAGCGAGUCUAGCGAGCGAUGUCGUGAGUACUAUAUAUGCGCAUAAGCCACGU"
seq = "CGAUGCUAG"
print("Seq:", seq)

# Tally the bases; any character other than A, C, G or U raises KeyError.
counts = { 'A': 0, 'C': 0, 'G': 0, 'U': 0 }
for b in seq:
    counts[b] += 1

print("Counts: ", counts)
print("Perfect match count: ", perfectMatchCount(seq, 0, len(seq)))

# Memoization statistics collected by perfectMatchCount.
print("Cache size: ", len(cache))
print("Cache hits: ", cacheHits)

Seq: CGAUGCUAG
Counts:  {'A': 2, 'C': 2, 'G': 3, 'U': 2}
Perfect match count:  12
Cache size:  8
Cache hits:  0


In [None]:
284850219977421