In [None]:
from Bio import SeqIO

# Load every FASTA record as a (record id, sequence string) pair.
# str(record.seq) replaces the removed Seq.tostring() and matches the other
# cells; the `with` block closes the file once all records have been read.
with open("rosalind_gc.txt") as handle:
    seqs = [(record.id, str(record.seq))
            for record in SeqIO.parse(handle, 'fasta')]

def gc(seqn):
    """Return ``(name, GC percentage)`` for one named sequence.

    Args:
        seqn: a ``(name, sequence)`` pair; the sequence is iterated
            symbol by symbol.

    Returns:
        A tuple of the name and the percentage (0.0-100.0) of symbols equal
        to ``'G'`` or ``'C'``. Matching is case-sensitive. An empty sequence
        yields 0.0 rather than raising ZeroDivisionError.
    """
    name, seqnc = seqn
    if len(seqnc) == 0:
        return (name, 0.0)
    # Count lazily instead of materialising a list of the matching symbols.
    gc_count = sum(1 for z in seqnc if z in ('G', 'C'))
    return (name, 100.0 * gc_count / float(len(seqnc)))

# Report the record with the highest GC percentage: its name on one line and
# the percentage on the next. gc() yields (name, percentage) pairs.
print("%s\n%s" % max((gc(entry) for entry in seqs), key=lambda pair: pair[1]))


In [None]:
def hamming(seqs):
    """Count the positions at which two sequences differ.

    Args:
        seqs: a pair ``(first, second)`` of sequences.

    Returns:
        The number of aligned positions whose symbols are unequal. The
        comparison stops at the end of the shorter sequence (``zip``).
    """
    first, second = seqs
    return sum(1 for a, b in zip(first, second) if a != b)


In [None]:
def countpairwise(n):
    """Return the number of unordered pairs among ``n`` items, as a float."""
    return n * (n - 1) / 2.0


def mendel(k, m, n):
    """Return the weighted fraction of pairs drawn from three groups.

    Every unordered pair from a population of ``k + m + n`` individuals is
    weighted by the group of each member:

    * any pair containing a ``k`` member counts 1
    * an ``m``-``m`` pair counts 3/4
    * an ``m``-``n`` pair counts 1/2
    * an ``n``-``n`` pair counts 0

    Presumably k/m/n are homozygous-dominant / heterozygous /
    homozygous-recessive counts and the result is the probability of a
    dominant-phenotype offspring -- confirm against the problem statement.

    Raises:
        ZeroDivisionError: if the population has fewer than two members.
    """
    total = countpairwise(k + m + n)

    # 0.5 instead of (2/4): the integer expression evaluates to 0 under
    # integer division, which silently dropped the m*n term.
    i1 = k * m + k * n + 0.5 * m * n
    i2 = countpairwise(k)
    i3 = countpairwise(m) * 0.75
    return (i1 + i2 + i3) / total

mendel(26, 28, 26)

In [None]:
# RNA codon table: maps each three-letter codon over A/C/G/U to a one-letter
# amino-acid code. The three stop codons (UAA, UAG, UGA) map to the string
# "Stop", which translate() checks for to end translation.
rnaToP = {
  "UUU": "F",
  "CUU": "L",
  "AUU": "I",
  "GUU": "V",
  "UUC": "F",
  "CUC": "L",
  "AUC": "I",
  "GUC": "V",
  "UUA": "L",
  "CUA": "L",
  "AUA": "I",
  "GUA": "V",
  "UUG": "L",
  "CUG": "L",
  "AUG": "M",
  "GUG": "V",
  "UCU": "S",
  "CCU": "P",
  "ACU": "T",
  "GCU": "A",
  "UCC": "S",
  "CCC": "P",
  "ACC": "T",
  "GCC": "A",
  "UCA": "S",
  "CCA": "P",
  "ACA": "T",
  "GCA": "A",
  "UCG": "S",
  "CCG": "P",
  "ACG": "T",
  "GCG": "A",
  "UAU": "Y",
  "CAU": "H",
  "AAU": "N",
  "GAU": "D",
  "UAC": "Y",
  "CAC": "H",
  "AAC": "N",
  "GAC": "D",
  "UAA": "Stop",
  "CAA": "Q",
  "AAA": "K",
  "GAA": "E",
  "UAG": "Stop",
  "CAG": "Q",
  "AAG": "K",
  "GAG": "E",
  "UGU": "C",
  "CGU": "R",
  "AGU": "S",
  "GGU": "G",
  "UGC": "C",
  "CGC": "R",
  "AGC": "S",
  "GGC": "G",
  "UGA": "Stop",
  "CGA": "R",
  "AGA": "R",
  "GGA": "G",
  "UGG": "W",
  "CGG": "R",
  "AGG": "R",
  "GGG": "G"
}

def translate(seqc, acc):
    """Consume one codon from the front of ``seqc``.

    Args:
        seqc: the remaining RNA sequence (string or list of symbols).
        acc: list of amino-acid codes translated so far (not mutated).

    Returns:
        ``(rest, acc + [code])`` when a codon was translated, or
        ``(True, acc)`` when fewer than three symbols remain or the codon
        is a stop codon -- ``True`` is the "finished" sentinel.

    Raises:
        KeyError: if the leading three symbols are not a key of ``rnaToP``.
    """
    if len(seqc) >= 3:
        codon = ''.join(seqc[:3])
        residue = rnaToP[codon]
        if residue != "Stop":
            return seqc[3:], acc + [residue]
    return True, acc

# Read the RNA string with all whitespace removed: a trailing newline (or
# line wrapping) would otherwise become part of a codon and make the
# rnaToP lookup fail. The `with` block closes the file even on error.
with open("rosalind_prot.txt") as f:
    seqc = "".join(f.read().split())
acc = []
# translate() returns True in place of the remaining sequence once it is done.
while seqc is not True:
    seqc, acc = translate(seqc, acc)

''.join(acc)

In [None]:
def findin(haystack, pin):
    """Return the 1-based start positions of ``pin`` inside ``haystack``.

    Overlapping occurrences are all reported, each exactly once, in
    ascending order. Both inputs are echoed (right-stripped) to stdout
    before searching.

    Args:
        haystack: the string to search in.
        pin: the substring to look for.

    Returns:
        A list of 1-based positions; empty when ``pin`` does not occur or
        is the empty string (an empty pattern previously looped forever).
    """
    print(haystack.rstrip())
    print(pin.rstrip())
    if not pin:
        return []
    positions = []
    # Resume each search one character past the previous hit so overlapping
    # matches are found without re-slicing the haystack.
    hit = haystack.find(pin)
    while hit != -1:
        positions.append(hit + 1)
        hit = haystack.find(pin, hit + 1)
    return positions

# First line of the file is the text to search, second line is the pattern.
# The `with` block closes the file even if reading raises.
with open("rosalind_subs.txt") as f:
    haystack = f.readline().rstrip()
    pin = f.readline().rstrip()
res = ' '.join(str(x) for x in findin(haystack, pin))
res

In [None]:
from Bio import SeqIO

# Each strand becomes a list of single-character symbols so the strands can
# be transposed column by column. The `with` block closes the file once all
# records have been read.
with open("rosalind_cons.txt") as handle:
    strands = [list(str(record.seq)) for record in SeqIO.parse(handle, 'fasta')]

def c(key, col):
    """Return, as a string, how many items of ``col`` are equal to ``key``."""
    occurrences = sum(1 for symbol in col if symbol == key)
    return str(occurrences)

nucleotides = ['A', 'C', 'G', 'T']
# One row per alignment column, holding the A/C/G/T counts as integers.
# c() returns the count as a string; comparing those strings picks the wrong
# maximum once counts reach two digits ('9' > '10'), so convert first.
column_counts = [[int(c(base, column)) for base in nucleotides]
                 for column in zip(*strands)]
# The first nucleotide holding the maximum count wins ties.
consensus = [nucleotides[counts.index(max(counts))] for counts in column_counts]
# Transpose to one row per nucleotide for printing the profile matrix.
profmat = list(zip(nucleotides, zip(*column_counts)))

print("".join(consensus))
print("\n".join([k + ": " + ' '.join(str(n) for n in v) for k, v in profmat]))

In [None]:
def mortal_rabbits(months, expectency):
    """Return the last term of a Fibonacci-like sequence with a removal term.

    The sequence starts ``[1, 1]`` and is extended until it holds ``months``
    terms. Each new term is the sum of the previous two, minus:

    * the term ``expectency`` places from the end, when the sequence holds
      exactly ``expectency`` terms;
    * the term ``expectency + 1`` places from the end, when it holds more.

    Presumably this models rabbit pairs that die after ``expectency``
    months -- confirm against the problem statement.

    Returns:
        The final term (1 when ``months`` is 2 or less).
    """
    pairs = [1, 1]
    while len(pairs) < months:
        elapsed = len(pairs)
        if elapsed == expectency:
            removed = pairs[-expectency]
        elif elapsed > expectency:
            removed = pairs[-(expectency + 1)]
        else:
            removed = 0
        pairs.append(pairs[-2] + pairs[-1] - removed)
    return pairs[-1]
mortal_rabbits(88, 18)

In [None]:
from Bio import SeqIO

# Load every record as an (id, sequence string) pair; the `with` block closes
# the file once all records have been read.
with open("rosalind_grph.txt") as handle:
    strands = [(record.id, str(record.seq)) for record in SeqIO.parse(handle, 'fasta')]

def connected(x, y, k):
    """Tell whether the last ``k`` symbols of ``x`` equal the first ``k`` of ``y``.

    Equal sequences are never considered connected.
    """
    if x == y:
        return False
    suffix = x[-k:]
    prefix = y[:k]
    return suffix == prefix

# Directed edges (tail id, head id) for every ordered pair of strands whose
# sequences overlap by 3 symbols according to connected().
graph = [(tail_id, head_id)
         for tail_id, tail_seq in strands
         for head_id, head_seq in strands
         if connected(tail_seq, head_seq, 3)]

# One "tail head" line per edge.
print("\n".join("%s %s" % edge for edge in graph))

In [None]:
# Per-category weights applied to the six population counts, in order.
# Presumably the probability of a dominant-phenotype child for each parental
# genotype pairing -- confirm against the problem statement.
domProbs = [1.0, 1.0, 1.0, 0.75, 0.5, 0]

def expectation(pops, offSpring):
    """Return ``offSpring`` times the ``domProbs``-weighted sum of ``pops``.

    ``pops`` is paired with ``domProbs`` positionally; surplus entries on
    either side are ignored (``zip``).
    """
    weighted_total = sum(count * weight for count, weight in zip(pops, domProbs))
    return offSpring * weighted_total

expectation([18855, 19867, 18691, 19126, 17045, 16627], 2)

In [12]:
from itertools import product
from functools import reduce
from Bio import SeqIO

def lcs(s1, s2):
    """Return the longest common substrings of ``s1`` and ``s2``.

    Args:
        s1: first sequence; the returned substrings are slices of it.
        s2: second sequence.

    Returns:
        A list with one slice of ``s1`` per position where a common run of
        maximal length ends, ordered by end position in ``s1`` and then in
        ``s2``; the same text can therefore appear more than once. Returns
        ``[""]`` when either input is empty and ``[]`` when the inputs share
        no symbol.
    """
    if not s1 or not s2:
        return [""]

    best_len = 0
    end_indices = []
    # prev_row[j] is the length of the common run ending at the previous
    # symbol of s1 and at s2[j-1]; only the previous row is ever needed.
    prev_row = [0] * (len(s2) + 1)
    for i, sym1 in enumerate(s1):
        row = [0] * (len(s2) + 1)
        for j, sym2 in enumerate(s2, start=1):
            if sym1 == sym2:
                run = prev_row[j - 1] + 1
                row[j] = run
                if run > best_len:
                    best_len = run
                    end_indices = [i]
                elif run == best_len:
                    end_indices.append(i)
        prev_row = row

    return [s1[end - best_len + 1:end + 1] for end in end_indices]

def multilcs(strs):
    """Return the longest substrings shared by every string in ``strs``.

    The previous heuristic picked the shortest pairwise result and filtered
    it with a lexicographic ``<`` comparison, which does not yield a
    substring common to all inputs in general. This version checks each
    candidate against every input directly.

    Args:
        strs: a non-empty collection of strings.

    Returns:
        All distinct common substrings of maximal length, in order of first
        occurrence in the shortest input. ``[""]`` when the inputs share no
        symbol.

    Raises:
        ValueError: if ``strs`` is empty.
    """
    strs = list(strs)
    if not strs:
        raise ValueError("multilcs() requires at least one string")

    # Any common substring must occur in the shortest input.
    shortest = min(strs, key=len)

    def _common_of_length(length):
        """Distinct length-``length`` slices of ``shortest`` found in all inputs."""
        seen = set()
        found = []
        for start in range(len(shortest) - length + 1):
            candidate = shortest[start:start + length]
            if candidate in seen:
                continue
            seen.add(candidate)
            if all(candidate in s for s in strs):
                found.append(candidate)
        return found

    # If a common substring of length L exists, one of length L-1 does too,
    # so the maximal length can be found by binary search.
    best = [""]
    lo, hi = 0, len(shortest)
    while lo < hi:
        mid = (lo + hi + 1) // 2
        hits = _common_of_length(mid)
        if hits:
            best, lo = hits, mid
        else:
            hi = mid - 1
    return best


# Load every record as an (id, sequence string) pair; the `with` block closes
# the file once all records have been read.
with open("rosalind_lcsm.txt") as handle:
    strands = [(record.id, str(record.seq)) for record in SeqIO.parse(handle, 'fasta')]

# Show the first of the substrings returned for the loaded sequences.
multilcs([x[1] for x in strands])[0]

'TTACCAATTCGCTTAATCTATGTTACGTATTCGAGAGAAGAATATTTGACAAGAAAAGTTGTTGCCCGCAGGTTTCATCCTGGCGCCAGATAATAAAATGATGTTATACCTAAGCTCGGACTACAGATAACCAAGAAAAGATGGCAACCGCACGAGAGCACACTCGCCTATTACATAGGTGGCTTTGGGGAGACACTGTTACCTACGGCCATACTGTGGTATGCCTGGACCCAAACACATGCACGACTCGGGTAGGTTGCTCCACGTACAAGCTCACTCATTACGACA'