In [2]:
from Bio import SeqIO

fasta_sequences = SeqIO.parse(open("data/rosalind_gc.txt"), 'fasta')

seqs = []
for fasta in fasta_sequences:
    name, sequence = fasta.id, fasta.seq.tostring()
    seqs.append((name, sequence))

def gc(seqn):
    name, seqnc = seqn
    gconly = [z for z in seqnc if z in ['G', 'C']]
    return (name, 100.0 * len(gconly) / float(len(seqnc)))

print("%s\n%s" % max(map(gc, seqs), key=lambda z: z[1]))


Rosalind_1514
52.94840294840295




In [3]:
def hamming(seqs):
    xs, ys = seqs
    return len([(x,y) for x,y in zip(xs, ys) if x != y])


In [4]:
def countpairwise(n):
    return n*(n-1)/2.0

def mendel(k,m,n):
    total = countpairwise(k+m+n)
    
    i1 = k*m + k*n + (2/4)*m*n
    i2 = countpairwise(k)
    i3 = countpairwise(m) * (3.0 / 4.0)
    return (i1+i2+i3)/total

mendel(26, 28, 26)

0.7520569620253165

In [5]:
rnaToP = {
  "UUU": "F",
  "CUU": "L",
  "AUU": "I",
  "GUU": "V",
  "UUC": "F",
  "CUC": "L",
  "AUC": "I",
  "GUC": "V",
  "UUA": "L",
  "CUA": "L",
  "AUA": "I",
  "GUA": "V",
  "UUG": "L",
  "CUG": "L",
  "AUG": "M",
  "GUG": "V",
  "UCU": "S",
  "CCU": "P",
  "ACU": "T",
  "GCU": "A",
  "UCC": "S",
  "CCC": "P",
  "ACC": "T",
  "GCC": "A",
  "UCA": "S",
  "CCA": "P",
  "ACA": "T",
  "GCA": "A",
  "UCG": "S",
  "CCG": "P",
  "ACG": "T",
  "GCG": "A",
  "UAU": "Y",
  "CAU": "H",
  "AAU": "N",
  "GAU": "D",
  "UAC": "Y",
  "CAC": "H",
  "AAC": "N",
  "GAC": "D",
  "UAA": "Stop",
  "CAA": "Q",
  "AAA": "K",
  "GAA": "E",
  "UAG": "Stop",
  "CAG": "Q",
  "AAG": "K",
  "GAG": "E",
  "UGU": "C",
  "CGU": "R",
  "AGU": "S",
  "GGU": "G",
  "UGC": "C",
  "CGC": "R",
  "AGC": "S",
  "GGC": "G",
  "UGA": "Stop",
  "CGA": "R",
  "AGA": "R",
  "GGA": "G",
  "UGG": "W",
  "CGG": "R",
  "AGG": "R",
  "GGG": "G"
}

def translate(seqc,acc):
    if len(seqc) < 3:
        return True, acc 
    triple = ''.join(seqc[0:3])
    prot = rnaToP[triple]
    if prot == "Stop":
        return True, acc 
    return seqc[3:len(seqc)], acc + [prot]

f = open("data/rosalind_prot.txt")
seqc = f.read()
acc = []
while seqc != True:
    seqc, acc = translate(seqc, acc)
f.close()

''.join(acc)

'MTLCESLGAARTHHRSGQPTSKVGDHSWRVIRAARPNVRVFALNPMSRSSRPSDFNIHKIIMPAEQPSGGFTILYYGYPTEGVDPKSTNKIPGCSAYRKRVCGRDFEGVVDYWQRCGRVCAIANAELKPVLDGPRTVTLLNCGYASALTRSACPRRFRGVSFPLYWLRTSQDTFNVRLSIYRMGVLDILLAYGLQQRNNRCNARRGPQNAYEYHRGQRRHVGLNKYRDFAYTIVNNLSGRAIRSLLLKGARRFHVFYKIGSHLERAGSPRIECLSLRAKGLRDIFQPLRLDSDIRINLPDKVLGESAYTLSKARGLVTLVPKPAIVYQGASRYTRHQEQKELCRHFLSCFAAIFTDLSYLHCCYIELQLALFRLACQARAVYQGRLPESISPQGKNARLKRFECRTRLNRAEPLRPVWWYARIAQDRLRPGRILCRCYECFGQQPCPLSKHGSVCDRAQEMTKLSVRQRILGDGMSWGNLSMSPLWYYNAHVYLSEYTRVSVNSNPHSKSATKGGREHWRHVGHKTKACTVLQFRPPPRGPCRTIRFQGAVIQVAYISCIAMYILYTHMGSLCTWRSGAVMQTEIAQQLRIFTGGSSSALSRGPSPERWPSWILVVNPHHSPVPRGSPAVNQTTHVPCGAAGIVSIYEKMPILYPLELRFRADNRERYATQSSRDSIHCGLRRTVVPQSVTHKEAAQRKRSRLSPGGRNSRGARKQAIVHSCGFEAGIVLSRWITISTDIAGRLSVSINLRGVERNLELDAGQGSGPSIGRPDSGPPCRGITRLRVHSMMWRLDRTSLYDSTTRGVHLPLKLCQSGPQAPLRVVSLALQPENTATNQALFRLYSYVGTCNDAVGRSQDENLRTVVHVAVETLALVAEPWDGCPYQECKSWYNACTPRVKVTDHSPRRRQFKEAYIVVVGRSISSDETLARSSFSIHQPFTVRSSGLRPSLESTPRALWRTIDIPLDASLTGRSLAFSSSSNIWWVWAVSYDLYRHHEGV

In [6]:
def findin(haystack, pin):
    print(haystack.rstrip())
    print(pin.rstrip())
    i = haystack.find(pin)
    res = []
    while i != -1:
        res = res + [i + len(res)]
        haystack = haystack[1:len(haystack)]
        i = haystack.find(pin)
    return [x+1 for x in list(set(res))]

f = open("data/rosalind_subs.txt")
haystack = f.readline().rstrip()
pin = f.readline().rstrip()
res = ' '.join([str(x) for x in findin(haystack, pin)])
f.close()
res

TAATAGCTAATAGCCGCGTAATAGCCCTGTTGTCATAATAGCCGTAATAGCTAATAGCTAATAGCCTAATAGCTAATAGCAGACTACGAGTAATAGCTCTAATAGCCGTAATAGCTAATAGCTAATAGCTAATAGCTTATCTTAATAGCTAATAGCTTAATAGCTAATAGCTTAATAGCTTTTGTCCACGGGCTTCCTAATAGCTAGTTAATAGCTAATAGCGATAATAGCAGCTAATAGCGGTAATAGCTAATAGCCTAATAGCATAATAGCTAATAGCAAAGATAATAGCTAATAGCCTGCCTATATAATAGCGTAATAGCCTAATAGCATAATAGCGGATAATAGCCTACTAATAGCTAATAGCAATAATAGCTTCCTAATAGCAAGGATAATAGCGAGGTCTAATAGCTCCGACTTTAATAGCGGTAATAGCATTAATAGCCCACCATAATAGCGTAATAGCACCTTAATAGCGGTAATAGCTTCAATATAATAGCATAATAGCTAATAGCTAAGAGACTGTAATAGCGGGGGGGAATTAATAGCTAATTGAGAAACTTTAATAGCTAATAGCCCTGGTTTTAATAGCTAATAGCAAGTAATAGCTAATAGCTAATAGCATCGTAATAGCAGATAATAGCGCCGTTAATAGCTTTAATAGCAATATAATAGCCCGCGGTAATAGCATAATAGCAGGATAATAGCTAATAGCTAATAGCTATATAATAGCGGTAATAGCCAATAATAGCCATGCTTTACCGCTTAATAGCACTTAATAGCAATAATAGCACTAATAGCCGATTAATAGCGCGCTATAATAGCACCCTAATAGCCATAATAGCGGATTAATAGCTGTAATAGC
TAATAGCTA


'1 267 143 286 158 543 45 52 564 702 67 709 198 586 716 209 603 354 610 109 116 244 502 123 509'

In [7]:
from Bio import SeqIO

fasta_sequences = SeqIO.parse(open("data/rosalind_cons.txt"), 'fasta')
strands = [list(str(fasta.seq)) for fasta in fasta_sequences]

def c(key, col):
   return str(len([x for x in col if x == key]))

nucleotides = ['A', 'C', 'G', 'T']
profmat = [[c('A', x), c('C', x), c('G', x), c('T', x)]
            for x in zip(*strands)]
consensus = [nucleotides[l.index(max(l))] for l in profmat]
profmat = zip(nucleotides, zip(*profmat))

print("".join(consensus))
print("\n".join([k + ": " + ' '.join(v) for k,v in profmat]))

ACTCCCCAGCATTCTAACTGTGGGATACAACAGTGGCTGGACATACCCACGTTTAAGTAAGGAACAGGAAGCCAGACGAACGTAGCCCAAAGACATGGTTCCACCACAGAGCTTCCCTTCGTGCTAGGGCGACACGATCTCTCCGAAACAATAACGGCGAGGGGTAACCAAAATAGACCAGCACAAACTCAATAATGGCGCAAAACACCAAAACTCCACCGAATGCAAAGGTAAATTCAGATCCACAAAGTCCGCGAGGCTGCCGAGTGCGTCCCCCGACTAAGAAAAGGCGACGGCCGCAGACTCGGGAACTGACGTATTAGAACAGGTGCCAAGCAACCTGCAGTGGAGGCAGAACCGCATAAAATCAAGGCACCACGTACTGGTCAGCACGACACCAGTTACAGCAGCCTTCGGATTACCCGGTGACGGGACATTGGCTTACCCGACCCCAAATAGCGTCCCCCTTACAAATTACTAACCACCTTTAGAAACCCGGGATCTGCAACCGAGCAGGCAGAACCCGGGCCGGCGCTGTACTCCCGGCCCATCTCACATGCTAGTCGGACCACGTCACATGATGCGCCCCAATTCGTCCAGGAACACCCCAAGAAAAAACGTTGCCTAGGCGAAAATTTACCGGAATTACGACTAGGGCACAGGTAGTGGAATTTTATGAAACTAGAGTGCAGCCTAAGGCGTTCGGCAGATGCAATTCCCTTATAGCTTCTGCCAAATAAAAAAGCACCAATTCCCCCCTGTCAGTATAGAAACACAGGTAAAGGAATCCACGAGTCCAAGGCCGTAGATGTCGGGCTCACAAGACAACATGTAGCAGAAGCTGGAACCCAAAAATCAGTAAAGAAGGCGCCTCGGGCTACCAACGGAAGCGAGAAGATAAGAGACAACGCATACACGACCGCATAGCA
A: 4 2 2 2 1 1 2 3 0 2 4 0 1 2 3 4 4 2 2 3 3 2 2 2 3 2 4 3 3 3 1 3 2 1

In [8]:
def mortal_rabbits(months, expectency):
    seqc = [1,1]
    while len(seqc) < months:
        nex = sum(seqc[-2:])
        if len(seqc) == expectency:
            nex -= seqc[-expectency] 
        elif len(seqc) > expectency:
            nex -= seqc[-(expectency+1)]
        seqc.append(nex)
    return seqc[-1]
mortal_rabbits(88, 18)

1093984671460515481

In [9]:
from Bio import SeqIO

fasta_sequences = SeqIO.parse(open("data/rosalind_grph.txt"), 'fasta')
strands = [(fasta.id, str(fasta.seq)) for fasta in fasta_sequences]

def connected(x,y,k):
    return x != y and x[-k:] == y[:k] 

graph = [(x[0],y[0]) 
         for x in strands 
         for y in strands 
         if connected(x[1],y[1],3)]

print("\n".join(["%s %s" % e for e in graph]))

Rosalind_6974 Rosalind_4238
Rosalind_6914 Rosalind_6403
Rosalind_6914 Rosalind_2922
Rosalind_7175 Rosalind_6403
Rosalind_7175 Rosalind_2922
Rosalind_4927 Rosalind_3916
Rosalind_9916 Rosalind_0251
Rosalind_9916 Rosalind_9473
Rosalind_9916 Rosalind_1669
Rosalind_9916 Rosalind_1958
Rosalind_5073 Rosalind_6974
Rosalind_5073 Rosalind_8132
Rosalind_5073 Rosalind_1512
Rosalind_5073 Rosalind_0693
Rosalind_8781 Rosalind_3102
Rosalind_8781 Rosalind_6142
Rosalind_3102 Rosalind_6199
Rosalind_8860 Rosalind_9276
Rosalind_2264 Rosalind_5514
Rosalind_2264 Rosalind_6339
Rosalind_3404 Rosalind_9922
Rosalind_4421 Rosalind_3549
Rosalind_4421 Rosalind_1315
Rosalind_0251 Rosalind_1692
Rosalind_3617 Rosalind_1641
Rosalind_3019 Rosalind_6403
Rosalind_3019 Rosalind_2922
Rosalind_2935 Rosalind_9276
Rosalind_2330 Rosalind_4238
Rosalind_3549 Rosalind_3916
Rosalind_4238 Rosalind_3209
Rosalind_4238 Rosalind_4012
Rosalind_4238 Rosalind_2098
Rosalind_5168 Rosalind_6808
Rosalind_5168 Rosalind_7494
Rosalind_5168 Rosali

In [10]:
domProbs = [1.0, 1.0, 1.0, 0.75, 0.5, 0]

def expectation(pops, offSpring):
    return offSpring * sum([pop * p for (pop, p) in zip(pops, domProbs)])

expectation([18855, 19867, 18691, 19126, 17045, 16627], 2)

160560.0