In [3]:
#Reads RNA in FASTA format from a file
#Parameter filename: A string containing the relative path of the file from where
#this program is stored
#Return: The RNA sequences contained in the file in list format
def readFile(filename):
    f = open(filename, "r")
    genes = []
    for x in f:
        if x is not "\n":
            genes.append(f.readline())
    f.close()
    return genes

#Sets up a dictionary that matches 3 RNA nucleotides to their corresponding
#amino acid or to Stop
#Return: The dictionary after being set up
def setUpTable():
    table = {}
    for key in ["UUU", "UUC"]:
        table[key] = "Phe"
    for key in ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"]:
        table[key] = "Leu"
    for key in ["AUU", "AUC", "AUA"]:
        table[key] = "Ile"
    for key in ["AUG"]:
        table[key] = "Met"
    for key in ["GUU", "GUC", "GUA", "GUG"]:
        table[key] = "Val"
    for key in ["UCU", "UCC", "UCA", "UCG", "AGU", "AGC"]:
        table[key] = "Ser"
    for key in ["CCU", "CCC", "CCA", "CCG"]:
        table[key] = "Pro"
    for key in ["ACU", "ACC", "ACA", "ACG"]:
        table[key] = "Thr"
    for key in ["GCU", "GCC", "GCA", "GCG"]:
        table[key] = "Ala"
    for key in ["UAU", "UAC"]:
        table[key] = "Tyr"
    for key in ["CAU", "CAC"]:
        table[key] = "His"
    for key in ["CAA", "CAG"]:
        table[key] = "Gln"
    for key in ["AAU", "AAC"]:
        table[key] = "Asn"
    for key in ["AAA", "AAG"]:
        table[key] = "Lys"
    for key in ["GAU", "GAC"]:
        table[key] = "Asp"
    for key in ["GAA", "GAG"]:
        table[key] = "Glu"
    for key in ["UGU", "UGC"]:
        table[key] = "Cys"
    for key in ["UGG"]:
        table[key] = "Trp"
    for key in ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"]:
        table[key] = "Arg"
    for key in ["GGU", "GGC", "GGA", "GGG"]:
        table[key] = "Gly"
    for key in ["UAA", "UAG", "UGA"]:
        table[key] = "Stop"
    return table

#Converts an RNA sequence into the corresponding amino acid sequence. Does not
#yet handle stop codons besides adding stop
#Parameter RNA: The RNA sequence in string format
#Parameter table: The dictionary that pairs RNA to amino acids
#Return: A string containing the amino acid sequence
def convertToAAs(RNA, table):
    seq = []
    current = 0
    while current + 2 < len(RNA):
        seq.append(table[RNA[current:current + 3]])
        current += 3
    return seq

#Main
genes = readFile("Assignment1Sequences.txt")
AATable = setUpTable()
AASeqs = []
for gene in genes:
    AASeqs.append(convertToAAs(gene, AATable))
print(AASeqs)

[['Met', 'Ser', 'Asn', 'Pro', 'Gln', 'Lys', 'Ala', 'Leu', 'Asn', 'Asp', 'Phe', 'Leu', 'Ser', 'Ser', 'Glu', 'Ser', 'Val', 'His', 'Thr', 'His', 'Asp', 'Ser', 'Ser', 'Arg', 'Lys', 'Gln', 'Ser', 'Asn', 'Lys', 'Gln', 'Ser', 'Ser', 'Asp', 'Glu', 'Gly', 'Arg', 'Ser', 'Ser', 'Ser', 'Gln', 'Pro', 'Ser', 'His', 'His', 'His', 'Ser', 'Gly', 'Gly', 'Thr', 'Asn', 'Asn', 'Ser', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Ser', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Gly', 'Asn', 'Asp', 'Gly', 'Gly', 'Asn', 'Asp', 'Asp', 'Asp', 'Tyr', 'Asp', 'Tyr', 'Glu', 'Met', 'Gln', 'Asp', 'Tyr', 'Arg', 'Pro', 'Ser', 'Pro', 'Gln', 'Ser', 'Ala', 'Arg', 'Pro', 'Thr', 'Pro', 'Thr', 'Tyr', 'Val', 'Pro', 'Gln', 'Tyr', 'Ser', 'Val', 'Glu', 'Ser', 'Gly', 'Thr', 'Ala', 'Phe', 'Pro', 'Ile', 'Gln', 'Glu', 'Val', 'Ile', 'Pro', 'Ser', 'Ala', 'Tyr', 'Ile', 'Asn', 'Thr', 'Gln', 'Asp', 'Ile', 'Asn', 'His', 'Lys', 'Asp', 'Asn', 'Gly', 'Pro', 'Pro', 'Ser', 'Ala', 'Ser', 'Ser', 'Asn', 'Arg', 'Ala', 'Phe', 'Arg', 'Pro

In [4]:
print(AASeqs[2])

['Met', 'Ala', 'Ser', 'Ser', 'Ala', 'Ala', 'Val', 'Val', 'Glu', 'Ile', 'Ile', 'Asp', 'Glu', 'Asp', 'Asp', 'Asp', 'Asp', 'Thr', 'Ala', 'Ala', 'Ala', 'Thr', 'Pro', 'Leu', 'Ala', 'Val', 'His', 'Lys', 'Arg', 'Ser', 'His', 'Ala', 'Leu', 'Ala', 'Ala', 'Ala', 'Thr', 'Thr', 'Ala', 'Pro', 'Gly', 'Pro', 'Pro', 'Pro', 'Val', 'Asp', 'Ala', 'Ser', 'Arg', 'Ser', 'Val', 'Ser', 'Ala', 'Val', 'Ala', 'Asp', 'Thr', 'Pro', 'Arg', 'Ser', 'Ser', 'Val', 'Pro', 'Cys', 'Ser', 'Leu', 'Gly', 'Asn', 'Arg', 'Ala', 'Val', 'Ala', 'Asp', 'Asp', 'Thr', 'Pro', 'Asn', 'Ser', 'Val', 'Leu', 'Pro', 'Ser', 'Pro', 'Ser', 'His', 'Phe', 'Gly', 'Val', 'Ala', 'Asp', 'Ser', 'Ala', 'Thr', 'Pro', 'Arg', 'Ser', 'Asp', 'Val', 'Pro', 'Cys', 'Ser', 'Ile', 'Gly', 'Pro', 'Ala', 'Asp', 'Val', 'Val', 'Pro', 'Glu', 'Thr', 'Pro', 'Gly', 'Leu', 'Ala', 'Val', 'Pro', 'Arg', 'Leu', 'Ala', 'Ala', 'Pro', 'Pro', 'Ser', 'Val', 'Pro', 'Ala', 'Leu', 'Pro', 'Ser', 'Leu', 'Ala', 'Thr', 'Ala', 'Arg', 'Lys', 'Phe', 'Ser', 'Gly', 'Val', 'Pro', 'Cys', 'Pro'

In [6]:
gene3 = AASeqs[2]
fstring = ""
for aa in gene3:
    if aa == 'Met':
        fstring += '1'
    elif aa == 'Stop':
        fstring += '2'
    else:
        fstring += '.'
        
print(fstring)

1......................................................................................................................................................................1....................................................................................................1......1................1......................................................................1............................................................1........................................................................................................................1.......1................1........................1.............2...............2......1........2.......1.........1...............2.........


In [26]:
def make_proteins(gene, output = False):
    i = 0
    proteins = []
    while i < len(gene):
        t = []
        if gene[i] != 'Met':
            i += 1
            continue
        else:
            t.append('Met')
            i += 1
            while gene[i] != 'Stop':
                t.append(gene[i])
                i += 1
            t.append('Stop')
            proteins.append(t)
    if output:
        for p in proteins:
            print(p, end='\n\n')
        
    return proteins
        
for gene in AASeqs:
    make_proteins(gene, output = True)

['Met', 'Ser', 'Asn', 'Pro', 'Gln', 'Lys', 'Ala', 'Leu', 'Asn', 'Asp', 'Phe', 'Leu', 'Ser', 'Ser', 'Glu', 'Ser', 'Val', 'His', 'Thr', 'His', 'Asp', 'Ser', 'Ser', 'Arg', 'Lys', 'Gln', 'Ser', 'Asn', 'Lys', 'Gln', 'Ser', 'Ser', 'Asp', 'Glu', 'Gly', 'Arg', 'Ser', 'Ser', 'Ser', 'Gln', 'Pro', 'Ser', 'His', 'His', 'His', 'Ser', 'Gly', 'Gly', 'Thr', 'Asn', 'Asn', 'Ser', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Ser', 'Asn', 'Asn', 'Asn', 'Asn', 'Asn', 'Gly', 'Asn', 'Asp', 'Gly', 'Gly', 'Asn', 'Asp', 'Asp', 'Asp', 'Tyr', 'Asp', 'Tyr', 'Glu', 'Met', 'Gln', 'Asp', 'Tyr', 'Arg', 'Pro', 'Ser', 'Pro', 'Gln', 'Ser', 'Ala', 'Arg', 'Pro', 'Thr', 'Pro', 'Thr', 'Tyr', 'Val', 'Pro', 'Gln', 'Tyr', 'Ser', 'Val', 'Glu', 'Ser', 'Gly', 'Thr', 'Ala', 'Phe', 'Pro', 'Ile', 'Gln', 'Glu', 'Val', 'Ile', 'Pro', 'Ser', 'Ala', 'Tyr', 'Ile', 'Asn', 'Thr', 'Gln', 'Asp', 'Ile', 'Asn', 'His', 'Lys', 'Asp', 'Asn', 'Gly', 'Pro', 'Pro', 'Ser', 'Ala', 'Ser', 'Ser', 'Asn', 'Arg', 'Ala', 'Phe', 'Arg', 'Pro'

In [15]:
non_polar = [
    'Gly',
    'Ala',
    'Pro',
    'Val',
    'Leu',
    'Iso',
    'Met',
    'Trp',
    'Phe'
]


In [27]:
for gene in AASeqs:
    proteins = make_proteins(gene)
    for pro in proteins:
        p = 0
        for aa in pro:
            if aa in non_polar:
                p += 1
        p = p / len(pro)
        print(p)
    print()

0.41492537313432837

0.4634920634920635

0.4
0.7
0.6296296296296297



In [29]:
for gene in AASeqs:
    proteins = make_proteins(gene)
    for pro in proteins:
        print(len(pro))
    print()

670

315

610
10
27

