In [4]:
# import necessary modules
import numpy as np
import pandas as pd
from queue import Queue as queue
from random import choice
from random import shuffle
import pickle
import copy
from src.codonTable import codonTable
from src.codonUtils import utils

In [7]:
def reducedTriplet(to_remove):
    '''
    Build the sense-codon scaffold of a triplet decoding, fast fail genetic
    code with the amino acids in to_remove left out. Uses a rational,
    'top down' approach.

    The scaffold snakes along the codon table: for the i-th first nucleotide
    and the j-th third nucleotide, the second nucleotide is the one at
    position (i + j) modulo the alphabet size. As long as the nucleotides in
    utils.rNTPs are distinct, any two scaffold codons differ in at least two
    positions. Each scaffold codon receives a randomly chosen amino acid
    that has not been placed yet; every other codon in utils.tripletCodons
    is assigned STOP ('*').

    If to_remove leaves fewer amino acids than there are scaffold codons,
    the walk ends as soon as every remaining amino acid has been placed and
    the leftover scaffold codons stay STOP.

    Parameters
    ----------
    iterable to_remove: amino acids (as they appear in utils.residues) to
        omit from the table

    Returns
    ----------
    dict table: maps every codon in utils.tripletCodons to a residue or '*'
    set usedCodons: the codons that were assigned an amino acid
    '''

    # amino acids still waiting for a codon; the last entry of
    # utils.residues is dropped (presumably the STOP symbol -- confirm)
    unusedAA = set(utils.residues[:-1]) - set(to_remove)
    usedCodons = set()
    # start from an all-STOP table and overwrite the scaffold codons below
    table = {codon: '*' for codon in utils.tripletCodons}
    # NOTE: nucleotide order is used exactly as given by utils.rNTPs for all
    # three positions; no random permutation is applied
    nucleotides = list(utils.rNTPs)
    n = len(nucleotides)

    for i in range(n):
        for j in range(n):
            # nothing left to place: remaining scaffold codons stay STOP
            if not unusedAA:
                return table, usedCodons
            # shifting the middle position by i keeps codons in different
            # rows from sharing two nucleotides
            codon = nucleotides[i] + nucleotides[(i + j) % n] + nucleotides[j]
            AA = choice(tuple(unusedAA))
            table[codon] = AA
            # update caching variables
            usedCodons.add(codon)
            unusedAA.remove(AA)

    # return built table
    return table, usedCodons

def updateAvailable3(newCodon, availableSet):
    ''' Prune, in place, the codons that can no longer be used in a triplet
    decoding fast fail code once newCodon has been occupied: any codon that
    matches newCodon at two or more positions is dropped. The pruned
    collection (the same object that was passed in) is returned.'''

    positions = range(len(newCodon))
    # walk a snapshot so the collection can shrink while we scan it
    for candidate in tuple(availableSet):
        # number of positions at which the two codons agree
        shared = sum(candidate[k] == newCodon[k] for k in positions)
        if shared >= 2:
            availableSet.remove(candidate)
    # hand back the updated collection
    return availableSet

def finishTable(table, usedCodons, to_add):
    '''
    Add the amino acids in to_add to an existing fast fail table, giving
    each one a randomly chosen codon.

    Codons are drawn from utils.tripletCodons minus usedCodons. After each
    draw, updateAvailable3 removes from the pool every codon that shares two
    or more positions with the codon just chosen, so the newly added codons
    never share two positions with one another. Codons already in usedCodons
    are only excluded themselves; their neighbours stay eligible.

    Parameters
    ----------
    dict table: codon -> residue mapping; modified in place
    set usedCodons: codons already carrying an amino acid; the newly chosen
        codons are added to it in place
    iterable to_add: amino acids to place; repeated entries are placed once

    Returns
    ----------
    dict table: the same dict object that was passed in

    Raises
    ----------
    IndexError: (from random.choice) if the pool of eligible codons runs
        out before every amino acid has been placed
    '''
    availableCodons = set(utils.tripletCodons) - usedCodons
    unusedAA = set(to_add)
    # loop on the set rather than len(to_add): duplicates in to_add would
    # otherwise request more draws than there are amino acids to place
    while unusedAA:
        # pick a codon from the available set and assign it an amino acid
        codon = choice(tuple(availableCodons))
        AA = choice(tuple(unusedAA))
        table[codon] = AA
        # update cache variables
        availableCodons = updateAvailable3(codon, availableCodons)
        usedCodons.add(codon)
        unusedAA.remove(AA)
    return table

In [10]:
# choose which AA to remove
to_remove = ['C', 'W', 'H', 'M']
# generate a cascading fast fail table with 16 AA
# reducedTriplet returns the codon -> residue dict together with the set of
# codons it filled; that set is passed to finishTable in a later cell
ff16_table, used_codons = reducedTriplet(to_remove)
# wrap the raw dict in the project's codonTable class
# NOTE(review): ff16_table (dict) and ff16_Table (codonTable) differ only by
# the case of one letter -- easy to mix up
ff16_Table = codonTable(table=ff16_table)
# save as csv to file and display
# NOTE(review): absolute, machine-specific path; this cell cannot run
# unchanged on another machine -- consider a configurable output directory
directory = '/home/jonathan/Dropbox/Lab/Fast Fail/Figures/misc/'
filename = 'ff16.csv'
ff16_Table.codonTable.to_csv(directory + filename)
ff16_Table.codonTable

Unnamed: 0,Unnamed: 1,U,C,A,G
U,U,UUU : A,UCU : *,UAU : *,UGU : *
U,C,UUC : *,UCC : Q,UAC : *,UGC : *
U,A,UUA : *,UCA : *,UAA : K,UGA : *
U,G,UUG : *,UCG : *,UAG : *,UGG : R
C,U,CUU : *,CCU : S,CAU : *,CGU : *
C,C,CUC : *,CCC : *,CAC : Y,CGC : *
C,A,CUA : *,CCA : *,CAA : *,CGA : V
C,G,CUG : G,CCG : *,CAG : *,CGG : *
A,U,AUU : *,ACU : *,AAU : I,AGU : *
A,C,AUC : *,ACC : *,AAC : *,AGC : L


In [11]:
# add the remaining few
# work on a deep copy so ff16_table itself keeps only the 16 amino acid code
fftable = copy.deepcopy(ff16_table)
# finishTable adds every codon it picks to the set it is given; hand it a
# copy so used_codons keeps describing ff16_table and re-running this cell
# always draws from the same pool of free codons
fftable = finishTable(fftable, set(used_codons), to_remove)
ffTable = codonTable(table=fftable)
# save as csv to file and display
# NOTE(review): absolute, machine-specific path; this cell cannot run
# unchanged on another machine -- consider a configurable output directory
directory = '/home/jonathan/Dropbox/Lab/Fast Fail/Figures/misc/'
filename = 'fftable.csv'
ffTable.codonTable.to_csv(directory + filename)
ffTable.codonTable

Unnamed: 0,Unnamed: 1,U,C,A,G
U,U,UUU : A,UCU : *,UAU : *,UGU : H
U,C,UUC : *,UCC : Q,UAC : *,UGC : *
U,A,UUA : *,UCA : *,UAA : K,UGA : *
U,G,UUG : *,UCG : *,UAG : *,UGG : R
C,U,CUU : *,CCU : S,CAU : *,CGU : *
C,C,CUC : W,CCC : *,CAC : Y,CGC : *
C,A,CUA : *,CCA : *,CAA : *,CGA : V
C,G,CUG : G,CCG : *,CAG : *,CGG : *
A,U,AUU : *,ACU : M,AAU : I,AGU : *
A,C,AUC : *,ACC : *,AAC : *,AGC : L


In [12]:
# pickle the data for permanent briney goodness
# Both raw dicts (not the codonTable wrappers) are written. The paths are
# relative to the notebook's working directory, so a 'res' folder must
# already exist there. Existing files are overwritten.
with open('res/fftable_manuscript.pickle', 'wb') as handle:
    pickle.dump(fftable, handle)
with open('res/ff16table_manuscript.pickle', 'wb') as handle:
    pickle.dump(ff16_table, handle)

In [2]:
# test pickling
# Reload both dicts from disk and rebuild the codonTable wrappers, rebinding
# ffTable and ff16_Table.
# NOTE(review): the tables displayed after this cell differ from the ones
# generated above, so the pickle files on disk presumably came from a
# different (random) run -- confirm which version is the manuscript one.
# NOTE(review): pickle.load can execute arbitrary code; only load files
# written by this notebook or another trusted source.
with open('res/fftable_manuscript.pickle', 'rb') as handle:
    ffTable = codonTable(table=pickle.load(handle))
with open('res/ff16table_manuscript.pickle', 'rb') as handle:
    ff16_Table = codonTable(table=pickle.load(handle))

In [3]:
# display the full table rebuilt from res/fftable_manuscript.pickle
ffTable.codonTable

Unnamed: 0,Unnamed: 1,U,C,A,G
U,U,UUU : A,UCU : *,UAU : *,UGU : *
U,C,UUC : *,UCC : T,UAC : *,UGC : *
U,A,UUA : *,UCA : H,UAA : I,UGA : *
U,G,UUG : *,UCG : *,UAG : M,UGG : V
C,U,CUU : *,CCU : Y,CAU : *,CGU : W
C,C,CUC : *,CCC : *,CAC : R,CGC : *
C,A,CUA : *,CCA : *,CAA : *,CGA : N
C,G,CUG : G,CCG : *,CAG : *,CGG : *
A,U,AUU : *,ACU : *,AAU : F,AGU : *
A,C,AUC : *,ACC : *,AAC : C,AGC : P


In [4]:
# display the 16 amino acid table rebuilt from res/ff16table_manuscript.pickle
ff16_Table.codonTable

Unnamed: 0,Unnamed: 1,U,C,A,G
U,U,UUU : A,UCU : *,UAU : *,UGU : *
U,C,UUC : *,UCC : T,UAC : *,UGC : *
U,A,UUA : *,UCA : *,UAA : I,UGA : *
U,G,UUG : *,UCG : *,UAG : *,UGG : V
C,U,CUU : *,CCU : Y,CAU : *,CGU : *
C,C,CUC : *,CCC : *,CAC : R,CGC : *
C,A,CUA : *,CCA : *,CAA : *,CGA : N
C,G,CUG : G,CCG : *,CAG : *,CGG : *
A,U,AUU : *,ACU : *,AAU : F,AGU : *
A,C,AUC : *,ACC : *,AAC : *,AGC : P


In [5]:
# construct ffquad table

# the 20 quadruplet codons that will carry an amino acid
codons_to_assign = [
    'UUUU', 'CCAU', 'AAUU', 'GGAU', 'UGGU',
    'UCAC', 'CAUC', 'AGAC', 'GUUC', 'GGGC',
    'UAUA', 'CGAA', 'AUUA', 'GCAA', 'GUCA',
    'UGAG', 'CUUG', 'ACAG', 'GAUG', 'UUGG'
]

# every residue except STOP, in random order
# NOTE(review): no random seed is set, so each run yields a different code
AA = [residue for residue in utils.residues if residue != '*']
shuffle(AA)

# initialize ffquad with all stops
ffquad = {codon:'*' for codon in utils.quadrupletCodons}
# assign sense positions; the loop variable gets its own name so the
# shuffled list AA is not overwritten by a single residue
# (zip stops at the shorter of the two sequences)
for codon, residue in zip(codons_to_assign, AA):
    ffquad[codon] = residue

In [8]:
# pickle results
# save as csv to file and display
# NOTE(review): absolute, machine-specific path; this cell cannot run
# unchanged on another machine -- consider a configurable output directory.
# directory and filename rebind the names used by the earlier csv cells.
directory = '/home/jonathan/Dropbox/Lab/Fast Fail/res/Quad Code/'
filename = 'ffquad_table.csv'
ffquad_Table = codonTable(table=ffquad)
ffquad_Table.codonTable.to_csv(directory + filename)


# the raw dict (not the codonTable wrapper) is what gets pickled
with open(directory+'ffquad.pickle', 'wb') as handle:
    pickle.dump(ffquad, handle)
    
ffquad_Table.codonTable

Unnamed: 0,Unnamed: 1,Unnamed: 2,U,C,A,G
U,U,U,UUUU : F,UCUU : *,UAUU : *,UGUU : *
U,U,C,UUCU : *,UCCU : *,UACU : *,UGCU : *
U,U,A,UUAU : *,UCAU : *,UAAU : *,UGAU : *
U,U,G,UUGU : *,UCGU : *,UAGU : *,UGGU : P
U,C,U,CUUU : *,CCUU : *,CAUU : *,CGUU : *
U,C,C,CUCU : *,CCCU : *,CACU : *,CGCU : *
U,C,A,CUAU : *,CCAU : A,CAAU : *,CGAU : *
U,C,G,CUGU : *,CCGU : *,CAGU : *,CGGU : *
U,A,U,AUUU : *,ACUU : *,AAUU : L,AGUU : *
U,A,C,AUCU : *,ACCU : *,AACU : *,AGCU : *
