# TCR Library Design

In [5]:
import pandas as pd

from Bio import Seq
from Bio import SeqIO

In [6]:
# Load the reference coding sequence from FASTA and translate it once.
# REF_DNA and REF_AA are module-level objects read by the functions below.

# SeqIO.read expects the file to hold exactly one record; `.seq` keeps only
# the sequence and drops the record's id/description.
REF_DNA = SeqIO.read("TRBC_Extracellular.fasta", "fasta").seq
REF_AA = Seq.translate(REF_DNA)

## Setup:




In [7]:
# Substitutions to try for a given amino acid, based on the BLOSUM matrix.
# generate_subs looks this up with the residue REF_AA[aa_i]. Starts empty.
BLOSSOM_SUB_TABLE = {}
# Mutation frequencies seen in homologous variants for a given position.
# generate_subs looks this up with the position index aa_i. Starts empty.
HOMOLOGY_SUB_TABLE = {}
# Mutation dictionaries, one per mutated sequence.
# NOTE(review): intended as a list of lists whose outer index is the position
# in the AA sequence, but the deletion loop extends it with `+=`, which yields
# a flat list of dictionaries -- confirm which shape is wanted.
MUTATED_SEQUENCES = []

## Mutation Generation:

Mutations are generated by iterating through the amino acid sequence of the protein. The first 4 substitutions for a
given position are alanine and the top 3 results from the BLOSUM table (`BLOSSOM_SUB_TABLE`). I then plan to look for
the most frequent mutation seen at that position in homologous sequences that is not included in the initial set of
substitutions.

This list of AA substitutions will be fed to Dan's code to pick the top two codons for that AA, formatted into a
dictionary that includes the position mutated, the original AA at that position, the new AA at that position, the
new codon used, the new DNA sequence, and the new AA sequence.

These dictionaries are collected in `MUTATED_SEQUENCES`.

In [8]:
def delete_position(aa_i):
    """Build the mutation record for deleting one residue from the reference.

    Removes the codon for amino-acid index ``aa_i`` from ``REF_DNA`` and the
    matching residue from ``REF_AA``.

    Args:
        aa_i: Zero-based index into ``REF_AA``; must satisfy
            ``0 <= aa_i < len(REF_AA)``.

    Returns:
        A one-element list holding the mutation dictionary with the keys
        ``position``, ``original_aa``, ``new_aa``, ``new_codon``,
        ``new_dna_seq`` and ``new_aa_seq``. A list is returned so callers can
        extend a collection with ``+=``.

    Raises:
        IndexError: If ``aa_i`` lies outside the reference sequence.
    """
    # Negative indices must be rejected explicitly: Python slicing would accept
    # them and silently produce a wrong sequence (e.g. aa_i == -1 makes the
    # second slice start at 0, so almost the whole reference is duplicated).
    if not 0 <= aa_i < len(REF_AA):
        raise IndexError(
            f"aa_i={aa_i} is outside the reference sequence (length {len(REF_AA)})"
        )

    codon_start = aa_i * 3  # each residue is encoded by three bases
    codon_end = codon_start + 3

    deleted_position = {
        "position": aa_i,
        "original_aa": REF_AA[aa_i],
        # This record uses "*" with an empty codon to mark the deleted residue.
        "new_aa": "*",
        "new_codon": "",
        "new_dna_seq": REF_DNA[:codon_start] + REF_DNA[codon_end:],
        "new_aa_seq": REF_AA[:aa_i] + REF_AA[aa_i + 1:],
    }

    return [deleted_position]

def generate_subs(aa_i):
    """Look up substitution candidates for one position (unfinished stub).

    Currently only performs the two table lookups and returns an empty list;
    no mutation dictionaries are built yet.

    Args:
        aa_i: Index of the residue in ``REF_AA`` to substitute.

    Returns:
        A new, empty list (placeholder for the mutation dictionaries).

    Raises:
        KeyError: If the residue is missing from ``BLOSSOM_SUB_TABLE`` or
            ``aa_i`` is missing from ``HOMOLOGY_SUB_TABLE``.
    """
    residue = REF_AA[aa_i]  # amino acid being substituted
    candidate_aas = []  # will hold the replacement amino acids
    mutation_records = []  # will hold one dictionary per mutation

    # Alanine + top 3 substitutions from the BLOSUM-based table.
    matrix_candidates = BLOSSOM_SUB_TABLE[residue]
    # Per-position data from homologous variants.
    homolog_candidates = HOMOLOGY_SUB_TABLE[aa_i]

    # TODO: combine the candidates above into mutation dictionaries.
    return mutation_records



## Testing:
Basic sanity checks for a given mutation:

1.) Does the translated new DNA sequence match the new AA sequence?

2.) Is the new DNA sequence different from the reference DNA sequence?

3.) Does the new codon match the new AA?

4.) Does the new AA match the

In [12]:
def check_translation(mutation_dict):
    """Check that the mutated DNA translates to the recorded AA sequence.

    Args:
        mutation_dict: Mutation record with ``"new_dna_seq"`` and
            ``"new_aa_seq"`` entries. ``"new_dna_seq"`` may be a bare sequence
            or a record-like object exposing the sequence as ``.seq``.

    Returns:
        True if translating ``new_dna_seq`` gives ``new_aa_seq``, else False.
    """
    dna = mutation_dict["new_dna_seq"]
    # Records built by slicing REF_DNA hold bare sequences, which have no
    # `.seq` attribute; unwrap only when a record-like object was stored.
    dna = getattr(dna, "seq", dna)

    # Compare as text so the result does not depend on whether either side is
    # a plain string or a sequence object.
    return str(Seq.translate(dna)) == str(mutation_dict["new_aa_seq"])

def check_dna_change(mutation_dict):
    """Report whether the mutated DNA differs from the reference DNA.

    Args:
        mutation_dict: Mutation record with a ``"new_dna_seq"`` entry.

    Returns:
        False if ``new_dna_seq`` equals ``REF_DNA``, otherwise True.
    """
    unchanged = mutation_dict["new_dna_seq"] == REF_DNA
    return False if unchanged else True

# Rebuild the deletion scan from scratch so that re-running this cell does not
# append a second copy of every record to MUTATED_SEQUENCES.
MUTATED_SEQUENCES = []
for aa_i in range(len(REF_AA)):
    deletion_records = delete_position(aa_i)
    MUTATED_SEQUENCES += deletion_records
    # Print the records just produced rather than indexing MUTATED_SEQUENCES
    # by aa_i, which is only correct when the list started out empty.
    for record in deletion_records:
        print(record["new_aa_seq"])





EDLKNVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHFRCQVQFYGLSENDEWTQDRAKPVTQIVSAEAWGRADCGFTSESYQQGVLSATILYE
LDLKNVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHFRCQVQFYGLSENDEWTQDRAKPVTQIVSAEAWGRADCGFTSESYQQGVLSATILYE
LELKNVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHFRCQVQFYGLSENDEWTQDRAKPVTQIVSAEAWGRADCGFTSESYQQGVLSATILYE
LEDKNVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHFRCQVQFYGLSENDEWTQDRAKPVTQIVSAEAWGRADCGFTSESYQQGVLSATILYE
LEDLNVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHFRCQVQFYGLSENDEWTQDRAKPVTQIVSAEAWGRADCGFTSESYQQGVLSATILYE
LEDLKVFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHFRCQVQFYGLSENDEWTQDRAKPVTQIVSAEAWGRADCGFTSESYQQGVLSATILYE
LEDLKNFPPEVAVFEPSEAEISHTQKATLVCLATGFYPDHVELSWWVNGKEVHSGVSTDPQPLKEQPALNDSRYCLSSRLRVSATFWQNPRNHF