In [271]:
import pandas as pd

from Bio import Seq
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq


In [272]:
def load_tile_info(path='data/tile_info.csv'):
    """
    Read the tile configuration from a csv file.

    The file must contain a ``tile`` column, which becomes the tile id. The
    remaining columns are read by position, in this order: start, stop,
    front adapter, rear adapter.

    :param path: Location of the tile configuration csv. Defaults to the
        project's ``data/tile_info.csv``.
    :return: List of dictionaries (keys ``tile_i``, ``start``, ``stop``,
        ``front``, ``end``), one per row of the csv, in file order
    """

    tile_table = pd.read_csv(path).set_index('tile')

    tiles = []
    for tile_i, row in tile_table.iterrows():
        # Use .iloc for positional access: plain ``row[0]`` on a label-indexed
        # Series is deprecated in pandas and will become a label lookup.
        tiles.append({
            'tile_i': tile_i,
            'start': row.iloc[0],
            'stop': row.iloc[1],
            'front': row.iloc[2],
            'end': row.iloc[3],
        })

    return tiles

# Tile definitions, loaded once when this cell runs; tile_library() reads this
# module-level list when assigning sequences to tiles.
TILE_INFO = load_tile_info()

In [273]:
# Input table that is handed to tile_library() further down.
UNTILED_SEQS = pd.read_csv('out/screened_results.csv')
# Reference DNA sequence taken from the FASTA file.
REF_DNA = SeqIO.read("data/TRBC_Extracellular.fasta", "fasta").seq
# Translation of the reference DNA. NOTE(review): `Seq` is the class at this
# point (the later `from Bio.Seq import Seq` rebinds the name), so this is an
# unbound-method call; REF_AA is not referenced in the visible code.
REF_AA = Seq.translate(REF_DNA)

def tile_library(library, tiles=None):
    """
    Assign each sequence of the library to a tile and build its tile oligo.

    Each row is matched against the tiles in order. The first tile whose
    window contains the row's DNA change (``tile start <= change start`` and
    ``tile stop > change end``) is used. The tile core is the slice of the
    mutated DNA sequence covered by that tile, and the finished tile is the
    core wrapped in the tile's front and rear adapters. Rows that fit no tile
    are collected separately.

    The columns of ``library`` are read by position, not by name: position 0
    is skipped, and positions 1-9 are taken as position, original_aa,
    original_codon, new_aa, new_codon, new_dna_seq, new_aa_seq, dna_change_s
    and dna_change_e.

    :param library: DataFrame of mutated sequences laid out as described above
    :param tiles: Optional list of tile dictionaries with keys ``tile_i``,
        ``start``, ``stop``, ``front`` and ``end``. Defaults to the
        module-level ``TILE_INFO``.
    :return: Tuple ``(tiled, errors)`` of DataFrames: the rows that were
        assigned a tile, and the rows that fit no tile
    """

    if tiles is None:
        tiles = TILE_INFO

    tiled_sequences = []
    tiling_errors = []

    for _, row in library.iterrows():  # Convert each CSV row back into a sequence dictionary

        # Use .iloc for positional access: plain ``row[1]`` on a label-indexed
        # Series is deprecated in pandas and will become a label lookup.
        # The key order fixes the column order of the returned frames, which
        # export_library reads by position, so it must not change.
        tiled_sequence = {
            'position': row.iloc[1],
            'original_aa': row.iloc[2],
            'original_codon': row.iloc[3],
            'new_aa': row.iloc[4],
            'new_codon': row.iloc[5],
            'new_dna_seq': row.iloc[6],
            'new_aa_seq': row.iloc[7],
            "dna_change_s": row.iloc[8],
            "dna_change_e": row.iloc[9],
            "tile_i": None,
            "tile_core": None,
            "finished_tile": None
        }

        seq_start = tiled_sequence['dna_change_s']
        seq_stop = tiled_sequence['dna_change_e']

        for tile in tiles:
            tile_start = tile['start']
            tile_stop = tile['stop']

            # Check whether the mutation is contained within this tile
            if tile_start <= seq_start and tile_stop > seq_stop:
                tiled_sequence["tile_i"] = tile["tile_i"]

                # Compensate for indexing changes due to a deletion: the core
                # ends 3 bases earlier (presumably one codon - confirm).
                # All other sequences are truncated with the regular tile bounds.
                if tiled_sequence['new_aa'] == 'del':
                    core_stop = tile_stop - 3
                else:
                    core_stop = tile_stop

                unappended_tile = tiled_sequence['new_dna_seq'][tile_start:core_stop]

                tiled_sequence["tile_core"] = unappended_tile
                tiled_sequence["finished_tile"] = tile["front"] + unappended_tile + tile["end"]
                tiled_sequences.append(tiled_sequence)
                break

        # A sequence that went through every tile window without being
        # assigned is reported as a tiling error
        if tiled_sequence['tile_i'] is None:
            tiling_errors.append(tiled_sequence)

    print('Tiling errors: ' + str(len(tiling_errors)))  # Number of sequences that fit no tile

    return pd.DataFrame(tiled_sequences), pd.DataFrame(tiling_errors)

# Tile the screened library, then persist both results: the sequences that
# were assigned a tile and the sequences that fit no tile.
tiled_sequences, errors = tile_library(UNTILED_SEQS)
tiled_sequences.to_csv('out/tiled_library.csv')
errors.to_csv('out/tile_errors.csv')

def export_library(library, out_path="out/TCRB_library.fasta"):
    """
    Write every member of a tiled library to a fasta file.

    Columns are read by position and match the frame returned by
    tile_library: 0 = position, 1 = original_aa, 3 = new_aa, 9 = tile_i,
    11 = finished_tile. Each record id has the form
    ``TCRB_<original_aa><position + 1><new_aa>_T<tile>_OLIGO<row index>``.

    :param library: DataFrame of tiled sequences as returned by tile_library
    :param out_path: Destination fasta file. Defaults to
        ``out/TCRB_library.fasta``.
    :return: None; the records are written to ``out_path``
    """
    base_sequence = "TCRB_"  # Name of base sequence for fasta formatting
    formatted_sequences = []  # List of seq records to be compiled into fasta

    for oligo_id, row in library.iterrows():
        # Use .iloc for positional access: plain ``row[0]`` on a label-indexed
        # Series is deprecated in pandas and will become a label lookup.
        aa_i = row.iloc[0] + 1  # presumably converts a 0-based position to 1-based - confirm
        original_aa = row.iloc[1]
        new_aa = row.iloc[3]
        tile = row.iloc[9]
        sequence = Seq(row.iloc[11])

        name = f"{base_sequence}{original_aa}{aa_i}{new_aa}_T{tile}_OLIGO{oligo_id}"

        # Only the record id is populated; name and description stay empty so
        # the fasta header carries just the id.
        record = SeqRecord(sequence, id=name, name="", description="")
        formatted_sequences.append(record)

    SeqIO.write(formatted_sequences, out_path, "fasta")


# Write the tiled sequences produced above out as a fasta file.
export_library(tiled_sequences)



Tiling errors: 0
