In [196]:
import pandas as pd
import numpy as np
import random

from Bio import Seq
from Bio import SeqIO

In [197]:
def load_tile_info(path='data/tile_info.csv'):
    """Load the tile definitions from a CSV file into a list of dicts.

    The CSV must contain a ``tile`` column, which becomes each entry's
    ``tile_i``. The remaining columns are read by *position*, not by name:
    the first four are stored as ``start``, ``stop``, ``front`` and ``end``.

    Args:
        path: Location of the tile CSV. Defaults to the path this notebook
            has always read, so existing zero-argument calls are unaffected.

    Returns:
        list[dict]: One dict per CSV row, in file order, with the keys
        ``tile_i``, ``start``, ``stop``, ``front`` and ``end``.
    """
    tile_table = pd.read_csv(path).set_index('tile')

    tiles = []
    for tile_i, row in tile_table.iterrows():
        # .iloc keeps the lookup explicitly positional. Plain row[0] on a
        # string-labelled Series relies on a fallback that pandas 2.x
        # deprecates and that later versions interpret as a label lookup.
        tiles.append({
            'tile_i': tile_i,
            'start': row.iloc[0],
            'stop': row.iloc[1],
            'front': row.iloc[2],
            'end': row.iloc[3],
        })
    return tiles

# Tile definitions, loaded once when this cell runs; tile_library() reads this
# module-level list when assigning sequences to tiles.
TILE_INFO = load_tile_info()

In [198]:
# Screened variant library (one row per sequence) that has not yet been
# assigned to tiles; passed to tile_library() further down in this cell.
UNTILED_SEQS = pd.read_csv('out/screened_results.csv')
# Reference DNA: the sequence of the record in the FASTA file.
# NOTE(review): SeqIO.read is documented to require exactly one record — confirm the file has one.
REF_DNA = SeqIO.read("data/TRBC_Extracellular.fasta", "fasta").seq
# Translation of the reference DNA, using Bio.Seq.translate with its default arguments.
# Neither REF_DNA nor REF_AA is referenced by the code visible in this cell.
REF_AA = Seq.translate(REF_DNA)

def tile_library(library, tile_info=None):
    """Assign every sequence in ``library`` to a tile and build its tile DNA.

    Each row of ``library`` is turned back into a sequence dict. Columns are
    read by *position* (1 through 9); column 0 is ignored (presumably the
    index column written by an earlier ``to_csv`` — verify against the file).

    A sequence is assigned to the first tile for which
    ``tile['start'] <= dna_change_s`` and ``tile['stop'] > dna_change_e``.
    For that tile:

    * ``tile_core`` is ``new_dna_seq[start:stop]``. When ``new_aa == 'del'``
      the slice ends 3 characters earlier (presumably because deleting a
      codon shortens the sequence by three bases — confirm).
    * ``finished_tile`` is ``tile['front'] + tile_core + tile['end']``.

    Sequences whose change does not fall inside any tile are collected
    separately with ``tile_i``, ``tile_core`` and ``finished_tile`` left as
    ``None``. The number of such sequences is printed.

    Args:
        library: DataFrame with at least ten columns in the order described
            above.
        tile_info: Optional iterable of tile dicts with the keys ``tile_i``,
            ``start``, ``stop``, ``front`` and ``end``. When omitted, the
            module-level ``TILE_INFO`` is used, as before.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(tiled_sequences, tiling_errors)``.
    """
    if tile_info is None:
        tile_info = TILE_INFO

    tiled_sequences = []
    tiling_errors = []

    # Iterate through the CSV rows and convert each back into a sequence dict.
    for _, row in library.iterrows():
        # .iloc keeps these lookups explicitly positional. Plain row[1] on a
        # string-labelled Series relies on a fallback that pandas 2.x
        # deprecates and that later versions interpret as a label lookup.
        tiled_sequence = {
            'position': row.iloc[1],
            'original_aa': row.iloc[2],
            'original_codon': row.iloc[3],
            'new_aa': row.iloc[4],
            'new_codon': row.iloc[5],
            'new_dna_seq': row.iloc[6],
            'new_aa_seq': row.iloc[7],
            "dna_change_s": row.iloc[8],
            "dna_change_e": row.iloc[9],
            "tile_i": None,
            "tile_core": None,
            "finished_tile": None
        }

        # The change coordinates do not depend on the tile, so read them once.
        seq_start = tiled_sequence['dna_change_s']
        seq_stop = tiled_sequence['dna_change_e']

        for tile in tile_info:
            tile_start = tile['start']
            tile_stop = tile['stop']

            if tile_start <= seq_start and tile_stop > seq_stop:
                # A deletion's sequence is cut 3 characters short of the tile stop.
                core_stop = tile_stop - 3 if tiled_sequence['new_aa'] == 'del' else tile_stop
                tile_core = tiled_sequence['new_dna_seq'][tile_start:core_stop]

                tiled_sequence["tile_i"] = tile["tile_i"]
                tiled_sequence["tile_core"] = tile_core
                tiled_sequence["finished_tile"] = tile["front"] + tile_core + tile["end"]
                tiled_sequences.append(tiled_sequence)
                break
        else:
            # No tile fully contains the changed region.
            tiling_errors.append(tiled_sequence)

    print(f'Tiling errors: {len(tiling_errors)}')

    return pd.DataFrame(tiled_sequences), pd.DataFrame(tiling_errors)

# Tile the whole screened library; tile_library() also prints the number of
# sequences that could not be placed in any tile.
tiled_sequences, errors = tile_library(UNTILED_SEQS)

# Persist both results. to_csv is called with its defaults, so the DataFrame
# index is written as the first column of each file.
tiled_sequences.to_csv('out/tiled_library.csv')
errors.to_csv('out/tile_errors.csv')




Tiling errors: 7
