In [1]:
# Python script for the analyses of the "Antibody affinity birth through somatic hypermutation" publication.
# The pipeline is divided into several sections. A comment at the beginning of each section indicates which figures of the publication are generated by that section.

# The input sequences for these analyses are provided in the data folder. After a successful run, the results of each section are saved in the output folder.
print('Running...')
import re
import operator

import os
#import sys
import pandas as pd
import numpy as np

import time
import itertools
import matplotlib.pyplot as plt
import glob
#import logomaker #https://logomaker.readthedocs.io

# Functions
def display_big():
    """Configure the pandas display limits used when showing DataFrames.

    Sets ``display.max_rows`` to 10, ``display.max_columns`` to 200 and
    ``display.width`` to 1000 through ``pd.set_option``. Returns nothing.
    """
    display_settings = {
        'display.max_rows': 10,
        'display.max_columns': 200,
        'display.width': 1000,
    }
    for option_name, option_value in display_settings.items():
        pd.set_option(option_name, option_value)

# Apply the display settings right away.
display_big()

Running...


In [2]:
# Root folder of the project data.
data_folder = '../data'

# The VAR_IN_FOLDER / VAR_OUT_FOLDER environment variables, when set, take
# precedence over the default 'input' and 'output' sub-folders of data_folder.
input_folder = os.environ.get('VAR_IN_FOLDER', data_folder + '/input')
output_folder = os.environ.get('VAR_OUT_FOLDER', data_folder + '/output')

In [3]:
def set_output_folder(section_output):
    """Return the output folder of one pipeline section, creating it if needed.

    Parameters
    ----------
    section_output : str
        Name of the section sub-folder (for example ``'1_prep'``).

    Returns
    -------
    str
        The path ``data_folder + '/output/' + section_output``.
    """
    # A distinct local name avoids shadowing the module-level ``output_folder``.
    # NOTE(review): the path is derived from ``data_folder``, not from the
    # module-level ``output_folder`` / VAR_OUT_FOLDER setting -- confirm that
    # this is intended.
    section_folder = data_folder + '/output/' + section_output

    # exist_ok=True creates the folder (and missing parents) only when it is
    # absent, and cannot fail if the folder appears between a check and the
    # creation, unlike a separate isdir() test.
    os.makedirs(section_folder, exist_ok=True)
    return section_folder

In [4]:
# Section1: preparation

# One output folder per pipeline step; each folder is created when missing.
# The order of the names below matches the order of the section folders.
(
    output_folder_prep,
    output_folder_num_miss,
    output_folder_freq_pos,
    output_folder_donuts,
    output_folder_seq_logos,
    output_folder_rs_prep,
) = [
    set_output_folder(section_name)
    for section_name in (
        '1_prep',
        '2_num_miss',
        '3_freq_per_position',
        '4_donuts',
        '5_seq_logos',
        '6_prep_rs',
    )
]

In [5]:
# Method used to turn the replacement (R) and silent (S) mutation counts into a ratio.
# 'R/S_spike_sil1' divides R by (S + 1), i.e. one silent mutation is added to
# every case so that the division can never be by zero.
rs_method='R/S_spike_sil1' # add 1 to silent mutations to avoid a division-by-zero error

def align_regions(seq, mouse, chain):
    """Split a nucleotide sequence into its seven FR/CDR regions.

    The regions are cut at fixed nucleotide offsets. Light chains
    (``chain == 'VL'``) use one layout regardless of ``mouse``; for any other
    chain the layout is selected by ``mouse``.

    Parameters
    ----------
    seq : str
        Nucleotide sequence to slice.
    mouse : str
        Mouse line; ``'HA-WT'``, ``'HA-uMT'``, ``'B18-383'`` and ``'B18'`` are
        supported for chains other than ``'VL'``.
    chain : str
        Chain label; ``'VL'`` selects the light-chain layout.

    Returns
    -------
    list of str
        ``[FR1, CDR1, FR2, CDR2, FR3, CDR3, FR4]``. Regions lying beyond the
        end of ``seq`` come back truncated or empty, as with any slice.

    Raises
    ------
    ValueError
        If no layout is defined for the given ``mouse`` / ``chain``.
    """
    # Cumulative nucleotide offsets delimiting FR1, CDR1, FR2, CDR2, FR3, CDR3
    # and FR4; the trailing comments give the region sizes in codons.
    if chain == 'VL':
        boundaries = (0, 24, 60, 111, 120, 228, 255, 285)  # 8, 12, 17, 3, 36, 9, 10
    elif mouse in ('HA-WT', 'HA-uMT'):
        boundaries = (0, 15, 39, 90, 114, 228, 267, 300)  # 5, 8, 17, 8, 38, 13, 11
    elif mouse in ('B18-383', 'B18'):
        boundaries = (0, 33, 57, 108, 132, 246, 285, 318)  # 11, 8, 17, 8, 38, 13, 11
    else:
        # Fail with a clear message instead of an UnboundLocalError at return.
        raise ValueError(
            'No region layout defined for mouse={!r}, chain={!r}'.format(mouse, chain)
        )

    return [seq[start:end] for start, end in zip(boundaries, boundaries[1:])]

In [6]:
# Symbol used for a deleted position.
del_sign='-'
# Symbol used for an ambiguous position.
ambiguity_sign='.'
# Codon -> one-letter amino acid lookup table. Stop codons (TAA, TAG, TGA) map
# to '*' and a fully deleted codon ('---') maps to del_sign.
aas_dic={'AAA':'K','AAC':'N','AAT':'N','AAG':'K','ACA':'T','ACC':'T','ACT':'T','ACG':'T','ATA':'I','ATC':'I',\
        'ATT':'I','ATG':'M','AGA':'R','AGC':'S','AGT':'S','AGG':'R','CAA':'Q','CAC':'H','CAT':'H','CAG':'Q',\
        'CCA':'P','CCC':'P','CCT':'P','CCG':'P','CTA':'L','CTC':'L','CTT':'L','CTG':'L','CGA':'R','CGC':'R',\
        'CGT':'R','CGG':'R','TAA':'*','TAC':'Y','TAT':'Y','TAG':'*','TCA':'S','TCC':'S','TCT':'S','TCG':'S',\
        'TTA':'L','TTC':'F','TTT':'F','TTG':'L','TGA':'*','TGC':'C','TGT':'C','TGG':'W','GAA':'E','GAC':'D',\
        'GAT':'D','GAG':'E','GCA':'A','GCC':'A','GCT':'A','GCG':'A','GTA':'V','GTC':'V','GTT':'V','GTG':'V',\
        'GGA':'G','GGC':'G','GGT':'G','GGG':'G','---':del_sign}
# Amino-acid alphabet: the 20 residues, the stop symbol '*' and the deletion symbol.
aas_list=['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*', del_sign]
# The 20 residues in a different order, without stop or deletion symbols.
# NOTE(review): presumably ordered by side-chain chemistry, as the name suggests -- confirm.
aas_chemistry_list=['I', 'V', 'L', 'F', 'C', 'M', 'A', 'W', 'G', 'T', 'S', 'Y', 'P', 'H', 'N', 'D', 'Q', 'E', 'K', 'R']
# Nucleotide alphabet: the four bases plus the deletion and ambiguity symbols.
nts_list=['A', 'C', 'G', 'T', del_sign, ambiguity_sign]

In [7]:
# Load the expanded per-sequence table from the preparation output folder and
# give it a fresh 0..n-1 index.
# NOTE(review): the variable name says "included" while the file name says
# "excluded" -- confirm which input is intended.
dfs_expanded_aas_included = pd.read_csv(
    f'{output_folder_prep}/dfs_expanded_aas_excluded.tsv',
    sep='\t',
    header=0,
    low_memory=False,
).reset_index(drop=True)
dfs_expanded_aas_included

Unnamed: 0,header,seq_nt,status,mouse,dataset,chain,sub_dataset,mouse_DB,label,type,ref_nt,ref_aa,seq_aa,stopcodon,len_nt,len_aa,nt_ins,nt_dels,nt_miss,nt_N,aa_ins,aa_dels,aa_miss,aa_.,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,A28,A29,A30,A31,A32,A33,A34,A35,A36,A37,A38,A39,A40,A41,A42,A43,A44,A45,A46,A47,A48,A49,A50,A51,A52,A53,A54,A55,A56,A57,A58,A59,A60,A61,A62,A63,A64,A65,A66,A67,A68,A69,A70,A71,A72,A73,A74,A75,A76,A77,A78,A79,A80,A81,A82,A83,A84,A85,A86,A87,A88,A89,A90,A91,A92,A93,A94,A95,A96,A97,A98,A99,A100,A101,A102,A103,A104,A105
0,58A_074_L-1524392-R2-A10_L_B10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKSYLTWYQQKLGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,8,0,0,0,3,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,S,Y,L,T,W,Y,Q,Q,K,L,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,
1,51I_053_L-1350833-1362242-R-D11_L_H10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,
2,57B_101_L-1524393-L-D6_L_E05,GTCACTCTGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAG...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTLSCTSSQSLFNSGEQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,2,0,0,0,2,0,V,T,L,S,C,T,S,S,Q,S,L,F,N,S,G,E,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,
3,51I_035_L-1350833-1362242-R-B7_L_F08,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,
4,58A_094_L-1524393-L-C9_L_F12,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTAGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSRKQKNFLTWYQQKPGQPPKLLIYWASTRESGV...,False,285.0,95.0,0,0,6,0,0,0,4,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,R,K,Q,K,N,F,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,L,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11401,52A_018_H-1350833-unL-1362242-L-B12_H_B03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTACCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSTYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,False,300.0,100.0,0,0,2,0,0,0,2,0,S,C,A,A,S,G,F,T,F,S,T,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,E,N,G,F,S,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,
11402,53A_021_H-1524392-R-C5_H_E03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGDTYTSCPDSVKG...,False,300.0,100.0,0,0,7,0,0,0,5,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,D,T,Y,T,S,C,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,D,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,
11403,53A_022_H-1524392-R-C7_H_F03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTFYPDTVKG...,False,300.0,100.0,0,0,3,0,0,0,3,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,F,Y,P,D,T,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,G,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,
11404,53A_016_H-1524392-R-B10_H_H02,TCCTGTGCAGCCTCTGGAATCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGITFSSYGMSWVRQTPDKRLEWVATISNGGGSTYYPDSVKG...,False,300.0,100.0,0,0,4,0,0,0,3,0,S,C,A,A,S,G,I,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,S,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,S,R,E,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,


In [8]:
def _count_region_mutations(seq_region, ref_region):
    """Count replacement and silent nucleotide changes between two aligned regions.

    Both regions are split into consecutive codons and compared pairwise.
    Returns ``(replacement, silent)``: the number of differing nucleotides in
    codons whose amino acid changed and in codons whose amino acid is kept.
    """
    replacement_count, silent_count = 0, 0

    # Split both regions into consecutive 3-nt codons (the last may be partial).
    seq_codons = [seq_region[pos:pos + 3] for pos in range(0, len(seq_region), 3)]
    ref_codons = [ref_region[pos:pos + 3] for pos in range(0, len(ref_region), 3)]

    for codon_seq, codon_ref in zip(seq_codons, ref_codons):
        # A whole-codon deletion or insertion is not counted as a mutation.
        if codon_seq == '---' or codon_ref == '---':
            continue
        # Codons that cannot be translated (ambiguous or partial) are ignored
        # on either side; looking them up in aas_dic would raise KeyError.
        if codon_seq not in aas_dic or codon_ref not in aas_dic:
            continue
        # Identical codons carry no mutation.
        if codon_seq == codon_ref:
            continue

        nt_changes = sum(nt_seq != nt_ref for nt_seq, nt_ref in zip(codon_seq, codon_ref))
        if aas_dic[codon_seq] != aas_dic[codon_ref]:
            # Amino acid replaced (missense): every differing nucleotide of
            # the codon counts as a replacement mutation.
            replacement_count += nt_changes
        else:
            # Same amino acid, different codon: silent mutation.
            silent_count += nt_changes

    return replacement_count, silent_count


def rs_ratio(df_initial):
    """Annotate each sequence with per-region replacement/silent mutation counts.

    Every row's ``seq_nt`` is compared with its ``ref_nt``, codon by codon,
    inside each of the seven regions returned by ``align_regions`` for the
    row's ``mouse`` and ``chain``.

    Parameters
    ----------
    df_initial : pandas.DataFrame
        Must provide the columns ``seq_nt``, ``ref_nt``, ``mouse`` and
        ``chain``. It is not modified; a copy is annotated.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_initial`` with a fresh 0..n-1 index and three added
        columns, each holding one list per row with one entry per region:
        ``replacement_mut``, ``silent_mut`` and ``R-S_check`` (replacement
        minus silent).
    """
    df = df_initial.copy()
    df.reset_index(inplace=True, drop=True)

    replacement_rows, silent_rows, difference_rows = [], [], []

    for query_seq, ref_seq, mouse, chain in zip(df['seq_nt'], df['ref_nt'], df['mouse'], df['chain']):
        seq_regions = align_regions(query_seq, mouse, chain)
        ref_regions = align_regions(ref_seq, mouse, chain)

        region_counts = [
            _count_region_mutations(seq_region, ref_region)
            for seq_region, ref_region in zip(seq_regions, ref_regions)
        ]
        replacement_rows.append([replacement for replacement, _ in region_counts])
        silent_rows.append([silent for _, silent in region_counts])
        difference_rows.append([replacement - silent for replacement, silent in region_counts])

    # One object-dtype column per result; every cell holds that row's list.
    df['replacement_mut'] = pd.Series(replacement_rows, index=df.index, dtype=object)
    df['silent_mut'] = pd.Series(silent_rows, index=df.index, dtype=object)
    df['R-S_check'] = pd.Series(difference_rows, index=df.index, dtype=object)

    return df

# Add the per-region replacement / silent mutation counts to every sequence
# and display the resulting table.
dfs=rs_ratio(dfs_expanded_aas_included)
dfs

Unnamed: 0,header,seq_nt,status,mouse,dataset,chain,sub_dataset,mouse_DB,label,type,ref_nt,ref_aa,seq_aa,stopcodon,len_nt,len_aa,nt_ins,nt_dels,nt_miss,nt_N,aa_ins,aa_dels,aa_miss,aa_.,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,A28,A29,A30,A31,A32,A33,A34,A35,A36,A37,A38,A39,A40,A41,A42,A43,A44,A45,A46,A47,A48,A49,A50,A51,A52,A53,A54,A55,A56,A57,A58,A59,A60,A61,A62,A63,A64,A65,A66,A67,A68,A69,A70,A71,A72,A73,A74,A75,A76,A77,A78,A79,A80,A81,A82,A83,A84,A85,A86,A87,A88,A89,A90,A91,A92,A93,A94,A95,A96,A97,A98,A99,A100,A101,A102,A103,A104,A105,replacement_mut,silent_mut,R-S_check
0,58A_074_L-1524392-R2-A10_L_B10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKSYLTWYQQKLGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,8,0,0,0,3,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,S,Y,L,T,W,Y,Q,Q,K,L,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 1, 2, 0, 0, 1, 0]","[0, 0, 2, 0, 1, 1, 0]","[0, 1, 0, 0, -1, 0, 0]"
1,51I_053_L-1350833-1362242-R-D11_L_H10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, -1, 0]"
2,57B_101_L-1524393-L-D6_L_E05,GTCACTCTGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAG...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTLSCTSSQSLFNSGEQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,2,0,0,0,2,0,V,T,L,S,C,T,S,S,Q,S,L,F,N,S,G,E,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[1, 1, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0]","[1, 1, 0, 0, 0, 0, 0]"
3,51I_035_L-1350833-1362242-R-B7_L_F08,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, -1, 0, 0]"
4,58A_094_L-1524393-L-C9_L_F12,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTAGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSRKQKNFLTWYQQKPGQPPKLLIYWASTRESGV...,False,285.0,95.0,0,0,6,0,0,0,4,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,R,K,Q,K,N,F,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,L,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 2, 1, 0, 0, 1, 0]","[0, 0, 0, 0, 1, 1, 0]","[0, 2, 1, 0, -1, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11401,52A_018_H-1350833-unL-1362242-L-B12_H_B03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTACCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSTYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,False,300.0,100.0,0,0,2,0,0,0,2,0,S,C,A,A,S,G,F,T,F,S,T,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,E,N,G,F,S,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 1, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 1, 0]"
11402,53A_021_H-1524392-R-C5_H_E03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGDTYTSCPDSVKG...,False,300.0,100.0,0,0,7,0,0,0,5,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,D,T,Y,T,S,C,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,D,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 0, 0, 3, 2, 1, 0]","[0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 3, 2, 0, 0]"
11403,53A_022_H-1524392-R-C7_H_F03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTFYPDTVKG...,False,300.0,100.0,0,0,3,0,0,0,3,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,F,Y,P,D,T,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,G,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 0, 0, 0, 2, 1, 0]","[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 2, 1, 0]"
11404,53A_016_H-1524392-R-B10_H_H02,TCCTGTGCAGCCTCTGGAATCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGITFSSYGMSWVRQTPDKRLEWVATISNGGGSTYYPDSVKG...,False,300.0,100.0,0,0,4,0,0,0,3,0,S,C,A,A,S,G,I,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,S,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,S,R,E,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 1, 0, 1, 0, 1, 0]","[0, 0, 0, 0, 1, 0, 0]","[0, 1, 0, 1, -1, 1, 0]"


In [9]:
# Per-region R/S ratio of every sequence, stored in one float column per region.
region_names = ['FR1','CDR1','FR2','CDR2','FR3','CDR3','FR4']

if rs_method == 'R/S_spike_sil1':
    # Stack the per-row count lists into (n_rows, n_regions) arrays; the
    # reshape also keeps an empty table two-dimensional.
    replacement_counts = np.array(dfs['replacement_mut'].tolist(), dtype=float).reshape(len(dfs), len(region_names))
    silent_counts = np.array(dfs['silent_mut'].tolist(), dtype=float).reshape(len(dfs), len(region_names))

    # One silent mutation is added to every case, so the denominator is never zero.
    region_ratios = replacement_counts / (silent_counts + 1)

    for j, region in enumerate(region_names):
        dfs[region] = region_ratios[:, j]
else:
    # Fail loudly instead of silently leaving the region columns missing.
    raise ValueError(f'Unsupported rs_method: {rs_method!r}')
dfs

Unnamed: 0,header,seq_nt,status,mouse,dataset,chain,sub_dataset,mouse_DB,label,type,ref_nt,ref_aa,seq_aa,stopcodon,len_nt,len_aa,nt_ins,nt_dels,nt_miss,nt_N,aa_ins,aa_dels,aa_miss,aa_.,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,A28,A29,A30,A31,A32,A33,A34,A35,A36,A37,A38,A39,A40,A41,A42,A43,A44,A45,A46,A47,A48,A49,A50,A51,A52,A53,A54,A55,A56,A57,A58,A59,A60,A61,A62,A63,A64,A65,A66,A67,A68,A69,A70,A71,A72,A73,A74,A75,A76,A77,A78,A79,A80,A81,A82,A83,A84,A85,A86,A87,A88,A89,A90,A91,A92,A93,A94,A95,A96,A97,A98,A99,A100,A101,A102,A103,A104,A105,replacement_mut,silent_mut,R-S_check,FR1,CDR1,FR2,CDR2,FR3,CDR3,FR4
0,58A_074_L-1524392-R2-A10_L_B10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKSYLTWYQQKLGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,8,0,0,0,3,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,S,Y,L,T,W,Y,Q,Q,K,L,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 1, 2, 0, 0, 1, 0]","[0, 0, 2, 0, 1, 1, 0]","[0, 1, 0, 0, -1, 0, 0]",0.0,1.0,0.666667,0.0,0.0,0.5,0.0
1,51I_053_L-1350833-1362242-R-D11_L_H10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, -1, 0]",0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,57B_101_L-1524393-L-D6_L_E05,GTCACTCTGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAG...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTLSCTSSQSLFNSGEQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,2,0,0,0,2,0,V,T,L,S,C,T,S,S,Q,S,L,F,N,S,G,E,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[1, 1, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0]","[1, 1, 0, 0, 0, 0, 0]",1.0,1.0,0.000000,0.0,0.0,0.0,0.0
3,51I_035_L-1350833-1362242-R-B7_L_F08,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, -1, 0, 0]",0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,58A_094_L-1524393-L-C9_L_F12,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTAGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSRKQKNFLTWYQQKPGQPPKLLIYWASTRESGV...,False,285.0,95.0,0,0,6,0,0,0,4,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,R,K,Q,K,N,F,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,L,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 2, 1, 0, 0, 1, 0]","[0, 0, 0, 0, 1, 1, 0]","[0, 2, 1, 0, -1, 0, 0]",0.0,2.0,1.000000,0.0,0.0,0.5,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11401,52A_018_H-1350833-unL-1362242-L-B12_H_B03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTACCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSTYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,False,300.0,100.0,0,0,2,0,0,0,2,0,S,C,A,A,S,G,F,T,F,S,T,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,E,N,G,F,S,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 1, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 1, 0]",0.0,1.0,0.000000,0.0,0.0,1.0,0.0
11402,53A_021_H-1524392-R-C5_H_E03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGDTYTSCPDSVKG...,False,300.0,100.0,0,0,7,0,0,0,5,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,D,T,Y,T,S,C,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,D,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 0, 0, 3, 2, 1, 0]","[0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 3, 2, 0, 0]",0.0,0.0,0.000000,3.0,2.0,0.5,0.0
11403,53A_022_H-1524392-R-C7_H_F03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTFYPDTVKG...,False,300.0,100.0,0,0,3,0,0,0,3,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,F,Y,P,D,T,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,G,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 0, 0, 0, 2, 1, 0]","[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 2, 1, 0]",0.0,0.0,0.000000,0.0,2.0,1.0,0.0
11404,53A_016_H-1524392-R-B10_H_H02,TCCTGTGCAGCCTCTGGAATCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGITFSSYGMSWVRQTPDKRLEWVATISNGGGSTYYPDSVKG...,False,300.0,100.0,0,0,4,0,0,0,3,0,S,C,A,A,S,G,I,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,S,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,S,R,E,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 1, 0, 1, 0, 1, 0]","[0, 0, 0, 0, 1, 0, 0]","[0, 1, 0, 1, -1, 1, 0]",0.0,1.0,0.000000,1.0,0.0,1.0,0.0


In [10]:
# Largest R/S ratio observed in each region.
dfs.loc[:, ['FR1','CDR1','FR2','CDR2','FR3','CDR3','FR4']].max(axis=0)

FR1      6.0
CDR1    10.0
FR2     22.0
CDR2    10.0
FR3     20.5
CDR3    14.0
FR4     10.0
dtype: float64

In [11]:
# Write the annotated table as a tab-separated file (without the index) and display it.
dfs.to_csv(f'{output_folder_rs_prep}/dfs_rs_ratios.tsv', sep='\t', index=False)
dfs

Unnamed: 0,header,seq_nt,status,mouse,dataset,chain,sub_dataset,mouse_DB,label,type,ref_nt,ref_aa,seq_aa,stopcodon,len_nt,len_aa,nt_ins,nt_dels,nt_miss,nt_N,aa_ins,aa_dels,aa_miss,aa_.,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,A28,A29,A30,A31,A32,A33,A34,A35,A36,A37,A38,A39,A40,A41,A42,A43,A44,A45,A46,A47,A48,A49,A50,A51,A52,A53,A54,A55,A56,A57,A58,A59,A60,A61,A62,A63,A64,A65,A66,A67,A68,A69,A70,A71,A72,A73,A74,A75,A76,A77,A78,A79,A80,A81,A82,A83,A84,A85,A86,A87,A88,A89,A90,A91,A92,A93,A94,A95,A96,A97,A98,A99,A100,A101,A102,A103,A104,A105,replacement_mut,silent_mut,R-S_check,FR1,CDR1,FR2,CDR2,FR3,CDR3,FR4
0,58A_074_L-1524392-R2-A10_L_B10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKSYLTWYQQKLGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,8,0,0,0,3,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,S,Y,L,T,W,Y,Q,Q,K,L,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 1, 2, 0, 0, 1, 0]","[0, 0, 2, 0, 1, 1, 0]","[0, 1, 0, 0, -1, 0, 0]",0.0,1.0,0.666667,0.0,0.0,0.5,0.0
1,51I_053_L-1350833-1362242-R-D11_L_H10,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, -1, 0]",0.0,0.0,0.000000,0.0,0.0,0.0,0.0
2,57B_101_L-1524393-L-D6_L_E05,GTCACTCTGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAG...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTLSCTSSQSLFNSGEQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,2,0,0,0,2,0,V,T,L,S,C,T,S,S,Q,S,L,F,N,S,G,E,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[1, 1, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0]","[1, 1, 0, 0, 0, 0, 0]",1.0,1.0,0.000000,0.0,0.0,0.0,0.0
3,51I_035_L-1350833-1362242-R-B7_L_F08,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,False,285.0,95.0,0,0,1,0,0,0,0,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,G,K,Q,K,N,Y,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,V,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,N,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, -1, 0, 0]",0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,58A_094_L-1524393-L-C9_L_F12,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTAGAA...,LateGC,HA-WT,APC,VL,1-1,HA,LateGC_HA-WT_APC_VL_1-1,query,GTCACTATGAGCTGCACGTCCAGTCAGAGTCTGTTTAACAGTGGAA...,VTMSCTSSQSLFNSGKQKNYLTWYQQKPGQPPKVLIYWASTRESGV...,VTMSCTSSQSLFNSRKQKNFLTWYQQKPGQPPKLLIYWASTRESGV...,False,285.0,95.0,0,0,6,0,0,0,4,0,V,T,M,S,C,T,S,S,Q,S,L,F,N,S,R,K,Q,K,N,F,L,T,W,Y,Q,Q,K,P,G,Q,P,P,K,L,L,I,Y,W,A,S,T,R,E,S,G,V,P,D,R,F,T,G,S,G,S,G,T,D,F,T,L,T,I,S,S,V,Q,A,E,D,L,A,V,Y,Y,C,Q,N,D,Y,S,Y,P,L,T,F,G,G,G,T,K,L,E,L,K,,,,,,,,,,,,"[0, 2, 1, 0, 0, 1, 0]","[0, 0, 0, 0, 1, 1, 0]","[0, 2, 1, 0, -1, 0, 0]",0.0,2.0,1.000000,0.0,0.0,0.5,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11401,52A_018_H-1350833-unL-1362242-L-B12_H_B03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTACCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSTYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,False,300.0,100.0,0,0,2,0,0,0,2,0,S,C,A,A,S,G,F,T,F,S,T,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,E,N,G,F,S,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 1, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 1, 0]",0.0,1.0,0.000000,0.0,0.0,1.0,0.0
11402,53A_021_H-1524392-R-C5_H_E03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGDTYTSCPDSVKG...,False,300.0,100.0,0,0,7,0,0,0,5,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,D,T,Y,T,S,C,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,E,R,Y,D,D,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 0, 0, 3, 2, 1, 0]","[0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 3, 2, 0, 0]",0.0,0.0,0.000000,3.0,2.0,0.5,0.0
11403,53A_022_H-1524392-R-C7_H_F03,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTFYPDTVKG...,False,300.0,100.0,0,0,3,0,0,0,3,0,S,C,A,A,S,G,F,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,Y,T,F,Y,P,D,T,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,R,R,G,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 0, 0, 0, 2, 1, 0]","[0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 2, 1, 0]",0.0,0.0,0.000000,0.0,2.0,1.0,0.0
11404,53A_016_H-1524392-R-B10_H_H02,TCCTGTGCAGCCTCTGGAATCACTTTCAGTAGCTATGGCATGTCTT...,LateGC,HA-WT,mix,VH,1-1,HA,LateGC_HA-WT_mix_VH_1-1,query,TCCTGTGCAGCCTCTGGATTCACTTTCAGTAGCTATGGCATGTCTT...,SCAASGFTFSSYGMSWVRQTPDKRLEWVATISNGGGYTYYPDSVKG...,SCAASGITFSSYGMSWVRQTPDKRLEWVATISNGGGSTYYPDSVKG...,False,300.0,100.0,0,0,4,0,0,0,3,0,S,C,A,A,S,G,I,T,F,S,S,Y,G,M,S,W,V,R,Q,T,P,D,K,R,L,E,W,V,A,T,I,S,N,G,G,G,S,T,Y,Y,P,D,S,V,K,G,R,F,T,I,S,R,D,N,A,K,N,T,L,Y,L,Q,M,S,S,L,K,S,E,D,S,A,M,Y,Y,C,A,S,R,E,R,Y,D,E,N,G,F,A,Y,W,G,Q,G,T,L,V,T,V,S,A,,,,,,,"[0, 1, 0, 1, 0, 1, 0]","[0, 0, 0, 0, 1, 0, 0]","[0, 1, 0, 1, -1, 1, 0]",0.0,1.0,0.000000,1.0,0.0,1.0,0.0
