In [156]:
from egfr_binder_rd2.fitness import get_fitness, get_exact_fitness
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from egfr_binder_rd2.bt import PartialEnsembleModuleWithFeatures
import torch
from tqdm import tqdm

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_colwidth', None)


def predict_sequences(sequences, models, device='cuda'):

    """Run inference on a list of sequences."""
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    results_list = []
    
    for sequence in tqdm(sequences):
        results = {'sequence': sequence}
        
        for name, model in models.items():
            with torch.no_grad():
                batch = model.tokenizer([sequence], return_tensors="pt", padding=True)
                batch = {k: v.to(device) for k, v in batch.items()}
                
                outputs = model(batch)
                
                results[f'{name}_mean'] = float(outputs['predictions'].cpu().numpy()[0])
                results[f'{name}_std'] = float(outputs['uncertainties'].cpu().numpy()[0])
        
        results_list.append(results)
    
    return pd.DataFrame(results_list)

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [157]:

fitness = get_fitness()


In [158]:

exact_fitness = get_exact_fitness()


In [31]:
model = PartialEnsembleModuleWithFeatures.load_model('/home/naka/code/egfr_binder_rd2/notebooks/expression_model.pt').cuda()

  saved_dict = torch.load(load_path)
Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t6_8M_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded from: /home/naka/code/egfr_binder_rd2/notebooks/expression_model.pt
Loaded adapter state dict keys: ['base_model.model.encoder.layer.0.attention.self.query.lora_A.weight', 'base_model.model.encoder.layer.0.attention.self.query.lora_B.weight', 'base_model.model.encoder.layer.0.attention.self.key.lora_A.weight', 'base_model.model.encoder.layer.0.attention.self.key.lora_B.weight', 'base_model.model.encoder.layer.0.attention.self.value.lora_A.weight', 'base_model.model.encoder.layer.0.attention.self.value.lora_B.weight', 'base_model.model.encoder.layer.1.attention.self.query.lora_A.weight', 'base_model.model.encoder.layer.1.attention.self.query.lora_B.weight', 'base_model.model.encoder.layer.1.attention.self.key.lora_A.weight', 'base_model.model.encoder.layer.1.attention.self.key.lora_B.weight', 'base_model.model.encoder.layer.1.attention.self.value.lora_A.weight', 'base_model.model.encoder.layer.1.attention.self.value.lora_B.weight', 'base_model.model.encoder.layer.2.attentio

  saved_dict = torch.load(load_path)


In [34]:
models = {'expression': model}

In [143]:
fitness = fitness.drop(columns = ['target_sequence'])

In [144]:
sequences = fitness['binder_sequence'].tolist()
df = predict_sequences(sequences, models)

  results[f'{name}_mean'] = float(outputs['predictions'].cpu().numpy()[0])
  results[f'{name}_std'] = float(outputs['uncertainties'].cpu().numpy()[0])
100%|██████████| 7959/7959 [00:29<00:00, 272.71it/s]


In [145]:
fitness = fitness.merge(df, right_on='sequence', left_on='binder_sequence')


In [146]:

exact_fitness = exact_fitness.merge(df, right_on='sequence', left_on='binder_sequence')

In [147]:
fitness['expression_rank'] = fitness['expression_mean'].rank(pct=True)
exact_fitness['expression_rank'] = exact_fitness['expression_mean'].rank(pct=True)


In [148]:
fitness['e_fitness'] = (fitness['pae_interaction_rank'] + fitness['i_ptm_rank'] + fitness['exact_sequence_log_pll_rank'] + fitness['expression_rank']) / 4
exact_fitness['e_fitness'] = (exact_fitness['pae_interaction_rank'] + exact_fitness['i_ptm_rank'] + exact_fitness['sequence_log_pll_rank'] + exact_fitness['expression_rank']) / 4


In [149]:
fdf = fitness[~fitness['binder_sequence'].isin(exact_fitness['binder_sequence'])]

In [130]:
f_cols = [
    'sequence_hash',
    'binder_length',
    'exact_fitness',
    'e_fitness',
    'pae_interaction', 
    'i_ptm', 
    'exact_sequence_log_pll', 
    'expression_mean',
    'p_soluble', 
    'pae_interaction_rank', 
    'i_ptm_rank',
    'exact_sequence_log_pll_rank',
    'expression_rank',
    'p_soluble_rank', 
    
    'binder_charged_fraction', 
    'binder_hydrophobic_fraction',
    # 'binder_hydrophobicity',
    # 'binder_hydropathy', 'binder_solubility',
    # 'parent_hash', 
    # 'parent_sequence', 
    'mutations', 
    'binder_sequence',
]

In [131]:
def color_scale(data):
    """
    Takes a series and returns a color scale normalized to that column's range
    """
    def _color_value(val, min_val, max_val):
        if pd.isna(val):
            return ''
        
        # Don't apply to non-numeric columns
        if not isinstance(val, (int, float)):
            return ''
            
        # Normalize value to the column's range
        if max_val == min_val:
            normalized_val = 0
        else:
            normalized_val = (val - min_val) / (max_val - min_val)
        
        # Create blue color scale
        intensity = normalized_val
        return f'background-color: rgba(0,0,255,{intensity})'
    
    # Get the min and max values for this column
    min_val = data.min()
    max_val = data.max()
    return [_color_value(v, min_val, max_val) for v in data]


In [132]:
fdf[f_cols].sort_values('exact_fitness', ascending=False).round(2).query('expression_mean > 0.3').head(10)['binder_sequence'].tolist()

['SYDGKCLNNGACRYIERLDSYTCHCVSGYTGDRCQTRDLRWLELR',
 'KYDGYCNNNGVCHHIESLDKYTCNCRVGYSGDRCQTRDLRWLELRY',
 'SYDGYCNNHGVCRHIESLDSWTCQCRQGYEGDRCQTRDLRWLELN',
 'SYKGYCNNHGVCRHIESLDTYTCQCKQGYEGDRCETRDLRWLELR',
 'TYDGYCLNGGKCEHVESLDKYTCNCVSGYTGDRCETRDLRWLEHR',
 'TYDGYCLNGGKCRQVESLDKYTCNCVSGYTGDRCQTRDLRWLEQR',
 'PYDGYCLNGGVCMHIESLDKGTCECVEGYTGDRCQTRDLRWLELR',
 'SNCPERYRGHCENNGSCKYVRNLNTYTCQCLSGYTGARCDMLDIRYLL',
 'PYKGYCLNGGVCMHIESLDKYTCECVIGYTGDRCQDRDLRWLELR',
 'CPRRYNGICTNNGSCQYAINLRTYTCQCLPGYTKPKCQELDIRY']

In [134]:

# Prepare display dataframe
display_df = fdf[f_cols].sort_values('exact_fitness', ascending=False).round(2).query('expression_mean > 0.3').query('i_ptm >0.93').head(300)

# Apply the styling to numeric columns
numeric_cols = display_df.select_dtypes(include=['float64', 'int64']).columns
styled_df = display_df.style.format(precision=2)

for col in numeric_cols:
    styled_df = styled_df.apply(color_scale, subset=[col])

styled_df

Unnamed: 0,sequence_hash,binder_length,exact_fitness,e_fitness,pae_interaction,i_ptm,exact_sequence_log_pll,expression_mean,p_soluble,pae_interaction_rank,i_ptm_rank,exact_sequence_log_pll_rank,expression_rank,p_soluble_rank,binder_charged_fraction,binder_hydrophobic_fraction,mutations,binder_sequence
343,bdr_89bef4,45,0.84,0.85,8.1,0.94,-87.25,0.6,0.1,0.81,0.88,0.83,0.87,0.53,0.31,0.27,N24H,SYDGKCLNNGACRYIERLDSYTCHCVSGYTGDRCQTRDLRWLELR
943,bdr_0cc5cb,46,0.84,0.86,8.11,0.94,-86.92,0.68,0.17,0.8,0.88,0.83,0.92,0.62,0.33,0.28,"S1K,L7N,S20K,R46Y",KYDGYCNNNGVCHHIESLDKYTCNCRVGYSGDRCQTRDLRWLELRY
2,bdr_f06b8f,45,0.84,0.83,7.96,0.94,-90.36,0.53,0.17,0.86,0.88,0.77,0.82,0.63,0.31,0.24,"R9H,N21W,D26R,R45N",SYDGYCNNHGVCRHIESLDSWTCQCRQGYEGDRCQTRDLRWLELN
17,bdr_e27bff,45,0.84,0.85,8.13,0.94,-86.9,0.62,0.27,0.8,0.88,0.83,0.88,0.69,0.36,0.24,"D3K,R9H,D35E",SYKGYCNNHGVCRHIESLDTYTCQCKQGYEGDRCETRDLRWLELR
1604,bdr_52210e,48,0.83,0.82,8.18,0.94,-85.5,0.48,0.06,0.77,0.88,0.85,0.78,0.41,0.23,0.27,"R5E,A20V,R24N,Q40D",SNCPERYRGHCENNGSCKYVRNLNTYTCQCLSGYTGARCDMLDIRYLL
518,bdr_2c7798,44,0.83,0.79,7.75,0.94,-96.17,0.35,0.05,0.94,0.88,0.68,0.65,0.37,0.18,0.25,R36K,CPRRYNGICTNNGSCQYAINLRTYTCQCLPGYTKPKCQELDIRY
928,bdr_db8027,45,0.83,0.82,8.23,0.94,-85.61,0.5,0.1,0.74,0.88,0.85,0.79,0.53,0.31,0.27,"G10A,T36Q",EYKGYCLNNARCRHVQSLDRYTCNCVSGYTGDRCQQRDLRWLELR
314,bdr_f12f5e,45,0.82,0.82,8.14,0.94,-88.78,0.51,0.08,0.79,0.88,0.8,0.8,0.47,0.27,0.29,R37L,SYDGKCLNNGACRYIERLDSYTCNCVSGYTGDRCQTLDLRWLELR
70,bdr_3ef74b,46,0.82,0.8,8.18,0.94,-87.55,0.42,0.17,0.76,0.88,0.82,0.71,0.63,0.35,0.24,K7H,SYDGYCHNGGQCRHIHSIDSYTCNCRVGYEGDRCQSRDLRWLELRE
1402,bdr_5ed881,48,0.82,0.82,7.89,0.94,-94.84,0.52,0.08,0.89,0.88,0.69,0.81,0.46,0.27,0.23,Q40P,SNCPRRYRGICENNGSCKYRENLRTYTCQCRSGYTGARCPELDIRYLL


In [90]:
display_df['binder_sequence'].iloc[:4].tolist()

['SYEGYCENGGTLQHIESLDSYTCKCLKGYTGDRCQSQDLRYLYLE',
 'CPARYNGICTNHGRCQYARNLRTYTCQCLPGYTKHRCQELDIRY',
 'SYNGYCLNNGRCQHIMSLDSYTCRCEVGYSGDRCQTHDLRWLELR',
 'HYDGYCLNGGACRRIESLHSYTCQCQKGYSGDRCQTRDLRWLELQN']

In [23]:
fitness.query('binder_length == 55').sort_values('i_ptm', ascending=False).head(10)

Unnamed: 0,seq_hash,binder_sequence,binder_length,target_length,model_number,binder_plddt,binder_pae,pae_interaction,ptm,i_ptm,binder_charged_fraction,binder_hydrophobic_fraction,parent_hash,parent_sequence,mutations,binder_hydrophobicity,binder_hydropathy,binder_solubility,p_soluble,sequence_hash,sequence,sequence_length,normalized_log_pll,sequence_log_pll,mean_token_probability,min_token_probability,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,p_soluble_rank,fitness,exact_sequence_hash,exact_sequence,exact_sequence_length,exact_normalized_log_pll,exact_sequence_log_pll,exact_sequence_log_pll_rank,exact_fitness
694,1f6276,YSLSACPKRYSGVCSNAGVCHLAVSLGSYTCTCQTGYQGERCQTYDLRYILLELE,55,622,1,91.812,3.473683,7.53127,0.87,0.95,0.163636,0.309091,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,"T1Y,G8K,G17A,P40E,I49Y",0.306758,0.492929,0.537036,0.045618,bdr_fdbbeb,YSLSACPKRYSGVCSNAGVCHLAVSLGSYTCTCQTGYQGERCQTYDLRYILLELE,55.0,-4.094093,-225.175125,,,0.979897,0.973482,0.549217,0.341621,0.711054,fdbbeb,YSLSACPKRYSGVCSNAGVCHLAVSLGSYTCTCQTGYQGERCQTYDLRYILLELE,55.0,-2.126485,-116.956699,0.269612,0.740997
1575,0c32ca,ELFSACPSKYRGACTNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLM,55,622,1,90.235636,3.760704,7.89263,0.87,0.94,0.181818,0.272727,e9c179,ELFSACPSNYRLACNNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLQ,"N9K,L12G,N15T,Q55M",0.319451,0.484646,0.535019,0.04906,bdr_910021,ELFSACPSKYRGACTNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLM,55.0,-4.620086,-254.104736,,,0.882158,0.880637,0.456222,0.361857,0.645219,910021,ELFSACPSKYRGACTNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLM,55.0,-1.910156,-105.0586,0.545707,0.769501
1380,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55,622,1,89.347091,3.944542,8.103986,0.87,0.94,0.145455,0.309091,e9c179,ELFSACPSNYRLACNNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLQ,"R11L,Q32I",0.335575,0.510303,0.543909,0.035542,bdr_17fa18,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-3.425479,-188.401352,,,0.801217,0.880637,0.682011,0.27417,0.659509,17fa18,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-2.025817,-111.419908,0.392909,0.691588
493,fb0fca,TSLSACPGRYSGVCSNGGVCHLAVSLGRYTCTCQTGYQGPRCQTYDLRIILLELE,55,622,1,89.424545,4.227091,8.152867,0.87,0.94,0.145455,0.290909,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,S28R,0.299348,0.504848,0.538376,0.043462,bdr_a2483c,TSLSACPGRYSGVCSNGGVCHLAVSLGRYTCTCQTGYQGPRCQTYDLRIILLELE,55.0,-2.093213,-115.126694,,,0.776617,0.880637,0.928628,0.328396,0.72857,a2483c,TSLSACPGRYSGVCSNGGVCHLAVSLGRYTCTCQTGYQGPRCQTYDLRIILLELE,55.0,-2.166444,-119.154437,0.236539,0.631265
602,b1e012,TSLSACPGRYSGVCSNGGTCHLAVSLGSYTCTCQTGYQGPRCQTYDLRWILLELE,55,622,1,90.095091,3.93157,7.969513,0.87,0.94,0.127273,0.272727,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,"V19T,I49W",0.289811,0.491515,0.538149,0.043819,bdr_ea294c,TSLSACPGRYSGVCSNGGTCHLAVSLGSYTCTCQTGYQGPRCQTYDLRWILLELE,55.0,-2.793515,-153.643326,,,0.856368,0.880637,0.808968,0.331041,0.719254,ea294c,TSLSACPGRYSGVCSNGGTCHLAVSLGSYTCTCQTGYQGPRCQTYDLRWILLELE,55.0,-2.095713,-115.26424,0.304008,0.680338
2004,6ca873,ELFSACPRNYLGACNNGGVCRLACSLKSYTCICAPGYSGHRCQTLDLRYIELRLQ,55,622,1,90.332909,3.712377,7.825061,0.87,0.94,0.181818,0.290909,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"S8R,L12G,E24C,S27K,P40H",0.331801,0.49697,0.545528,0.033501,bdr_e02ee5,ELFSACPRNYLGACNNGGVCRLACSLKSYTCICAPGYSGHRCQTLDLRYIELRLQ,55.0,-4.779858,-262.892212,,,0.907155,0.880637,0.421863,0.257109,0.616691,e02ee5,ELFSACPRNYLGACNNGGVCRLACSLKSYTCICAPGYSGHRCQTLDLRYIELRLQ,55.0,-2.080672,-114.436951,0.323323,0.703705
739,2227ce,ELFSACPSRYHGACNNRGVCRLAESLSSYTCICASGYSGPRCQTLDLRYIELRLQ,55,622,1,90.673818,3.662529,7.786452,0.87,0.94,0.2,0.272727,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"N9R,L11H,L12G,G17R,P35S",0.313551,0.47899,0.536617,0.046315,bdr_3b9cd7,ELFSACPSRYHGACNNRGVCRLAESLSSYTCICASGYSGPRCQTLDLRYIELRLQ,55.0,-3.438047,-189.092606,,,0.920645,0.880637,0.677633,0.347044,0.70649,3b9cd7,ELFSACPSRYHGACNNRGVCRLAESLSSYTCICASGYSGPRCQTLDLRYIELRLQ,55.0,-2.021865,-111.202568,0.399259,0.733514
1810,43bd2e,ELFSACPSRYLLACNTGGVCRLAESLSSYTCICAPGYSGTRCQTLDLRYIELRLE,55,622,1,89.341455,3.891243,8.123738,0.87,0.94,0.181818,0.309091,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"N9R,N16T,P40T,Q55E",0.3294,0.515758,0.542123,0.037934,bdr_501fd3,ELFSACPSRYLLACNTGGVCRLAESLSSYTCICAPGYSGTRCQTLDLRYIELRLE,55.0,-4.095197,-225.23584,,,0.791297,0.880637,0.548687,0.292289,0.628228,501fd3,ELFSACPSRYLLACNTGGVCRLAESLSSYTCICAPGYSGTRCQTLDLRYIELRLE,55.0,-2.191429,-120.52861,0.223045,0.63166
1786,87076b,SLFSACPSRYTGACHNGGVCRLAISLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55,622,1,91.141455,3.543117,7.750459,0.87,0.94,0.145455,0.290909,82b7dd,SLFSACPSRYLGACHNGGVCRLATSLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"L11T,T24I",0.332007,0.512929,0.555895,0.022878,bdr_c4e8c6,SLFSACPSRYTGACHNGGVCRLAISLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-4.038197,-222.10083,,,0.931226,0.880637,0.561953,0.147996,0.630453,c4e8c6,SLFSACPSRYTGACHNGGVCRLAISLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-1.944401,-106.942031,0.503903,0.771922
873,dab460,TSLSACPARYSGVCSNGAVCHLAVSLDSYTCTCQTGYQGPRCQTYDLRIILLFLD,55,622,1,90.161455,4.124294,7.927026,0.87,0.94,0.127273,0.309091,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,"G8A,G18A,G27D,E53F,E55D",0.316226,0.527677,0.551212,0.027193,bdr_66e284,TSLSACPARYSGVCSNGAVCHLAVSLDSYTCTCQTGYQGPRCQTYDLRIILLFLD,55.0,-2.71711,-149.44104,,,0.870123,0.880637,0.837357,0.199048,0.696791,66e284,TSLSACPARYSGVCSNGAVCHLAVSLDSYTCTCQTGYQGPRCQTYDLRIILLFLD,55.0,-2.162216,-118.921873,0.240111,0.663624


In [155]:
fitness.query('binder_length == 241').sort_values('fitness')

Unnamed: 0,seq_hash,binder_sequence,binder_length,target_length,model_number,binder_plddt,binder_pae,pae_interaction,ptm,i_ptm,binder_charged_fraction,binder_hydrophobic_fraction,parent_hash,parent_sequence,mutations,binder_hydrophobicity,binder_hydropathy,binder_solubility,p_soluble,sequence_hash,sequence_x,sequence_length,normalized_log_pll,sequence_log_pll,mean_token_probability,min_token_probability,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,p_soluble_rank,fitness,exact_sequence_hash,exact_sequence,exact_sequence_length,exact_normalized_log_pll,exact_sequence_log_pll,exact_sequence_log_pll_rank,exact_fitness,sequence_y,expression_mean,expression_std,expression_rank,e_fitness
7863,89e65f,NYDSECDLSHDNYALADGVCMYIEALDKYACTCPPGYIGERCQLPDRRWWELHGGEGGEGGEGGEGGEWGPGGLGGRGGEGGRGGEGVHGGEGGERGRGGEGGEGGEGGSGYEGNRRWSCGRGQTGGRGGEGGRCGEGGEGIRGGPGREGGEGGEGGEGGRGGEGGRGGEGFRGGEGGRGGEEGEGGRNSESECPLSHDGYCLHDGDCMYIEANTKGACNDAVGYIGERCQYRDLKWHEIR,241,622,1,30.709378,26.308753,27.207666,0.48,0.31,0.319502,0.149378,ef9a68,NSDSECDLSHDGYALADGVCMYIEALDKYACTCPVGYIGERCQYPDRKWWELHGGEGGEGGEGGEGGEWGPGGLGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGNRGWSGGRGGEGGRGGEGGRCGEGGEGGRGGEGREGGEGGEGGEGGRGGEGGRGGEGFRGGEGGRGGEEGEGGRNSDSECPLSHDGYCLHDGDCMYIEANTKYACNCAVGYIGERCQYRDLKWWEIR,"S2Y,G12N,V35P,Y44L,K48R,G88V,R89H,E110S,G112Y,G117R,G120C,G124Q,E125T,G142I,E146P,D191E,Y217G,C221D,W238H",0.226807,0.376533,0.405798,0.872806,bdr_58a1a7,NYDSECDLSHDNYALADGVCMYIEALDKYACTCPPGYIGERCQLPDRRWWELHGGEGGEGGEGGEGGEWGPGGLGGRGGEGGRGGEGVHGGEGGERGRGGEGGEGGEGGSGYEGNRRWSCGRGQTGGRGGEGGRCGEGGEGIRGGPGREGGEGGEGGEGGRGGEGGRGGEGFRGGEGGRGGEEGEGGRNSESECPLSHDGYCLHDGDCMYIEANTKGACNDAVGYIGERCQYRDLKWHEIR,243.0,-6.541137,-1589.496216,,,0.01558,0.026699,0.000758,0.861666,0.226176,58a1a7,NYDSECDLSHDNYALADGVCMYIEALDKYACTCPPGYIGERCQLPDRRWWELHGGEGGEGGEGGEGGEWGPGGLGGRGGEGGRGGEGVHGGEGGERGRGGEGGEGGEGGSGYEGNRRWSCGRGQTGGRGGEGGRCGEGGEGIRGGPGREGGEGGEGGEGGRGGEGGRGGEGFRGGEGGRGGEEGEGGRNSESECPLSHDGYCLHDGDCMYIEANTKGACNDAVGYIGERCQYRDLKWHEIR,241.0,-1.51357,-364.770278,0.000127,0.014135,NYDSECDLSHDNYALADGVCMYIEALDKYACTCPPGYIGERCQLPDRRWWELHGGEGGEGGEGGEGGEWGPGGLGGRGGEGGRGGEGVHGGEGGERGRGGEGGEGGEGGSGYEGNRRWSCGRGQTGGRGGEGGRCGEGGEGIRGGPGREGGEGGEGGEGGRGGEGGRGGEGFRGGEGGRGGEEGEGGRNSESECPLSHDGYCLHDGDCMYIEANTKGACNDAVGYIGERCQYRDLKWHEIR,-0.848195,0.145338,0.009024,0.012857
7857,1cb925,NSDNECPLSHDGYCLYHGVCMYIEALSKYHCTCPGGYIGKRCQYFDLPWWELHGGAGCEGGEGGEGGEGDPGWEGGRGGEGFRGGEGGRGGEGGERGRGGEKGEGGEGGEGGEGGLGGENGRGGEGGRGGEGGRCYEEGEGGRGGEGGEGGEGGEGGEGGAGGEGGRGGEGGRGGESGRGGEGGEGGRNSDSECPLSHDGYCLHDNVCMYIHAGTKYACNCPVGYIGERCQGRDLEKWELR,241,622,1,29.330954,26.196912,27.658043,0.54,0.35,0.307054,0.149378,425af6,NSDSECPLSHDGYCLHDGVCMYIEALSKYHCTCPVGYIGERCQYRDLKWWELHGGAGCEGGEGGEGGEGDPGWEGGRGGEGFRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGWGGENGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGESGRGGEGGEGGRNSDSECPLSHDGYCLHDGVCMYIEALTKYACNCPVGYIGERCQGRDLKKWELR,"S4N,H16Y,D17H,V35G,E40K,R45F,K48P,G102K,W116L,G136Y,G138E,R161A,G206N,E212H,L214G,K236E",0.225288,0.383817,0.407834,0.864001,bdr_5806d0,NSDNECPLSHDGYCLYHGVCMYIEALSKYHCTCPGGYIGKRCQYFDLPWWELHGGAGCEGGEGGEGGEGDPGWEGGRGGEGFRGGEGGRGGEGGERGRGGEKGEGGEGGEGGEGGLGGENGRGGEGGRGGEGGRCYEEGEGGRGGEGGEGGEGGEGGEGGAGGEGGRGGEGGRGGESGRGGEGGEGGRNSDSECPLSHDGYCLHDNVCMYIHAGTKYACNCPVGYIGERCQGRDLEKWELR,243.0,-6.114737,-1485.881104,,,0.012564,0.0463,0.00139,0.858274,0.229632,5806d0,NSDNECPLSHDGYCLYHGVCMYIEALSKYHCTCPGGYIGKRCQYFDLPWWELHGGAGCEGGEGGEGGEGDPGWEGGRGGEGFRGGEGGRGGEGGERGRGGEKGEGGEGGEGGEGGLGGENGRGGEGGRGGEGGRCYEEGEGGRGGEGGEGGEGGEGGEGGAGGEGGRGGEGGRGGESGRGGEGGEGGRNSDSECPLSHDGYCLHDNVCMYIHAGTKYACNCPVGYIGERCQGRDLEKWELR,241.0,-1.19902,-288.963801,0.002791,0.020552,NSDNECPLSHDGYCLYHGVCMYIEALSKYHCTCPGGYIGKRCQYFDLPWWELHGGAGCEGGEGGEGGEGDPGWEGGRGGEGFRGGEGGRGGEGGERGRGGEKGEGGEGGEGGEGGLGGENGRGGEGGRGGEGGRCYEEGEGGRGGEGGEGGEGGEGGEGGAGGEGGRGGEGGRGGESGRGGEGGEGGRNSDSECPLSHDGYCLHDNVCMYIHAGTKYACNCPVGYIGERCQGRDLEKWELR,-0.369285,0.107791,0.090362,0.038004
7856,c0c4e8,NSDSECELSHDGYCLHRGVCMYIEALDKYACTCPAGYIGERCPYRDLKWWELHGGEGGEGGEGGEGAEGGPGHEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGWGAEGGRGGEGGPGGEGGSCGEAGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGTRGGEGGIGGEGGEGGRNSDSECPLSHDGKCLTKGNCMYIEALTKYACNFPPGYIGERCQYRDLKWWELR,241,622,1,30.221618,26.119905,27.551306,0.51,0.29,0.307054,0.136929,2fed46,NSDSECELSHDGYCLHRGVCMYIEALDKYACTCDAGYIGERCPYRDLKWWELHGGEGGEGGEGGEGGEGGPGHEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEAGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGIGGEGGEGGRNSDSECPLSHDGYCLTDGNCMYIEALTKYACNCPPGYIGERCQYRDLKWWELR,"D34P,G67A,R116W,G118A,R128P,R134S,G172T,Y201K,D205K,C221F",0.223127,0.384325,0.396813,0.906025,bdr_104383,NSDSECELSHDGYCLHRGVCMYIEALDKYACTCPAGYIGERCPYRDLKWWELHGGEGGEGGEGGEGAEGGPGHEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGWGAEGGRGGEGGPGGEGGSCGEAGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGTRGGEGGIGGEGGEGGRNSDSECPLSHDGKCLTKGNCMYIEALTKYACNFPPGYIGERCQYRDLKWWELR,243.0,-4.297083,-1044.191162,,,0.013946,0.021674,0.007332,0.878502,0.230363,104383,NSDSECELSHDGYCLHRGVCMYIEALDKYACTCPAGYIGERCPYRDLKWWELHGGEGGEGGEGGEGAEGGPGHEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGWGAEGGRGGEGGPGGEGGSCGEAGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGTRGGEGGIGGEGGEGGRNSDSECPLSHDGKCLTKGNCMYIEALTKYACNFPPGYIGERCQYRDLKWWELR,241.0,-0.949543,-228.839757,0.010148,0.015256,NSDSECELSHDGYCLHRGVCMYIEALDKYACTCPAGYIGERCPYRDLKWWELHGGEGGEGGEGGEGAEGGPGHEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGWGAEGGRGGEGGPGGEGGSCGEAGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGTRGGEGGIGGEGGEGGRNSDSECPLSHDGKCLTKGNCMYIEALTKYACNFPPGYIGERCQYRDLKWWELR,-0.332248,0.082072,0.106279,0.038012
7852,b12d16,NSDSECELSHDGYCLHDQHCWYQEALSKYHCTCPIGYIGERCQYRDLKWWELHGGAGCEGGEGGEGGEGDPGWERGRGGEGTRGGEGGRGREGGERGRGGEGGEGGEGGEGGAGGWCGENGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGESGGGGEYGEGGRPSDSECPLSHDGSCLHMGVCMYITALTKYACNCPGGYIGERCDGRDLKKWELR,241,622,1,29.820083,26.415861,27.55362,0.53,0.38,0.319502,0.13278,425af6,NSDSECPLSHDGYCLHDGVCMYIEALSKYHCTCPVGYIGERCQYRDLKWWELHGGAGCEGGEGGEGGEGDPGWEGGRGGEGFRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGWGGENGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGESGRGGEGGEGGRNSDSECPLSHDGYCLHDGVCMYIEALTKYACNCPVGYIGERCQGRDLKKWELR,"P7E,G18Q,V19H,M21W,I23Q,V35I,G75R,F82T,G91R,E113A,G117C,R179G,G183Y,N189P,Y201S,D205M,E212T,V223G,Q231D",0.218586,0.375426,0.409077,0.858378,bdr_766732,NSDSECELSHDGYCLHDQHCWYQEALSKYHCTCPIGYIGERCQYRDLKWWELHGGAGCEGGEGGEGGEGDPGWERGRGGEGTRGGEGGRGREGGERGRGGEGGEGGEGGEGGAGGWCGENGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGESGGGGEYGEGGRPSDSECPLSHDGSCLHMGVCMYITALTKYACNCPGGYIGERCDGRDLKKWELR,243.0,-6.582497,-1599.546631,,,0.013821,0.056791,0.000632,0.854379,0.231406,766732,NSDSECELSHDGYCLHDQHCWYQEALSKYHCTCPIGYIGERCQYRDLKWWELHGGAGCEGGEGGEGGEGDPGWERGRGGEGTRGGEGGRGREGGERGRGGEGGEGGEGGEGGAGGWCGENGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGESGGGGEYGEGGRPSDSECPLSHDGSCLHMGVCMYITALTKYACNCPGGYIGERCDGRDLKKWELR,241.0,-1.289905,-310.867078,0.001649,0.024087,NSDSECELSHDGYCLHDQHCWYQEALSKYHCTCPIGYIGERCQYRDLKWWELHGGAGCEGGEGGEGGEGDPGWERGRGGEGTRGGEGGRGREGGERGRGGEGGEGGEGGEGGAGGWCGENGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGESGGGGEYGEGGRPSDSECPLSHDGSCLHMGVCMYITALTKYACNCPGGYIGERCDGRDLKKWELR,-0.271303,0.108049,0.133475,0.051434
7842,fdfbe8,NSDSECPASHVGYGLHDGVCMTIETLDKYVCMCPPGYIGHRCQYRDLKWWELHGGCGGEGGEGGEGGEGLPGGEGGRGGEGGSGGEGGRGGEGGERGRGGEGGEGGEMGESGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGIGGRGGEGGRGGEGGRGGMGGRGGEGGEGGRNSDSECPLSHDGCCLHDGQCEYIEALTKYACNCPVGYIGETCQYRDLKWFELR,241,622,1,31.644357,26.649926,27.600609,0.54,0.45,0.294606,0.145228,2fa4c1,NSDSECPASHVGYCLHDGVCMYIETLDKYVCTCPPGYIGERCQYRDLKWWELHGGCGGEGGEGGEGGEGLPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEMGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGMGGRGGEGGEGGRNSDSECPLSHDGCCLHDGQCMYIEALTKYACNCPVGYIGERCQYRDLKWFELR,"C14G,Y22T,T32M,E40H,R83S,G111S,E158I,M209E,R229T",0.213685,0.39456,0.412344,0.842673,bdr_6186ca,NSDSECPASHVGYGLHDGVCMTIETLDKYVCMCPPGYIGHRCQYRDLKWWELHGGCGGEGGEGGEGGEGLPGGEGGRGGEGGSGGEGGRGGEGGERGRGGEGGEGGEMGESGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGIGGRGGEGGRGGEGGRGGMGGRGGEGGEGGRNSDSECPLSHDGCCLHDGQCEYIEALTKYACNCPVGYIGETCQYRDLKWFELR,243.0,-4.95102,-1203.0979,,,0.013318,0.084872,0.005056,0.848725,0.237993,6186ca,NSDSECPASHVGYGLHDGVCMTIETLDKYVCMCPPGYIGHRCQYRDLKWWELHGGCGGEGGEGGEGGEGLPGGEGGRGGEGGSGGEGGRGGEGGERGRGGEGGEGGEMGESGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGIGGRGGEGGRGGEGGRGGMGGRGGEGGEGGRNSDSECPLSHDGCCLHDGQCEYIEALTKYACNCPVGYIGETCQYRDLKWFELR,241.0,-1.119231,-269.734572,0.003806,0.033999,NSDSECPASHVGYGLHDGVCMTIETLDKYVCMCPPGYIGHRCQYRDLKWWELHGGCGGEGGEGGEGGEGLPGGEGGRGGEGGSGGEGGRGGEGGERGRGGEGGEGGEMGESGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGIGGRGGEGGRGGEGGRGGMGGRGGEGGEGGRNSDSECPLSHDGCCLHDGQCEYIEALTKYACNCPVGYIGETCQYRDLKWFELR,-0.455735,0.091936,0.060033,0.040507
7831,b3cb91,NSDSECPLSFDGYCAHDGVCMYIEALDKYACTCPPGYIGERCMYWDPKSWELHFHEGGEGGEGGEGGEGGPEGEGGRGGEGGRGGEGGRGGEGGERGRGGECGEGGMGGEGMEQGRGGEGGRIGEGGRGGEGSRCGEGGEGGRGGEGGEGGEGGQGGEGGRGGEGGKGGETGRGGEGGRGGEGGEGGWNSDSEMALSHDGYNLHDGVCQQIEALTKYACIKPVTYGGERCHYRDLKWWEIY,241,622,1,27.894149,26.551134,26.730862,0.56,0.44,0.307054,0.157676,2f06e3,NSDSECPLSHDGYCLHDGVCMYIEALDKYACTCPPGYIGERCQYRDPKSWELHGHEGGEGGEGGEGGEGGPEGEGGRGGEGGRGGEGGRGGEGGERGRGGECGEGGMGGEGGEQGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGETGRGGEGGRGGEGGEGGWNSDSECPLSHDGYNLHDGVCQQIEALTKYACIKPVTYIGERCHYRDLKWWELR,"H10F,L15A,Q43M,R45W,G54F,G112M,G123I,G133S,E155Q,R167K,C194M,P195A,I226G,L240I,R241Y",0.228091,0.384002,0.403291,0.882971,bdr_f0df34,NSDSECPLSFDGYCAHDGVCMYIEALDKYACTCPPGYIGERCMYWDPKSWELHFHEGGEGGEGGEGGEGGPEGEGGRGGEGGRGGEGGRGGEGGERGRGGECGEGGMGGEGMEQGRGGEGGRIGEGGRGGEGSRCGEGGEGGRGGEGGEGGEGGQGGEGGRGGEGGKGGETGRGGEGGRGGEGGEGGWNSDSEMALSHDGYNLHDGVCQQIEALTKYACIKPVTYGGERCHYRDLKWWEIY,243.0,-6.030538,-1465.420654,,,0.019977,0.080601,0.001517,0.866692,0.242197,f0df34,NSDSECPLSFDGYCAHDGVCMYIEALDKYACTCPPGYIGERCMYWDPKSWELHFHEGGEGGEGGEGGEGGPEGEGGRGGEGGRGGEGGRGGEGGERGRGGECGEGGMGGEGMEQGRGGEGGRIGEGGRGGEGSRCGEGGEGGRGGEGGEGGEGGQGGEGGRGGEGGKGGETGRGGEGGRGGEGGEGGWNSDSEMALSHDGYNLHDGVCQQIEALTKYACIKPVTYGGERCHYRDLKWWEIY,241.0,-1.333132,-321.284731,0.001015,0.033864,NSDSECPLSFDGYCAHDGVCMYIEALDKYACTCPPGYIGERCMYWDPKSWELHFHEGGEGGEGGEGGEGGPEGEGGRGGEGGRGGEGGRGGEGGERGRGGECGEGGMGGEGMEQGRGGEGGRIGEGGRGGEGSRCGEGGEGGRGGEGGEGGEGGQGGEGGRGGEGGKGGETGRGGEGGRGGEGGEGGWNSDSEMALSHDGYNLHDGVCQQIEALTKYACIKPVTYGGERCHYRDLKWWEIY,-0.091169,0.161056,0.22484,0.081608
7830,7c226f,HSDSECPLSHSGYCQHTPVCMYIEALDKYACTCPSGYIGERCQMRDLKWWELHGGEGGEGGEGGEGPEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGHEGGEGGRGGEGGRGGEGGRGGEWGRCGEGGEGGRGGEFNEGGEGGEGGEFGRGGRGGRGGEGGFGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDTVCMMIEALRKYAANCPVGVIGLRMQYLWLTWWELK,241,622,1,36.775768,26.785145,27.264642,0.58,0.5,0.298755,0.165975,53c556,HSDSECPMSHDGYCLHTGVCMYIEALDKYACTCPSGYIGERCQMRDLKWWELHGGEGGEGGEGGEGGEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGHEGGEGGRGGEGGRGGEGGRGGEWGRCGEGGEGGRGGEFNEGGEGGEGGEFGRGGEGGRGGEGGFGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDTVCMMIEALRKYAANCPVGVIGERCQYRDLTWWELR,"M8L,D11S,L15Q,G18P,G67P,E164R,E228L,C230M,R233L,D234W,R241K",0.230752,0.393684,0.411902,0.84488,bdr_ffaffd,HSDSECPLSHSGYCQHTPVCMYIEALDKYACTCPSGYIGERCQMRDLKWWELHGGEGGEGGEGGEGPEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGHEGGEGGRGGEGGRGGEGGRGGEWGRCGEGGEGGRGGEFNEGGEGGEGGEFGRGGRGGRGGEGGFGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDTVCMMIEALRKYAANCPVGVIGLRMQYLWLTWWELK,243.0,-5.53181,-1344.229736,,,0.015329,0.10284,0.002781,0.849479,0.242607,ffaffd,HSDSECPLSHSGYCQHTPVCMYIEALDKYACTCPSGYIGERCQMRDLKWWELHGGEGGEGGEGGEGPEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGHEGGEGGRGGEGGRGGEGGRGGEWGRCGEGGEGGRGGEFNEGGEGGEGGEFGRGGRGGRGGEGGFGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDTVCMMIEALRKYAANCPVGVIGLRMQYLWLTWWELK,241.0,-1.296554,-312.469469,0.001395,0.039855,HSDSECPLSHSGYCQHTPVCMYIEALDKYACTCPSGYIGERCQMRDLKWWELHGGEGGEGGEGGEGPEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGHEGGEGGRGGEGGRGGEGGRGGEWGRCGEGGEGGRGGEFNEGGEGGEGGEFGRGGRGGRGGEGGFGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDTVCMMIEALRKYAANCPVGVIGLRMQYLWLTWWELK,-0.584504,0.125355,0.029452,0.037254
7829,74806f,NSDSECPLSHDGYCLTCGVCMYIEALDKYACTCPNGYIGERCQYRDLKWWELKGGEGGSGGEGGEWGEGGPGGEGGRGGEGGRKGKGGMGGEVGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGWRGGEGGRGGDYGEGGRNSDSECPLSSDGYCWHDGVCMYIEALTKYACCCPVGYIGERCQYRDLKNWELR,241,622,1,36.409253,26.726713,27.520654,0.61,0.55,0.298755,0.153527,ca5cee,NSDSECPLSHDGYCLHDGVCMYIEALDKYACTCPVGYIGERCQYRDLKWWELHGGEGGEGGEGGEGGEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDGVCMYIEALTKYACNCPVGYIGERCQYRDLKWWELR,"H16T,D17C,V35N,H53K,E59S,G66W,G84K,E86K,R89M,G93V,G172W,E182D,G183Y,H198S,L203W,N220C,W237N",0.226587,0.385938,0.417034,0.817687,bdr_014bac,NSDSECPLSHDGYCLTCGVCMYIEALDKYACTCPNGYIGERCQYRDLKWWELKGGEGGSGGEGGEWGEGGPGGEGGRGGEGGRKGKGGMGGEVGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGWRGGEGGRGGDYGEGGRNSDSECPLSSDGYCWHDGVCMYIEALTKYACCCPVGYIGERCQYRDLKNWELR,243.0,-4.422443,-1074.653687,,,0.014198,0.11176,0.006826,0.839553,0.243084,014bac,NSDSECPLSHDGYCLTCGVCMYIEALDKYACTCPNGYIGERCQYRDLKWWELKGGEGGSGGEGGEWGEGGPGGEGGRGGEGGRKGKGGMGGEVGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGWRGGEGGRGGDYGEGGRNSDSECPLSSDGYCWHDGVCMYIEALTKYACCCPVGYIGERCQYRDLKNWELR,241.0,-0.927839,-223.609239,0.011798,0.045919,NSDSECPLSHDGYCLTCGVCMYIEALDKYACTCPNGYIGERCQYRDLKWWELKGGEGGSGGEGGEWGEGGPGGEGGRGGEGGRKGKGGMGGEVGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGWRGGEGGRGGDYGEGGRNSDSECPLSSDGYCWHDGVCMYIEALTKYACCCPVGYIGERCQYRDLKNWELR,-0.393715,0.067522,0.080211,0.054492
7822,4119f8,NSDSECPLSHDTYCMHDMVCMYIEALDKYACHCPWGYIGERCQYRDLKWWELHPFEGGEGGNGGEGGEGGPGGEGGPGPEGGRGGEGGRGGEGGERGRGGYGGEKGEGGEGGEGGRGGEGGRGGEGGRGGYGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGHRGGEGGRGGEGGRGGEGGEGGRNSDSECPMSHDGYCLHDGVCMYIEALRKYMCNCPYGYIGERNQYRDLKWVELR,241,622,1,33.532573,26.569193,27.286731,0.6,0.52,0.315353,0.161826,ca5cee,NSDSECPLSHDGYCLHDGVCMYIEALDKYACTCPVGYIGERCQYRDLKWWELHGGEGGEGGEGGEGGEGGPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGEGGRGGEGGEGGRNSDSECPLSHDGYCLHDGVCMYIEALTKYACNCPVGYIGERCQYRDLKWWELR,"G12T,L15M,G18M,T32H,V35W,G54P,G55F,E62N,R77P,G79P,E101Y,G105K,E131Y,G166H,L196M,T215R,A218M,V223Y,C230N,W238V",0.233273,0.373582,0.41128,0.847937,bdr_03fab2,NSDSECPLSHDTYCMHDMVCMYIEALDKYACHCPWGYIGERCQYRDLKWWELHPFEGGEGGNGGEGGEGGPGGEGGPGPEGGRGGEGGRGGEGGERGRGGYGGEKGEGGEGGEGGRGGEGGRGGEGGRGGYGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGHRGGEGGRGGEGGRGGEGGEGGRNSDSECPMSHDGYCLHDGVCMYIEALRKYMCNCPYGYIGERNQYRDLKWVELR,243.0,-4.741074,-1152.081055,,,0.015077,0.108054,0.005435,0.850986,0.244888,03fab2,NSDSECPLSHDTYCMHDMVCMYIEALDKYACHCPWGYIGERCQYRDLKWWELHPFEGGEGGNGGEGGEGGPGGEGGPGPEGGRGGEGGRGGEGGERGRGGYGGEKGEGGEGGEGGRGGEGGRGGEGGRGGYGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGHRGGEGGRGGEGGRGGEGGEGGRNSDSECPMSHDGYCLHDGVCMYIEALRKYMCNCPYGYIGERNQYRDLKWVELR,241.0,-1.039611,-250.546168,0.005201,0.042777,NSDSECPLSHDTYCMHDMVCMYIEALDKYACHCPWGYIGERCQYRDLKWWELHPFEGGEGGNGGEGGEGGPGGEGGPGPEGGRGGEGGRGGEGGERGRGGYGGEKGEGGEGGEGGRGGEGGRGGEGGRGGYGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGHRGGEGGRGGEGGRGGEGGEGGRNSDSECPMSHDGYCLHDGVCMYIEALRKYMCNCPYGYIGERNQYRDLKWVELR,-0.590688,0.063409,0.028826,0.039289
7820,33d18a,NSDSECPASHVGYCGHDGVCMLIETLVVYVCTCPPGYIGERCQYRDLKKWELHGGCGGEGGEGGEGGHGLPGGEGGRGKEGGRGGEGGRGDEGLERGRGGEGGEGGEMGEGGEGGRGGEGGRGGEGGRGTEGGRCGEGGEGGRGGRGGEGGEGGEGGEGGRGGEGGRFGEGGRGGSGGRGKEGGEGGRPSDSECPLSHMGCCLHDGQCMYIEALTKYACNCPVGYIGERFQYRDLKWFKLR,241,622,1,36.219129,26.826868,26.902053,0.64,0.62,0.307054,0.157676,2fa4c1,NSDSECPASHVGYCLHDGVCMYIETLDKYVCTCPPGYIGERCQYRDLKWWELHGGCGGEGGEGGEGGEGLPGGEGGRGGEGGRGGEGGRGGEGGERGRGGEGGEGGEMGEGGEGGRGGEGGRGGEGGRGGEGGRCGEGGEGGRGGEGGEGGEGGEGGEGGRGGEGGRGGEGGRGGMGGRGGEGGEGGRNSDSECPLSHDGCCLHDGQCMYIEALTKYACNCPVGYIGERCQYRDLKWFELR,"L15G,Y22L,D27V,K28V,W49K,E68H,G79K,G91D,G94L,G130T,E146R,G168F,M176S,G181K,N189P,D199M,C230F,E239K",0.227276,0.396496,0.417563,0.814683,bdr_4e1a82,NSDSECPASHVGYCGHDGVCMLIETLVVYVCTCPPGYIGERCQYRDLKKWELHGGCGGEGGEGGEGGHGLPGGEGGRGKEGGRGGEGGRGDEGLERGRGGEGGEGGEMGEGGEGGRGGEGGRGGEGGRGTEGGRCGEGGEGGRGGRGGEGGEGGEGGEGGRGGEGGRFGEGGRGGSGGRGKEGGEGGRPSDSECPLSHMGCCLHDGQCMYIEALTKYACNCPVGYIGERFQYRDLKWFKLR,243.0,-6.00212,-1458.515259,,,0.018218,0.124639,0.001643,0.838673,0.245793,4e1a82,NSDSECPASHVGYCGHDGVCMLIETLVVYVCTCPPGYIGERCQYRDLKKWELHGGCGGEGGEGGEGGHGLPGGEGGRGKEGGRGGEGGRGDEGLERGRGGEGGEGGEMGEGGEGGRGGEGGRGGEGGRGTEGGRCGEGGEGGRGGRGGEGGEGGEGGEGGRGGEGGRFGEGGRGGSGGRGKEGGEGGRPSDSECPLSHMGCCLHDGQCMYIEALTKYACNCPVGYIGERFQYRDLKWFKLR,241.0,-1.320551,-318.252786,0.001142,0.048,NSDSECPASHVGYCGHDGVCMLIETLVVYVCTCPPGYIGERCQYRDLKKWELHGGCGGEGGEGGEGGHGLPGGEGGRGKEGGRGGEGGRGDEGLERGRGGEGGEGGEMGEGGEGGRGGEGGRGGEGGRGTEGGRCGEGGEGGRGGRGGEGGEGGEGGEGGRGGEGGRFGEGGRGGSGGRGKEGGEGGRPSDSECPLSHMGCCLHDGQCMYIEALTKYACNCPVGYIGERFQYRDLKWFKLR,-0.470661,0.081844,0.056523,0.050131


In [135]:
exact_fitness.columns
ef_cols = [
    'seq_hash', 
    'binder_length',
    'fitness',
    'e_fitness',
    'pae_interaction', 
    'i_ptm', 
    'sequence_log_pll', 
    'expression_mean',
    'p_soluble',
    'pae_interaction_rank',
    'i_ptm_rank',
    'sequence_log_pll_rank',
    'expression_rank',
    'p_soluble_rank',
    'binder_sequence',
]

In [136]:
# Prepare display dataframe
display_df = exact_fitness[ef_cols].sort_values('fitness', ascending=False).round(2).head(300)

# Apply the styling to numeric columns
numeric_cols = display_df.select_dtypes(include=['float64', 'int64']).columns
styled_df = display_df.style.format(precision=2)

for col in numeric_cols:
    styled_df = styled_df.apply(color_scale, subset=[col])

styled_df

Unnamed: 0,seq_hash,binder_length,fitness,e_fitness,pae_interaction,i_ptm,sequence_log_pll,expression_mean,p_soluble,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,expression_rank,p_soluble_rank,binder_sequence
0,6989fe,45,0.71,0.74,8.41,0.94,-75.25,0.37,0.08,0.85,0.3,0.99,0.81,0.82,SYDGYCLNRGVCQHIESLDSYTCKCLPGYTGDRCQTQDLRWLELR
1,6b74fb,45,0.7,0.67,8.4,0.94,-73.86,0.08,0.04,0.82,0.3,0.99,0.55,0.6,SYDGYCLNGGVCMHIESLDTYTCNCVIGYSGDRCQTLDLRWLELR
2,d5bf5a,46,0.69,0.7,8.47,0.94,-83.26,0.28,0.06,0.89,0.3,0.88,0.74,0.76,SYDGYCLNGGECRRIKSLHSYTCQCRIGYSGDRCQTRDLRYLELQQ
3,48cbe6,45,0.69,0.75,8.4,0.94,-78.97,0.52,0.16,0.82,0.3,0.94,0.94,0.94,SYDGYCLNRGECQHIHSLDSYTCKCEPGYTGDRCQTQDLRWLELR
4,1aeb7e,45,0.69,0.73,8.39,0.94,-77.95,0.42,0.09,0.81,0.3,0.95,0.86,0.87,SYDGYCLNNAVCRHIESLDSYTCVCKQGYTGDRCQTRDLRWLELR
5,e0c2db,46,0.69,0.68,8.42,0.94,-82.62,0.2,0.05,0.86,0.3,0.9,0.68,0.68,HYDGYCLNGGVCRRIESLHSYTCQCQIGYSGDRCQTRDLRWLELQW
6,403ba6,45,0.68,0.74,8.26,0.94,-72.54,0.47,0.12,0.73,0.3,1.0,0.91,0.89,EYDGYCLNGGVCMHIESLDKYTCECVIGYTGDRCQTRDLRWLELR
7,c38849,45,0.68,0.74,8.03,0.95,-84.91,0.54,0.17,0.45,0.74,0.83,0.95,0.94,SYEGYCENGGTLQHIESLDSYTCKCLKGYTGDRCQSQDLRYLYLE
8,096164,46,0.67,0.66,8.38,0.94,-81.83,0.12,0.04,0.8,0.3,0.92,0.63,0.64,SYDGYCLNGGVCHRIESLHSYTCQCRIGYSGDRCQTRDLRWLELQM
9,99a9f1,48,0.67,0.74,8.81,0.94,-88.76,0.56,0.08,0.95,0.3,0.76,0.95,0.86,SNCPRRYRGICENNGSCRYRHNLRTYTCQCHSGYTGARCEELDIRYLL


In [151]:
# Prepare display dataframe
display_df = exact_fitness[ef_cols].sort_values(['i_ptm', 'pae_interaction', 'sequence_log_pll', 'expression_mean'], ascending=[False, True, False, False]).round(2).head(300)

# Apply the styling to numeric columns
numeric_cols = display_df.select_dtypes(include=['float64', 'int64']).columns
styled_df = display_df.style.format(precision=2)

for col in numeric_cols:
    styled_df = styled_df.apply(color_scale, subset=[col])

styled_df

Unnamed: 0,seq_hash,binder_length,fitness,e_fitness,pae_interaction,i_ptm,sequence_log_pll,expression_mean,p_soluble,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,expression_rank,p_soluble_rank,binder_sequence
14,50b07f,54,0.69,0.63,7.79,0.95,-114.58,-0.02,0.02,1.0,0.75,0.31,0.45,0.25,SLFSKCPRRYHGICGNNGLCRYAINLRTYTCRCVSGYTGYRCQELDIPYLLRLN
20,0f647e,54,0.66,0.56,7.79,0.95,-115.1,-0.2,0.02,0.99,0.75,0.25,0.24,0.35,GLFSRCPKRYHGICINNGQCRYAINLRTYTCICVSGYTGDRCQELDIRYLLLLN
49,2ba912,54,0.6,0.45,7.79,0.95,-123.31,-0.63,0.01,0.99,0.75,0.06,0.02,0.01,GLFSYCPYRYHGICKNNGQCRYAISLRSGTCHCVSGYTGYRCQEIDIRYLLLFY
24,3305e8,54,0.65,0.55,7.79,0.95,-115.44,-0.22,0.02,0.98,0.75,0.22,0.23,0.37,TLFSRCPKRYHGICINNGQCRYAINLRTYTCICVSGYTGDRCQELDIRYLLLLN
38,c9f2df,54,0.62,0.5,7.8,0.95,-117.64,-0.4,0.03,0.98,0.75,0.15,0.14,0.46,SLFSLCPSKFHGICNNKGVCRYAINLRSYTCHCLEGYTGPRCQEIDIRYLLLQY
5,7108f2,54,0.74,0.64,7.82,0.95,-112.23,-0.1,0.02,0.97,0.75,0.51,0.34,0.08,SLFSRCPRRYHGICGNNGRCRYAINLRTQTCRCYSGYTGYRCQELDIRYLLLLN
11,4f9c3b,54,0.71,0.65,7.83,0.95,-113.47,-0.02,0.02,0.97,0.75,0.43,0.46,0.31,SLFNKCPRRYHGICGNNGRCRYAINLRTYTCRCVSGYTGYRCQELDIRYLLLLN
1,c1141a,54,0.78,0.75,7.83,0.95,-109.6,0.16,0.04,0.96,0.75,0.63,0.65,0.58,SLFSRCPKRYHGICNNNGQCRYAINLRTYTCICKSGYTGDRCQELDIRYLLLLN
4,12e3a0,54,0.75,0.58,7.84,0.95,-111.84,-0.53,0.02,0.95,0.75,0.54,0.08,0.12,GLFSICPRRYQGICKNNGTCRYALNLRTYTCQCVSGYTGARCQELDIRYLLLRY
15,5e7b9d,54,0.68,0.54,7.85,0.95,-114.38,-0.44,0.02,0.94,0.75,0.35,0.12,0.16,SLFSLCPSKFHGICNNRGVCRYAINLRSYTCICLEGYTGDRCQEIDIRYLLLQY
