In [156]:
from egfr_binder_rd2.fitness import get_fitness, get_exact_fitness
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from egfr_binder_rd2.bt import PartialEnsembleModuleWithFeatures
import torch
from tqdm import tqdm

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_colwidth', None)


def predict_sequences(sequences, models, device='cuda'):

    """Run inference on a list of sequences."""
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    results_list = []
    
    for sequence in tqdm(sequences):
        results = {'sequence': sequence}
        
        for name, model in models.items():
            with torch.no_grad():
                batch = model.tokenizer([sequence], return_tensors="pt", padding=True)
                batch = {k: v.to(device) for k, v in batch.items()}
                
                outputs = model(batch)
                
                results[f'{name}_mean'] = float(outputs['predictions'].cpu().numpy()[0])
                results[f'{name}_std'] = float(outputs['uncertainties'].cpu().numpy()[0])
        
        results_list.append(results)
    
    return pd.DataFrame(results_list)

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [157]:

fitness = get_fitness()


In [158]:

exact_fitness = get_exact_fitness()


In [31]:
model = PartialEnsembleModuleWithFeatures.load_model('/home/naka/code/egfr_binder_rd2/notebooks/expression_model.pt').cuda()

  saved_dict = torch.load(load_path)
Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t6_8M_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded from: /home/naka/code/egfr_binder_rd2/notebooks/expression_model.pt
Loaded adapter state dict keys: ['base_model.model.encoder.layer.0.attention.self.query.lora_A.weight', 'base_model.model.encoder.layer.0.attention.self.query.lora_B.weight', 'base_model.model.encoder.layer.0.attention.self.key.lora_A.weight', 'base_model.model.encoder.layer.0.attention.self.key.lora_B.weight', 'base_model.model.encoder.layer.0.attention.self.value.lora_A.weight', 'base_model.model.encoder.layer.0.attention.self.value.lora_B.weight', 'base_model.model.encoder.layer.1.attention.self.query.lora_A.weight', 'base_model.model.encoder.layer.1.attention.self.query.lora_B.weight', 'base_model.model.encoder.layer.1.attention.self.key.lora_A.weight', 'base_model.model.encoder.layer.1.attention.self.key.lora_B.weight', 'base_model.model.encoder.layer.1.attention.self.value.lora_A.weight', 'base_model.model.encoder.layer.1.attention.self.value.lora_B.weight', 'base_model.model.encoder.layer.2.attentio

  saved_dict = torch.load(load_path)


In [34]:
models = {'expression': model}

In [143]:
fitness = fitness.drop(columns = ['target_sequence'])

In [168]:
sequences = fitness['binder_sequence'].tolist()
df = predict_sequences(sequences, models)

  results[f'{name}_mean'] = float(outputs['predictions'].cpu().numpy()[0])
  results[f'{name}_std'] = float(outputs['uncertainties'].cpu().numpy()[0])
100%|██████████| 8009/8009 [00:30<00:00, 265.97it/s]


In [171]:
fitness = fitness.merge(df, right_on='sequence', left_on='binder_sequence')


In [169]:

exact_fitness = exact_fitness.merge(df, right_on='sequence', left_on='binder_sequence')

In [172]:
fitness['expression_rank'] = fitness['expression_mean'].rank(pct=True)
exact_fitness['expression_rank'] = exact_fitness['expression_mean'].rank(pct=True)


In [175]:
fitness['e_fitness'] = (fitness['pae_interaction_rank'] + fitness['i_ptm_rank'] + fitness['exact_sequence_log_pll_rank'] + fitness['expression_rank']) / 4
exact_fitness['e_fitness'] = (exact_fitness['pae_interaction_rank'] + exact_fitness['i_ptm_rank'] + exact_fitness['sequence_log_pll_rank'] + exact_fitness['expression_rank']) / 4


In [176]:
fdf = fitness[~fitness['binder_sequence'].isin(exact_fitness['binder_sequence'])]

In [179]:
f_cols = [
    'sequence_hash',
    'binder_length',
    'exact_fitness',
    'e_fitness',
    'pae_interaction', 
    'i_ptm', 
    'exact_sequence_log_pll', 
    'expression_mean',
    'p_soluble', 
    'pae_interaction_rank', 
    'i_ptm_rank',
    'exact_sequence_log_pll_rank',
    'expression_rank',
    'p_soluble_rank', 
    
    # 'binder_charged_fraction', 
    # 'binder_hydrophobic_fraction',
    # 'binder_hydrophobicity',
    # 'binder_hydropathy', 'binder_solubility',
    # 'parent_hash', 
    # 'parent_sequence', 
    # 'mutations', 
    'binder_sequence',
]

In [163]:
def color_scale(data):
    """
    Takes a series and returns a color scale normalized to that column's range
    """
    def _color_value(val, min_val, max_val):
        if pd.isna(val):
            return ''
        
        # Don't apply to non-numeric columns
        if not isinstance(val, (int, float)):
            return ''
            
        # Normalize value to the column's range
        if max_val == min_val:
            normalized_val = 0
        else:
            normalized_val = (val - min_val) / (max_val - min_val)
        
        # Create blue color scale
        intensity = normalized_val
        return f'background-color: rgba(0,0,255,{intensity})'
    
    # Get the min and max values for this column
    min_val = data.min()
    max_val = data.max()
    return [_color_value(v, min_val, max_val) for v in data]


In [132]:
fdf[f_cols].sort_values('exact_fitness', ascending=False).round(2).query('expression_mean > 0.3').head(10)['binder_sequence'].tolist()

['SYDGKCLNNGACRYIERLDSYTCHCVSGYTGDRCQTRDLRWLELR',
 'KYDGYCNNNGVCHHIESLDKYTCNCRVGYSGDRCQTRDLRWLELRY',
 'SYDGYCNNHGVCRHIESLDSWTCQCRQGYEGDRCQTRDLRWLELN',
 'SYKGYCNNHGVCRHIESLDTYTCQCKQGYEGDRCETRDLRWLELR',
 'TYDGYCLNGGKCEHVESLDKYTCNCVSGYTGDRCETRDLRWLEHR',
 'TYDGYCLNGGKCRQVESLDKYTCNCVSGYTGDRCQTRDLRWLEQR',
 'PYDGYCLNGGVCMHIESLDKGTCECVEGYTGDRCQTRDLRWLELR',
 'SNCPERYRGHCENNGSCKYVRNLNTYTCQCLSGYTGARCDMLDIRYLL',
 'PYKGYCLNGGVCMHIESLDKYTCECVIGYTGDRCQDRDLRWLELR',
 'CPRRYNGICTNNGSCQYAINLRTYTCQCLPGYTKPKCQELDIRY']

In [180]:

# Prepare display dataframe
display_df = fdf[f_cols].sort_values('exact_fitness', ascending=False).round(2).query('expression_mean > 0.3').query('i_ptm >0.93').head(300)

# Apply the styling to numeric columns
numeric_cols = display_df.select_dtypes(include=['float64', 'int64']).columns
styled_df = display_df.style.format(precision=2)

for col in numeric_cols:
    styled_df = styled_df.apply(color_scale, subset=[col])

styled_df

Unnamed: 0,sequence_hash,binder_length,exact_fitness,e_fitness,pae_interaction,i_ptm,exact_sequence_log_pll,expression_mean,p_soluble,pae_interaction_rank,i_ptm_rank,exact_sequence_log_pll_rank,expression_rank,p_soluble_rank,binder_sequence
726,bdr_a8d1dd,45,0.91,0.86,7.68,0.94,-82.47,0.44,0.15,0.95,0.89,0.89,0.73,0.59,SYEGYCLNGGELVHVESLDSYTCECLKGYTGDRCQSQDLRYLYLE
444,bdr_236061,45,0.9,0.87,7.83,0.94,-81.16,0.48,0.16,0.91,0.89,0.9,0.77,0.61,SYEGYCLNGGTLVHVESLDSKTCKCLKGYTGDRCQSQDLRYLYLE
1485,bdr_37bb36,45,0.89,0.83,7.8,0.94,-84.14,0.35,0.1,0.92,0.89,0.86,0.64,0.52,SYEGYCLNGGTLHHVESLDSYTCGCLKGYQGDRCQSQDLRYLYLE
257,bdr_bd111d,45,0.89,0.87,7.99,0.94,-80.1,0.53,0.18,0.86,0.89,0.91,0.8,0.62,SYEGYCLNGGTLVHVESLDSYTCHCLKGYTGDRCQSQDARYLALE
2,bdr_f06b8f,45,0.84,0.83,7.96,0.94,-90.36,0.53,0.17,0.86,0.89,0.76,0.81,0.62,SYDGYCNNHGVCRHIESLDSWTCQCRQGYEGDRCQTRDLRWLELN
19,bdr_e27bff,45,0.84,0.85,8.13,0.94,-86.9,0.62,0.27,0.8,0.89,0.82,0.88,0.68,SYKGYCNNHGVCRHIESLDTYTCQCKQGYEGDRCETRDLRWLELR
1618,bdr_52210e,48,0.83,0.82,8.18,0.94,-85.5,0.48,0.06,0.77,0.89,0.84,0.77,0.41,SNCPERYRGHCENNGSCKYVRNLNTYTCQCLSGYTGARCDMLDIRYLL
539,bdr_2c7798,44,0.83,0.78,7.75,0.94,-96.17,0.35,0.05,0.94,0.89,0.67,0.64,0.36,CPRRYNGICTNNGSCQYAINLRTYTCQCLPGYTKPKCQELDIRY
955,bdr_db8027,45,0.82,0.81,8.23,0.94,-85.61,0.5,0.1,0.75,0.89,0.84,0.78,0.52,EYKGYCLNNARCRHVQSLDRYTCNCVSGYTGDRCQQRDLRWLELR
319,bdr_f12f5e,45,0.82,0.82,8.14,0.94,-88.78,0.51,0.08,0.79,0.89,0.79,0.79,0.46,SYDGKCLNNGACRYIERLDSYTCNCVSGYTGDRCQTLDLRWLELR


In [90]:
display_df['binder_sequence'].iloc[:4].tolist()

['SYEGYCENGGTLQHIESLDSYTCKCLKGYTGDRCQSQDLRYLYLE',
 'CPARYNGICTNHGRCQYARNLRTYTCQCLPGYTKHRCQELDIRY',
 'SYNGYCLNNGRCQHIMSLDSYTCRCEVGYSGDRCQTHDLRWLELR',
 'HYDGYCLNGGACRRIESLHSYTCQCQKGYSGDRCQTRDLRWLELQN']

In [23]:
fitness.query('binder_length == 55').sort_values('i_ptm', ascending=False).head(10)

Unnamed: 0,seq_hash,binder_sequence,binder_length,target_length,model_number,binder_plddt,binder_pae,pae_interaction,ptm,i_ptm,binder_charged_fraction,binder_hydrophobic_fraction,parent_hash,parent_sequence,mutations,binder_hydrophobicity,binder_hydropathy,binder_solubility,p_soluble,sequence_hash,sequence,sequence_length,normalized_log_pll,sequence_log_pll,mean_token_probability,min_token_probability,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,p_soluble_rank,fitness,exact_sequence_hash,exact_sequence,exact_sequence_length,exact_normalized_log_pll,exact_sequence_log_pll,exact_sequence_log_pll_rank,exact_fitness
694,1f6276,YSLSACPKRYSGVCSNAGVCHLAVSLGSYTCTCQTGYQGERCQTYDLRYILLELE,55,622,1,91.812,3.473683,7.53127,0.87,0.95,0.163636,0.309091,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,"T1Y,G8K,G17A,P40E,I49Y",0.306758,0.492929,0.537036,0.045618,bdr_fdbbeb,YSLSACPKRYSGVCSNAGVCHLAVSLGSYTCTCQTGYQGERCQTYDLRYILLELE,55.0,-4.094093,-225.175125,,,0.979897,0.973482,0.549217,0.341621,0.711054,fdbbeb,YSLSACPKRYSGVCSNAGVCHLAVSLGSYTCTCQTGYQGERCQTYDLRYILLELE,55.0,-2.126485,-116.956699,0.269612,0.740997
1575,0c32ca,ELFSACPSKYRGACTNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLM,55,622,1,90.235636,3.760704,7.89263,0.87,0.94,0.181818,0.272727,e9c179,ELFSACPSNYRLACNNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLQ,"N9K,L12G,N15T,Q55M",0.319451,0.484646,0.535019,0.04906,bdr_910021,ELFSACPSKYRGACTNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLM,55.0,-4.620086,-254.104736,,,0.882158,0.880637,0.456222,0.361857,0.645219,910021,ELFSACPSKYRGACTNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLM,55.0,-1.910156,-105.0586,0.545707,0.769501
1380,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55,622,1,89.347091,3.944542,8.103986,0.87,0.94,0.145455,0.309091,e9c179,ELFSACPSNYRLACNNGGVCRLAESLSSYTCQCAPGYSGPRCQTLDLRYIELRLQ,"R11L,Q32I",0.335575,0.510303,0.543909,0.035542,bdr_17fa18,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-3.425479,-188.401352,,,0.801217,0.880637,0.682011,0.27417,0.659509,17fa18,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-2.025817,-111.419908,0.392909,0.691588
493,fb0fca,TSLSACPGRYSGVCSNGGVCHLAVSLGRYTCTCQTGYQGPRCQTYDLRIILLELE,55,622,1,89.424545,4.227091,8.152867,0.87,0.94,0.145455,0.290909,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,S28R,0.299348,0.504848,0.538376,0.043462,bdr_a2483c,TSLSACPGRYSGVCSNGGVCHLAVSLGRYTCTCQTGYQGPRCQTYDLRIILLELE,55.0,-2.093213,-115.126694,,,0.776617,0.880637,0.928628,0.328396,0.72857,a2483c,TSLSACPGRYSGVCSNGGVCHLAVSLGRYTCTCQTGYQGPRCQTYDLRIILLELE,55.0,-2.166444,-119.154437,0.236539,0.631265
602,b1e012,TSLSACPGRYSGVCSNGGTCHLAVSLGSYTCTCQTGYQGPRCQTYDLRWILLELE,55,622,1,90.095091,3.93157,7.969513,0.87,0.94,0.127273,0.272727,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,"V19T,I49W",0.289811,0.491515,0.538149,0.043819,bdr_ea294c,TSLSACPGRYSGVCSNGGTCHLAVSLGSYTCTCQTGYQGPRCQTYDLRWILLELE,55.0,-2.793515,-153.643326,,,0.856368,0.880637,0.808968,0.331041,0.719254,ea294c,TSLSACPGRYSGVCSNGGTCHLAVSLGSYTCTCQTGYQGPRCQTYDLRWILLELE,55.0,-2.095713,-115.26424,0.304008,0.680338
2004,6ca873,ELFSACPRNYLGACNNGGVCRLACSLKSYTCICAPGYSGHRCQTLDLRYIELRLQ,55,622,1,90.332909,3.712377,7.825061,0.87,0.94,0.181818,0.290909,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"S8R,L12G,E24C,S27K,P40H",0.331801,0.49697,0.545528,0.033501,bdr_e02ee5,ELFSACPRNYLGACNNGGVCRLACSLKSYTCICAPGYSGHRCQTLDLRYIELRLQ,55.0,-4.779858,-262.892212,,,0.907155,0.880637,0.421863,0.257109,0.616691,e02ee5,ELFSACPRNYLGACNNGGVCRLACSLKSYTCICAPGYSGHRCQTLDLRYIELRLQ,55.0,-2.080672,-114.436951,0.323323,0.703705
739,2227ce,ELFSACPSRYHGACNNRGVCRLAESLSSYTCICASGYSGPRCQTLDLRYIELRLQ,55,622,1,90.673818,3.662529,7.786452,0.87,0.94,0.2,0.272727,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"N9R,L11H,L12G,G17R,P35S",0.313551,0.47899,0.536617,0.046315,bdr_3b9cd7,ELFSACPSRYHGACNNRGVCRLAESLSSYTCICASGYSGPRCQTLDLRYIELRLQ,55.0,-3.438047,-189.092606,,,0.920645,0.880637,0.677633,0.347044,0.70649,3b9cd7,ELFSACPSRYHGACNNRGVCRLAESLSSYTCICASGYSGPRCQTLDLRYIELRLQ,55.0,-2.021865,-111.202568,0.399259,0.733514
1810,43bd2e,ELFSACPSRYLLACNTGGVCRLAESLSSYTCICAPGYSGTRCQTLDLRYIELRLE,55,622,1,89.341455,3.891243,8.123738,0.87,0.94,0.181818,0.309091,d67a60,ELFSACPSNYLLACNNGGVCRLAESLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"N9R,N16T,P40T,Q55E",0.3294,0.515758,0.542123,0.037934,bdr_501fd3,ELFSACPSRYLLACNTGGVCRLAESLSSYTCICAPGYSGTRCQTLDLRYIELRLE,55.0,-4.095197,-225.23584,,,0.791297,0.880637,0.548687,0.292289,0.628228,501fd3,ELFSACPSRYLLACNTGGVCRLAESLSSYTCICAPGYSGTRCQTLDLRYIELRLE,55.0,-2.191429,-120.52861,0.223045,0.63166
1786,87076b,SLFSACPSRYTGACHNGGVCRLAISLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55,622,1,91.141455,3.543117,7.750459,0.87,0.94,0.145455,0.290909,82b7dd,SLFSACPSRYLGACHNGGVCRLATSLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,"L11T,T24I",0.332007,0.512929,0.555895,0.022878,bdr_c4e8c6,SLFSACPSRYTGACHNGGVCRLAISLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-4.038197,-222.10083,,,0.931226,0.880637,0.561953,0.147996,0.630453,c4e8c6,SLFSACPSRYTGACHNGGVCRLAISLSSYTCICAPGYSGPRCQTLDLRYIELRLQ,55.0,-1.944401,-106.942031,0.503903,0.771922
873,dab460,TSLSACPARYSGVCSNGAVCHLAVSLDSYTCTCQTGYQGPRCQTYDLRIILLFLD,55,622,1,90.161455,4.124294,7.927026,0.87,0.94,0.127273,0.309091,e927ef,TSLSACPGRYSGVCSNGGVCHLAVSLGSYTCTCQTGYQGPRCQTYDLRIILLELE,"G8A,G18A,G27D,E53F,E55D",0.316226,0.527677,0.551212,0.027193,bdr_66e284,TSLSACPARYSGVCSNGAVCHLAVSLDSYTCTCQTGYQGPRCQTYDLRIILLFLD,55.0,-2.71711,-149.44104,,,0.870123,0.880637,0.837357,0.199048,0.696791,66e284,TSLSACPARYSGVCSNGAVCHLAVSLDSYTCTCQTGYQGPRCQTYDLRIILLFLD,55.0,-2.162216,-118.921873,0.240111,0.663624


In [None]:
fitness.query('binder_length == 241').sort_values('fitness')

In [181]:
exact_fitness.columns
ef_cols = [
    'seq_hash', 
    'binder_length',
    'fitness',
    'e_fitness',
    'pae_interaction', 
    'i_ptm', 
    'sequence_log_pll', 
    'expression_mean',
    'p_soluble',
    'pae_interaction_rank',
    'i_ptm_rank',
    'sequence_log_pll_rank',
    'expression_rank',
    'p_soluble_rank',
    'binder_sequence',
]

In [165]:
exact_fitness['pae_interaction_rank'] = exact_fitness['pae_interaction'].rank(ascending=False, pct=True)

In [None]:
exact_fitness

In [182]:
# Prepare display dataframe
display_df = exact_fitness[ef_cols].sort_values('fitness', ascending=False).round(2).head(300)

# Apply the styling to numeric columns
numeric_cols = display_df.select_dtypes(include=['float64', 'int64']).columns
styled_df = display_df.style.format(precision=2)

for col in numeric_cols:
    styled_df = styled_df.apply(color_scale, subset=[col])

styled_df

Unnamed: 0,seq_hash,binder_length,fitness,e_fitness,pae_interaction,i_ptm,sequence_log_pll,expression_mean,p_soluble,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,expression_rank,p_soluble_rank,binder_sequence
0,72e67e,54,0.78,0.63,7.85,0.95,-105.74,-0.28,0.03,0.93,0.75,0.65,0.19,0.42,SLFSTCPRRYRGICHNNGSCRYAVNLRTYTCVCRSGYTGKRCQEADLRYLLLRY
1,c1141a,54,0.77,0.74,7.83,0.95,-109.6,0.16,0.04,0.96,0.75,0.62,0.64,0.57,SLFSRCPKRYHGICNNNGQCRYAINLRTYTCICKSGYTGDRCQELDIRYLLLLN
2,9cb679,54,0.76,0.67,7.86,0.95,-109.39,-0.06,0.02,0.91,0.75,0.62,0.39,0.36,GLFSKCPRRYRGICKNNGSCRYAINLRTYTCQCRSGYTGPRCQELDIRYLLLRY
3,8b44d0,50,0.75,0.78,7.92,0.95,-102.55,0.45,0.07,0.81,0.75,0.68,0.87,0.81,SRFSNCPRRYRGICTNSGECTYAKNLRTYTCQCVSGYTGHRCEELDIRYL
4,12e3a0,54,0.74,0.58,7.84,0.95,-111.84,-0.53,0.02,0.95,0.75,0.53,0.08,0.12,GLFSICPRRYQGICKNNGTCRYALNLRTYTCQCVSGYTGARCQELDIRYLLLRY
5,8cb9d0,50,0.74,0.77,7.92,0.95,-103.92,0.44,0.07,0.81,0.75,0.67,0.86,0.78,SRFSNCPRRYRGICTNNGECTYAKNLRTYTCQCVSGYTGHRCQELDIRYL
6,7108f2,54,0.74,0.64,7.82,0.95,-112.23,-0.1,0.02,0.97,0.75,0.5,0.34,0.08,SLFSRCPRRYHGICGNNGRCRYAINLRTQTCRCYSGYTGYRCQELDIRYLLLLN
7,b1adfc,54,0.74,0.71,7.87,0.95,-110.94,0.18,0.05,0.89,0.75,0.57,0.65,0.7,NLFSRCPKRYHGICENNGQCRYAINLRTYTCICDSGYTGDRCQELDIRYLLLLN
8,45355a,54,0.72,0.6,7.87,0.95,-112.18,-0.22,0.02,0.9,0.75,0.5,0.23,0.13,SLFSKCPRRYHGICGNNGLCRYAINLRTYTCRCLSGYTGYRCQELDIPYLLRLN
9,9fe973,54,0.72,0.6,7.86,0.95,-112.49,-0.16,0.02,0.92,0.75,0.49,0.26,0.07,SLFSKCPRRYHGICGNNGLCRYAINLRTYTCRCVSGYTGYRCQELDIRYLLLLN


In [194]:
# Prepare display dataframe
display_df = (
    exact_fitness[ef_cols]
    .sort_values(
        ['i_ptm', 'fitness', 'pae_interaction', 'sequence_log_pll', 'expression_mean'], 
        ascending=[False, False, True, False, False])
        # .query('expression_mean > 0.')
        .query('binder_length == 65').round(2).head(300)
        )

# Apply the styling to numeric columns
numeric_cols = display_df.select_dtypes(include=['float64', 'int64']).columns
styled_df = display_df.style.format(precision=2)

for col in numeric_cols:
    styled_df = styled_df.apply(color_scale, subset=[col])

styled_df

Unnamed: 0,seq_hash,binder_length,fitness,e_fitness,pae_interaction,i_ptm,sequence_log_pll,expression_mean,p_soluble,pae_interaction_rank,i_ptm_rank,sequence_log_pll_rank,expression_rank,p_soluble_rank,binder_sequence
172,74a581,65,0.01,0.11,18.3,0.91,-160.0,-0.05,0.98,0.01,0.01,0.01,0.41,1.0,AERMRRRFEHIVEIHEEWAKEVLEWLKKQGSKEEDYKFMEEYLEQDVKELRKRAEEMVEEYEKSG
