In [1]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [4]:
#@title Imports
import torch
import random
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.svm import SVR
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.utils import resample
from scipy.stats import spearmanr
from sklearn.metrics import ndcg_score
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, normalize
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.utils import resample
from scipy.stats import spearmanr
from scipy.stats import pearsonr
from sklearn.metrics import ndcg_score
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR, SVC
from sklearn.linear_model import Ridge
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import f1_score
from scipy.stats import sem
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

# Performs one round of 10-fold cross validation
def tfcv(X, y, MODEL):
  y = np.array(y)
  acc, f1, pre, rec, auroc = [], [], [], [], []
  kf = StratifiedKFold(n_splits=5, shuffle=True)
  for train_index, test_index in kf.split(X, y):
    # Get the train/val data for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    pca = PCA(n_components=50)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    scaler=StandardScaler()
    X_train=scaler.fit_transform(X_train)
    X_test=scaler.transform(X_test)

    MODEL.fit(X_train, y_train)
    y_pred = MODEL.predict(X_test)

    # Compute metrics for current fold
    fold_acc = accuracy_score(y_test, y_pred)
    fold_f1 = f1_score(y_test, y_pred)
    fold_pre = precision_score(y_test, y_pred)
    fold_rec = recall_score(y_test, y_pred)
    fold_roc = roc_auc_score(y_test, y_pred)
    acc.append(fold_acc)
    f1.append(fold_f1)
    pre.append(fold_pre)
    rec.append(fold_rec)
    auroc.append(fold_roc)

  return np.mean(np.array(acc)), np.mean(np.array(f1)), np.mean(np.array(pre)), np.mean(np.array(rec)), np.mean(np.array(auroc))

# Optimize a hyperparameter
def optimize(X, y, MODEL, PARAM, param_values):
    top_value = param_values[0]
    top_score = 0
    for i in tqdm(param_values):
        currentscore, _, _, _, _ = tfcv(X, y, MODEL(**{PARAM: i}))
        if currentscore > top_score:
            top_score = currentscore
            top_value = i

    final_acc, final_f1, final_pre, final_rec, final_auroc = [], [], [], [], []
    for i in range(5):
        ac, f1, pre, rec, auroc = tfcv(X, y, MODEL(**{PARAM: top_value}))
        final_acc.append(ac)
        final_f1.append(f1)
        final_pre.append(pre)
        final_rec.append(rec)
        final_auroc.append(auroc)
    final_acc = np.array(final_acc)
    final_f1 = np.array(final_f1)
    final_pre = np.array(final_pre)
    final_rec = np.array(final_rec)
    final_auroc = np.array(final_auroc)
    print(f'Accuracy: {np.mean(final_acc)}, +/- {sem(final_acc)}')
    print(f'F1: {np.mean(final_f1)}, +/- {sem(final_f1)}')
    print(f'Precision: {np.mean(final_pre)}, +/- {sem(final_pre)}')
    print(f'Recall: {np.mean(final_rec)}, +/- {sem(final_rec)}')
    print(f'Auroc: {np.mean(final_auroc)}, +/- {sem(final_auroc)}')
    print(f'top_value: {top_value}')
    return np.mean(final_acc), sem(final_acc)

# Load sequences from csv
df = pd.read_csv('./drive/MyDrive/Data/LazBF_sequences.csv')
LazBF_sequences = df['sequences'].tolist()
LazBF_labels = np.array(df['labels'].tolist())

df = pd.read_csv('./drive/MyDrive/Data/LazBF_sample.csv')
LazBF_sample = df['sequences'].tolist()
LazBF_sample_labels = np.array(df['labels'].tolist())

df = pd.read_csv('./drive/MyDrive/Data/LazDEF_sequences.csv')
LazDEF_sequences = df['sequences'].tolist()
LazDEF_labels = np.array(df['labels'].tolist())

df = pd.read_csv('./drive/MyDrive/Data/LazDEF_sample.csv')
LazDEF_sample = df['sequences'].tolist()
LazDEF_sample_labels = np.array(df['labels'].tolist())

# Load Embs
lazbf_mlm_none = np.load("./drive/MyDrive/Embeddings/LazBF_mlm_none.npy")
lazdef_mlm_none = np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_none.npy")

lazbf_mlm_pa = np.load("./drive/MyDrive/Embeddings/LazBF_mlm_PA.npy")
lazdef_mlm_pa = np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_PA.npy")

lazbf_mlm_lazbf = np.load("./drive/MyDrive/Embeddings/LazBF_mlm_LazBF.npy")
lazdef_mlm_lazbf = np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_LazBF.npy")

lazbf_mlm_lazdef = np.load("./drive/MyDrive/Embeddings/LazBF_mlm_LazDEF.npy")
lazdef_mlm_lazdef = np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_LazDEF.npy")

lazbf_mlm_lazbcdef = np.load("./drive/MyDrive/Embeddings/LazBF_mlm_LazBCDEF.npy")
lazdef_mlm_lazbcdef = np.load("./drive/MyDrive/Embeddings/LazDEF_mlm_LazBCDEF.npy")

model_list = [LogisticRegression, RandomForestClassifier, AdaBoostClassifier, SVC, MLPClassifier]
param_list = ['C', 'n_estimators', 'n_estimators', 'C', 'hidden_layer_sizes']
value_list = [
   [1e-2, 1e-1, 1, 5], # LR
   # [5, 25, 50, 80], # KNN
   [5, 25, 50, 100], # RF
   [5, 25, 50, 100], # Ada
   [1e-2, 1e-1, 1, 5], # SVC
   [50, 100, 200, 500], # MLP
]

def balanced_sample_np(seqs, labels, N, seed):
    labels = np.array(labels)
    indices_0 = np.where(labels == 0)[0]
    indices_1 = np.where(labels == 1)[0]
    min_count = min(len(indices_0), len(indices_1), N // 2)
    sampled_indices_0 = np.random.choice(indices_0, min_count, replace=False)
    sampled_indices_1 = np.random.choice(indices_1, min_count, replace=False)
    sampled_indices = np.concatenate((sampled_indices_0, sampled_indices_1))
    np.random.seed(seed)
    random.seed(seed)
    np.random.shuffle(sampled_indices)
    return sampled_indices

In [11]:
#@title LazBF HighN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 1000, 754643236)
y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazBF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_none[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazBF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_pa[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazBF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbf[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazBF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazBF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbcdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)

Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.04it/s]


Accuracy: 0.8779999999999999, +/- 0.0010954451150103433
F1: 0.8808005899295429, +/- 0.0011200359900306958
Precision: 0.8620811862183357, +/- 0.00114724560672565
Recall: 0.9016, +/- 0.0015999999999999736
Auroc: 0.8779999999999999, +/- 0.0010954451150103433
top_value: 0.1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.02it/s]


Accuracy: 0.8827999999999999, +/- 0.0013928388277183812
F1: 0.8858411793557932, +/- 0.0014630753054394926
Precision: 0.8654513851820956, +/- 0.001120142552778452
Recall: 0.9084, +/- 0.00278567765543683
Auroc: 0.8827999999999999, +/- 0.0013928388277183812
top_value: 0.01
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:06<00:00,  1.66s/it]


Accuracy: 0.9725999999999999, +/- 0.0008717797887081354
F1: 0.9724612404069436, +/- 0.0008952933695517317
Precision: 0.9767425702295576, +/- 0.0007539812126728994
Recall: 0.9683999999999999, +/- 0.0016000000000000166
Auroc: 0.9725999999999999, +/- 0.0008717797887081431
top_value: 0.01
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.12it/s]


Accuracy: 0.8960000000000001, +/- 0.0010488088481701524
F1: 0.8978635264408167, +/- 0.0010570224878762387
Precision: 0.883486450983311, +/- 0.0007951693428576657
Recall: 0.9136, +/- 0.001326649916142151
Auroc: 0.8960000000000001, +/- 0.0010488088481701524
top_value: 0.1
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.04it/s]


Accuracy: 0.8879999999999999, +/- 0.0013784048752090315
F1: 0.8884780413716078, +/- 0.0014376984778816466
Precision: 0.8854071048802993, +/- 0.0013709669560617462
Recall: 0.8927999999999999, +/- 0.001959591794226556
Auroc: 0.8880000000000001, +/- 0.0013784048752090155
top_value: 0.1
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.81s/it]


Accuracy: 0.8423999999999999, +/- 0.0032649655434629113
F1: 0.8444796755701196, +/- 0.0033153346634013153
Precision: 0.8356997621095246, +/- 0.0037467029658049687
Recall: 0.8555999999999999, +/- 0.0029257477676655473
Auroc: 0.8423999999999999, +/- 0.003264965543462912
top_value: 100
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.88s/it]


Accuracy: 0.8545999999999999, +/- 0.0021587033144923155
F1: 0.8591076075435209, +/- 0.0019056573378419383
Precision: 0.834501536764195, +/- 0.0034599818745962314
Recall: 0.8864000000000001, +/- 0.002227105745131979
Auroc: 0.8546000000000001, +/- 0.0021587033144922717
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.17s/it]


Accuracy: 0.9738, +/- 0.00037416573867736773
F1: 0.9736053719233494, +/- 0.00038456026477520674
Precision: 0.9795776734159507, +/- 0.0015944748955343021
Recall: 0.968, +/- 0.0012649110640673442
Auroc: 0.9738, +/- 0.00037416573867736773
top_value: 25
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.75s/it]


Accuracy: 0.8575999999999999, +/- 0.0016911534525287685
F1: 0.8576285547297984, +/- 0.0016821652451652052
Precision: 0.8564540081784214, +/- 0.003443228709093013
Recall: 0.86, +/- 0.0028284271247461493
Auroc: 0.8576, +/- 0.0016911534525287765
top_value: 50
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:10<00:00,  2.63s/it]


Accuracy: 0.8524, +/- 0.0016000000000000207
F1: 0.8515389423406783, +/- 0.0013549144265457601
Precision: 0.8559555905151066, +/- 0.0037141579547949203
Recall: 0.8484, +/- 0.002856571371417141
Auroc: 0.8524, +/- 0.0016000000000000207
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.20s/it]


Accuracy: 0.837, +/- 0.0050990195135928
F1: 0.83879652874284, +/- 0.005138774941738729
Precision: 0.8296066962171835, +/- 0.005949615478361001
Recall: 0.8496, +/- 0.0074672618810377855
Auroc: 0.837, +/- 0.005099019513592774
top_value: 100
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.02s/it]


Accuracy: 0.8382, +/- 0.004066939881532554
F1: 0.8404777800520147, +/- 0.0038492200871083912
Precision: 0.830410255824026, +/- 0.004869234634404964
Recall: 0.8524, +/- 0.0036000000000000042
Auroc: 0.8382, +/- 0.004066939881532554
top_value: 25
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.23s/it]


Accuracy: 0.9728, +/- 0.0006633249580710621
F1: 0.9726473786080305, +/- 0.0006493845757782267
Precision: 0.9792957786640379, +/- 0.0010749987369605908
Recall: 0.9663999999999999, +/- 0.0007483314773548067
Auroc: 0.9728, +/- 0.0006633249580710621
top_value: 5
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.01s/it]


Accuracy: 0.8634000000000001, +/- 0.001860107523773828
F1: 0.8647530183497988, +/- 0.0018540621298019106
Precision: 0.8563666325043293, +/- 0.0025996765468945674
Recall: 0.874, +/- 0.0033466401061363186
Auroc: 0.8633999999999998, +/- 0.0018601075237738305
top_value: 100
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.25s/it]


Accuracy: 0.8555999999999999, +/- 0.0023151673805580225
F1: 0.8557416377961811, +/- 0.0020852147407975277
Precision: 0.8544537235223958, +/- 0.004380554030726752
Recall: 0.858, +/- 0.0017888543819998394
Auroc: 0.8555999999999999, +/- 0.0023151673805580286
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.29s/it]


Accuracy: 0.8790000000000001, +/- 0.0011401754250991388
F1: 0.8823112238612423, +/- 0.0010200023490691418
Precision: 0.859607195980318, +/- 0.0021215052959875826
Recall: 0.9067999999999999, +/- 0.0013564659966250284
Auroc: 0.8790000000000001, +/- 0.0011401754250991293
top_value: 1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.34s/it]


Accuracy: 0.8826, +/- 0.0015362291495736847
F1: 0.8846226179197781, +/- 0.0014871903288833919
Precision: 0.8717833897554979, +/- 0.0026008585019104483
Recall: 0.8987999999999999, +/- 0.002244994432064371
Auroc: 0.8826, +/- 0.0015362291495736933
top_value: 1
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.48s/it]


Accuracy: 0.9712, +/- 0.001428285685708561
F1: 0.9713185242658333, +/- 0.0014005892057102971
Precision: 0.969370040554111, +/- 0.0017443991826355477
Recall: 0.9735999999999999, +/- 0.0014696938456699202
Auroc: 0.9712, +/- 0.0014282856857085462
top_value: 5
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:06<00:00,  1.58s/it]


Accuracy: 0.8992000000000001, +/- 0.0013190905958272519
F1: 0.9012318387569476, +/- 0.0012201609080028364
Precision: 0.8845795180389503, +/- 0.002614716086161909
Recall: 0.9192, +/- 0.0017435595774162784
Auroc: 0.8992000000000001, +/- 0.0013190905958273106
top_value: 1
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:07<00:00,  1.89s/it]


Accuracy: 0.8897999999999999, +/- 0.0028178005607210664
F1: 0.8920378739508728, +/- 0.0027425004339168384
Precision: 0.8750619779833058, +/- 0.003510678704030488
Recall: 0.9104000000000001, +/- 0.0029933259094191587
Auroc: 0.8897999999999999, +/- 0.002817800560721055
top_value: 1
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:38<00:00,  9.60s/it]


Accuracy: 0.8695999999999999, +/- 0.0015033296378372907
F1: 0.8713750256144805, +/- 0.0012885718516758918
Precision: 0.8600556808897082, +/- 0.0027078875809866828
Recall: 0.8844000000000001, +/- 0.002135415650406257
Auroc: 0.8695999999999999, +/- 0.0015033296378372885
top_value: 500
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:35<00:00,  8.85s/it]


Accuracy: 0.8678000000000001, +/- 0.0027820855486487316
F1: 0.8694513854267649, +/- 0.0029891275984638786
Precision: 0.8592747682668798, +/- 0.002426752118150061
Recall: 0.8808, +/- 0.005462600113499035
Auroc: 0.8678000000000001, +/- 0.002782085548648728
top_value: 50
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:29<00:00,  7.31s/it]


Accuracy: 0.97, +/- 0.0005477225575051464
F1: 0.9698617474035623, +/- 0.0005395973404175075
Precision: 0.9736713867663891, +/- 0.000707986251995605
Recall: 0.9663999999999999, +/- 0.000748331477354771
Auroc: 0.97, +/- 0.0005477225575051464
top_value: 500
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:34<00:00,  8.57s/it]


Accuracy: 0.8946, +/- 0.0023151673805580242
F1: 0.8954395191327841, +/- 0.0023720659386718772
Precision: 0.8896311447623859, +/- 0.003611622535941364
Recall: 0.9020000000000001, +/- 0.002190890230020646
Auroc: 0.8946, +/- 0.0023151673805580646
top_value: 500
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:35<00:00,  8.77s/it]


Accuracy: 0.877, +/- 0.001095445115010333
F1: 0.8782727854281378, +/- 0.0011255985071274427
Precision: 0.8693690349660674, +/- 0.0014770359894220634
Recall: 0.8880000000000001, +/- 0.0016733200530681526
Auroc: 0.877, +/- 0.001095445115010333
top_value: 100
[[0.8779999999999999, 0.8423999999999999, 0.837, 0.8790000000000001, 0.8695999999999999], [0.8827999999999999, 0.8545999999999999, 0.8382, 0.8826, 0.8678000000000001], [0.9725999999999999, 0.9738, 0.9728, 0.9712, 0.97], [0.8960000000000001, 0.8575999999999999, 0.8634000000000001, 0.8992000000000001, 0.8946], [0.8879999999999999, 0.8524, 0.8555999999999999, 0.8897999999999999, 0.877]]
[[0.0010954451150103433, 0.0032649655434629113, 0.0050990195135928, 0.0011401754250991388, 0.0015033296378372907], [0.0013928388277183812, 0.0021587033144923155, 0.004066939881532554, 0.0015362291495736847, 0.0027820855486487316], [0.0008717797887081354, 0.00037416573867736773, 0.0006633249580710621, 0.001428285685708561, 0.0005477225575051464], [0.00104

In [16]:
#@title LazBF HighN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 1000, 2)
y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazBF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_none[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazBF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_pa[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazBF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbf[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazBF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazBF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbcdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)

Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:06<00:00,  1.55s/it]


Accuracy: 0.9059999999999999, +/- 0.00207364413533279
F1: 0.9073954756805053, +/- 0.0020319384137558465
Precision: 0.8952231530692704, +/- 0.002438886535083817
Recall: 0.9204000000000001, +/- 0.002227105745132031
Auroc: 0.9059999999999999, +/- 0.00207364413533279
top_value: 0.1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:05<00:00,  1.40s/it]


Accuracy: 0.9061999999999999, +/- 0.0018814887722226829
F1: 0.9079440073925558, +/- 0.0018585964825180324
Precision: 0.8917620285043043, +/- 0.002117815000708595
Recall: 0.9256, +/- 0.0021354156504062964
Auroc: 0.9061999999999999, +/- 0.0018814887722226488
top_value: 0.01
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:04<00:00,  1.17s/it]


Accuracy: 0.9711999999999998, +/- 0.0007348469228349405
F1: 0.9710560401336356, +/- 0.0007141557255861946
Precision: 0.9747399160245145, +/- 0.0007859186863516688
Recall: 0.9676, +/- 0.000979795897113256
Auroc: 0.9711999999999998, +/- 0.0007348469228349405
top_value: 1
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.07it/s]


Accuracy: 0.9116, +/- 0.001568438714135817
F1: 0.9131196573904055, +/- 0.001522197461190998
Precision: 0.8984232560062468, +/- 0.002145271363111165
Recall: 0.9288000000000001, +/- 0.001624807680927199
Auroc: 0.9116, +/- 0.0015684387141358213
top_value: 0.01
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:05<00:00,  1.46s/it]


Accuracy: 0.9097999999999999, +/- 0.0011575836902790013
F1: 0.9101537111821887, +/- 0.001172728191475415
Precision: 0.9069548892791571, +/- 0.0021909964116054227
Recall: 0.914, +/- 0.0018973665961010352
Auroc: 0.9098, +/- 0.001157583690279037
top_value: 0.1
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.84s/it]


Accuracy: 0.8506, +/- 0.001400000000000033
F1: 0.84814169881938, +/- 0.0010760778735378128
Precision: 0.8617204389950548, +/- 0.0036800762213164222
Recall: 0.836, +/- 0.0021908902300207017
Auroc: 0.8506, +/- 0.0014000000000000123
top_value: 50
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:09<00:00,  2.46s/it]


Accuracy: 0.8734, +/- 0.003264965543462912
F1: 0.8741072511108927, +/- 0.003251067725386063
Precision: 0.8692329878827669, +/- 0.002816937925548438
Recall: 0.8796000000000002, +/- 0.004118252056394804
Auroc: 0.8734, +/- 0.003264965543462912
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:09<00:00,  2.43s/it]


Accuracy: 0.9673999999999999, +/- 0.00040000000000000034
F1: 0.9672759380420253, +/- 0.00033173288720780397
Precision: 0.9673508212514736, +/- 0.0016849517372019922
Recall: 0.968, +/- 0.0010954451150103535
Auroc: 0.9673999999999999, +/- 0.00040000000000000034
top_value: 100
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:10<00:00,  2.75s/it]


Accuracy: 0.8792000000000002, +/- 0.0017720045146669612
F1: 0.8791582232988132, +/- 0.0019293455339485866
Precision: 0.8792181224233998, +/- 0.002659379859624692
Recall: 0.8804000000000001, +/- 0.003310589071449358
Auroc: 0.8792, +/- 0.0017720045146669184
top_value: 100
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.82s/it]


Accuracy: 0.8635999999999999, +/- 0.0031080540535840012
F1: 0.8614000317947005, +/- 0.0027325875036243244
Precision: 0.8737206900496906, +/- 0.006219605452579453
Recall: 0.8512000000000001, +/- 0.0033823069050575422
Auroc: 0.8636000000000001, +/- 0.003108054053584036
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.27s/it]


Accuracy: 0.8634000000000001, +/- 0.0062657800791282225
F1: 0.8643784055173752, +/- 0.005984657301438993
Precision: 0.8589913762488373, +/- 0.008291792175317775
Recall: 0.8708, +/- 0.00595315042645491
Auroc: 0.8634000000000001, +/- 0.0062657800791282225
top_value: 100
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.39s/it]


Accuracy: 0.8684000000000001, +/- 0.00304302481094061
F1: 0.8694251295265989, +/- 0.002837375220455179
Precision: 0.8632600706628308, +/- 0.004080245410610429
Recall: 0.8768, +/- 0.002870540018881467
Auroc: 0.8684000000000001, +/- 0.0030430248109405557
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.08s/it]


Accuracy: 0.9587999999999999, +/- 0.0020346989949375726
F1: 0.95841503437752, +/- 0.0020896643884196864
Precision: 0.9645379838247023, +/- 0.0019225523092151055
Recall: 0.9532, +/- 0.002332380757938123
Auroc: 0.9587999999999999, +/- 0.0020346989949375574
top_value: 5
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.36s/it]


Accuracy: 0.8794000000000001, +/- 0.0025019992006393385
F1: 0.8798158403451865, +/- 0.0025494851559058986
Precision: 0.8772438219941785, +/- 0.003725972559210737
Recall: 0.8831999999999999, +/- 0.003382306905057588
Auroc: 0.8794000000000001, +/- 0.002501999200639375
top_value: 50
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.32s/it]


Accuracy: 0.8594000000000002, +/- 0.003472751070837077
F1: 0.8583891238391861, +/- 0.00392023882742914
Precision: 0.8648039753361134, +/- 0.003737111469279056
Recall: 0.8528, +/- 0.006280127387243003
Auroc: 0.8594000000000002, +/- 0.003472751070837077
top_value: 50
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.36s/it]


Accuracy: 0.8884000000000001, +/- 0.002088061301782088
F1: 0.8908279274740118, +/- 0.0020467934895650954
Precision: 0.8711967473861622, +/- 0.0018957667722008923
Recall: 0.9124000000000001, +/- 0.002638181191654549
Auroc: 0.8884000000000001, +/- 0.002088061301782088
top_value: 1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.38s/it]


Accuracy: 0.9057999999999999, +/- 0.0024372115213907642
F1: 0.9065383456793704, +/- 0.0023538081872413064
Precision: 0.9007207914741592, +/- 0.0024835902421444403
Recall: 0.9132, +/- 0.0021540659228537974
Auroc: 0.9058000000000002, +/- 0.0024372115213907807
top_value: 1
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:06<00:00,  1.53s/it]


Accuracy: 0.9745999999999999, +/- 0.0008124038404636186
F1: 0.9744257488927042, +/- 0.0008116596552687912
Precision: 0.9783563881761337, +/- 0.0011546151945036212
Recall: 0.9708, +/- 0.0007999999999999923
Auroc: 0.9746, +/- 0.0008124038404635748
top_value: 1
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:06<00:00,  1.73s/it]


Accuracy: 0.9160000000000001, +/- 0.0013038404810405309
F1: 0.9172659525497722, +/- 0.0012003584548870828
Precision: 0.9031963293211052, +/- 0.0027573890078420676
Recall: 0.9324, +/- 0.0023999999999999976
Auroc: 0.916, +/- 0.0013038404810405393
top_value: 1
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:07<00:00,  1.89s/it]


Accuracy: 0.9006000000000001, +/- 0.0012884098726725335
F1: 0.9009344228091238, +/- 0.0012763310919752173
Precision: 0.8997419153457734, +/- 0.001967224926034519
Recall: 0.9036000000000002, +/- 0.001469693845669911
Auroc: 0.9006000000000001, +/- 0.0012884098726725043
top_value: 5
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:38<00:00,  9.53s/it]


Accuracy: 0.8858, +/- 0.00037416573867740633
F1: 0.8869985650828328, +/- 0.0002939221398115816
Precision: 0.8772743170568009, +/- 0.0005106465063368741
Recall: 0.8976000000000001, +/- 0.0004000000000000225
Auroc: 0.8858, +/- 0.00037416573867740633
top_value: 50
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:36<00:00,  9.11s/it]


Accuracy: 0.9054, +/- 0.0018055470085267767
F1: 0.9059241259333367, +/- 0.001847167630545219
Precision: 0.9016564509097386, +/- 0.0015247651981618246
Recall: 0.9108, +/- 0.002653299832284343
Auroc: 0.9054, +/- 0.0018055470085267767
top_value: 500
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:29<00:00,  7.41s/it]


Accuracy: 0.9742, +/- 0.0006633249580710637
F1: 0.9740643432674971, +/- 0.0006595945990290826
Precision: 0.9783991135438234, +/- 0.0007092940297970056
Recall: 0.97, +/- 0.001095445115010333
Auroc: 0.9742, +/- 0.0006633249580710637
top_value: 500
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:35<00:00,  8.89s/it]


Accuracy: 0.907, +/- 0.0025298221281347165
F1: 0.9078842693347818, +/- 0.002572893746050342
Precision: 0.8989505158244704, +/- 0.002661244706713367
Recall: 0.9179999999999999, +/- 0.003405877273185254
Auroc: 0.907, +/- 0.002529822128134743
top_value: 500
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:37<00:00,  9.43s/it]


Accuracy: 0.9077999999999999, +/- 0.002596150997149436
F1: 0.9076940043377849, +/- 0.0024817827990201136
Precision: 0.909170638617694, +/- 0.003906901880697609
Recall: 0.9067999999999999, +/- 0.0016248076809272222
Auroc: 0.9077999999999999, +/- 0.002596150997149425
top_value: 200
[[0.9059999999999999, 0.8506, 0.8634000000000001, 0.8884000000000001, 0.8858], [0.9061999999999999, 0.8734, 0.8684000000000001, 0.9057999999999999, 0.9054], [0.9711999999999998, 0.9673999999999999, 0.9587999999999999, 0.9745999999999999, 0.9742], [0.9116, 0.8792000000000002, 0.8794000000000001, 0.9160000000000001, 0.907], [0.9097999999999999, 0.8635999999999999, 0.8594000000000002, 0.9006000000000001, 0.9077999999999999]]
[[0.00207364413533279, 0.001400000000000033, 0.0062657800791282225, 0.002088061301782088, 0.00037416573867740633], [0.0018814887722226829, 0.003264965543462912, 0.00304302481094061, 0.0024372115213907642, 0.0018055470085267767], [0.0007348469228349405, 0.00040000000000000034, 0.00203469899493

In [13]:
#@title LazBF MedN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 500, 0)
y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazBF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_none[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazBF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_pa[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazBF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbf[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazBF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazBF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbcdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)

Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


Accuracy: 0.8695999999999999, +/- 0.0021354156504062856
F1: 0.8696256470823721, +/- 0.0024871499812285313
Precision: 0.8697372735092648, +/- 0.00336521581046043
Recall: 0.8712, +/- 0.005276362383309158
Auroc: 0.8695999999999999, +/- 0.0021354156504062856
top_value: 1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.16it/s]


Accuracy: 0.8656, +/- 0.0035440090293338508
F1: 0.8676956858185327, +/- 0.003766491346373964
Precision: 0.8578352326171711, +/- 0.004889386095721053
Recall: 0.8791999999999998, +/- 0.005276362383309153
Auroc: 0.8656, +/- 0.0035440090293338646
top_value: 0.01
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:01<00:00,  2.18it/s]


Accuracy: 0.9704, +/- 0.000748331477354789
F1: 0.9703071799102767, +/- 0.0007798220550423932
Precision: 0.9733148868255576, +/- 0.0007225317223571208
Recall: 0.968, +/- 0.0012649110640673177
Auroc: 0.9704, +/- 0.0007483314773547653
top_value: 0.1
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.12it/s]


Accuracy: 0.8836, +/- 0.005670978751503135
F1: 0.8836167641690409, +/- 0.005503102585752662
Precision: 0.8837078961071635, +/- 0.0069918208148065444
Recall: 0.8855999999999998, +/- 0.004118252056394808
Auroc: 0.8836, +/- 0.005670978751503135
top_value: 0.1
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.88it/s]


Accuracy: 0.8792, +/- 0.0033823069050575444
F1: 0.8798733147202699, +/- 0.00344877269398239
Precision: 0.878677357461172, +/- 0.003894882966160265
Recall: 0.884, +/- 0.0060663003552412316
Auroc: 0.8792, +/- 0.0033823069050575444
top_value: 1
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.65s/it]


Accuracy: 0.8131999999999999, +/- 0.00417612260356421
F1: 0.8105243360519081, +/- 0.005492504137541245
Precision: 0.8226807038975705, +/- 0.004072660630348838
Recall: 0.8000000000000002, +/- 0.011798304963002104
Auroc: 0.8131999999999999, +/- 0.004176122603564221
top_value: 50
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.55s/it]


Accuracy: 0.8315999999999999, +/- 0.0038157568056677665
F1: 0.8349407710533324, +/- 0.004342376990461728
Precision: 0.822709258465348, +/- 0.004566384394717513
Recall: 0.8512000000000001, +/- 0.009243376006633091
Auroc: 0.8315999999999999, +/- 0.0038157568056678
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.78s/it]


Accuracy: 0.9644, +/- 0.001326649916142161
F1: 0.9638112141026852, +/- 0.0013541973362610512
Precision: 0.9771071178517661, +/- 0.002042834185101112
Recall: 0.9512, +/- 0.0007999999999999785
Auroc: 0.9644, +/- 0.001326649916142161
top_value: 100
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.71s/it]


Accuracy: 0.8464, +/- 0.004664761515876222
F1: 0.842895720437941, +/- 0.005108155610943407
Precision: 0.865188501542393, +/- 0.005347082661412818
Recall: 0.8240000000000001, +/- 0.007797435475847175
Auroc: 0.8463999999999998, +/- 0.004664761515876195
top_value: 50
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.79s/it]


Accuracy: 0.8364, +/- 0.002561249694973126
F1: 0.831267326164383, +/- 0.002711154132343183
Precision: 0.8585052075222739, +/- 0.002924003247782075
Recall: 0.8072000000000001, +/- 0.0038781438859330645
Auroc: 0.8364, +/- 0.002561249694973126
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.73s/it]


Accuracy: 0.836, +/- 0.00424264068711927
F1: 0.8360133931213973, +/- 0.00415060061850473
Precision: 0.8360737337597058, +/- 0.004366532206072179
Recall: 0.8384, +/- 0.005455272678794343
Auroc: 0.836, +/- 0.00424264068711927
top_value: 100
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:08<00:00,  2.05s/it]


Accuracy: 0.8315999999999999, +/- 0.0052687759489278355
F1: 0.8319356468360745, +/- 0.005514921721259502
Precision: 0.8313236751906266, +/- 0.007474813377603773
Recall: 0.8352, +/- 0.0090686272390037
Auroc: 0.8315999999999999, +/- 0.00526877594892784
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.76s/it]


Accuracy: 0.9624, +/- 0.0028565713714171223
F1: 0.9623091256186236, +/- 0.0028152211478223027
Precision: 0.9654141540376834, +/- 0.0037186135995611147
Recall: 0.96, +/- 0.0021908902300206866
Auroc: 0.9624, +/- 0.0028565713714171223
top_value: 25
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.73s/it]


Accuracy: 0.8488, +/- 0.004882622246293486
F1: 0.849209976253191, +/- 0.005128407905863412
Precision: 0.8486467523579886, +/- 0.00590141653824132
Recall: 0.8535999999999999, +/- 0.006997142273814351
Auroc: 0.8488, +/- 0.004882622246293486
top_value: 100
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.97s/it]


Accuracy: 0.8404, +/- 0.006939740629158988
F1: 0.8424756638722803, +/- 0.006450009301837145
Precision: 0.8322882085561133, +/- 0.008726403715883603
Recall: 0.8544, +/- 0.0072221880341071165
Auroc: 0.8404, +/- 0.006939740629158963
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Accuracy: 0.8591999999999999, +/- 0.0023323807579381144
F1: 0.8591648566384771, +/- 0.0020256820238752043
Precision: 0.8622290791164401, +/- 0.0038670608594500336
Recall: 0.8583999999999999, +/- 0.0027129319932501
Auroc: 0.8591999999999999, +/- 0.00233238075793813
top_value: 5
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:03<00:00,  1.10it/s]


Accuracy: 0.8655999999999999, +/- 0.0017204650534085061
F1: 0.867853317991746, +/- 0.0017303476528680599
Precision: 0.8578448671830212, +/- 0.002956137097576317
Recall: 0.8808, +/- 0.004079215610874231
Auroc: 0.8655999999999999, +/- 0.0017204650534085166
top_value: 1
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.58it/s]


Accuracy: 0.9632, +/- 0.0016248076809271838
F1: 0.9631081584288314, +/- 0.0017202671352025206
Precision: 0.9643013815525283, +/- 0.0013312766955019813
Recall: 0.9623999999999999, +/- 0.0027129319932501124
Auroc: 0.9632, +/- 0.0016248076809271838
top_value: 1
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:03<00:00,  1.16it/s]


Accuracy: 0.8800000000000001, +/- 0.003687817782917152
F1: 0.8770342368988885, +/- 0.003483191653579306
Precision: 0.8985211814078908, +/- 0.007951196086011069
Recall: 0.8592000000000001, +/- 0.0049638694583963635
Auroc: 0.8800000000000001, +/- 0.003687817782917152
top_value: 0.1
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.80it/s]


Accuracy: 0.8703999999999998, +/- 0.0022271057451320364
F1: 0.8711001927348496, +/- 0.0022079574849935965
Precision: 0.8680598885756219, +/- 0.0030690878138038137
Recall: 0.8752000000000001, +/- 0.00233238075793813
Auroc: 0.8703999999999998, +/- 0.0022271057451320364
top_value: 1
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:19<00:00,  4.88s/it]


Accuracy: 0.8592000000000001, +/- 0.0037735924528226046
F1: 0.8605589558591037, +/- 0.003555340265394746
Precision: 0.8560923809263178, +/- 0.005303587116990492
Recall: 0.8672000000000001, +/- 0.0036660605559646706
Auroc: 0.8591999999999999, +/- 0.0037735924528226115
top_value: 200
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:19<00:00,  4.99s/it]


Accuracy: 0.858, +/- 0.005366563145999478
F1: 0.8602288682863912, +/- 0.005089888151985319
Precision: 0.8485557575020843, +/- 0.008333499276283516
Recall: 0.8736, +/- 0.0032496153618543646
Auroc: 0.858, +/- 0.005366563145999468
top_value: 500
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:19<00:00,  4.81s/it]


Accuracy: 0.9655999999999999, +/- 0.002315167380558047
F1: 0.9653480629540102, +/- 0.002307170544114346
Precision: 0.9720832467675024, +/- 0.002468996118089975
Recall: 0.9591999999999998, +/- 0.0023323807579381526
Auroc: 0.9655999999999999, +/- 0.002315167380558047
top_value: 500
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:19<00:00,  4.91s/it]


Accuracy: 0.8556000000000001, +/- 0.007704544113703302
F1: 0.8567434759612975, +/- 0.007995622330264194
Precision: 0.8544350504228996, +/- 0.006422004468171654
Recall: 0.8615999999999999, +/- 0.009682974749528204
Auroc: 0.8556000000000001, +/- 0.007704544113703303
top_value: 100
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:23<00:00,  5.76s/it]


Accuracy: 0.8735999999999999, +/- 0.003867815921162745
F1: 0.8747572758813614, +/- 0.0036516692631172187
Precision: 0.868152806516215, +/- 0.005569696875453738
Recall: 0.8832000000000001, +/- 0.003200000000000011
Auroc: 0.8735999999999999, +/- 0.003867815921162745
top_value: 200
[[0.8695999999999999, 0.8131999999999999, 0.836, 0.8591999999999999, 0.8592000000000001], [0.8656, 0.8315999999999999, 0.8315999999999999, 0.8655999999999999, 0.858], [0.9704, 0.9644, 0.9624, 0.9632, 0.9655999999999999], [0.8836, 0.8464, 0.8488, 0.8800000000000001, 0.8556000000000001], [0.8792, 0.8364, 0.8404, 0.8703999999999998, 0.8735999999999999]]
[[0.0021354156504062856, 0.00417612260356421, 0.00424264068711927, 0.0023323807579381144, 0.0037735924528226046], [0.0035440090293338508, 0.0038157568056677665, 0.0052687759489278355, 0.0017204650534085061, 0.005366563145999478], [0.000748331477354789, 0.001326649916142161, 0.0028565713714171223, 0.0016248076809271838, 0.002315167380558047], [0.005670978751503135, 

In [8]:
#@title LazBF LowN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 200, 1)
y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazBF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_none[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazBF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_pa[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazBF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbf[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazBF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazBF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazbf_mlm_lazbcdef[idxs], LazBF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)

Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.93it/s]


Accuracy: 0.8700000000000001, +/- 0.009354143466934847
F1: 0.8727418219545873, +/- 0.009762900835015605
Precision: 0.8608484441748405, +/- 0.008786641630604202
Recall: 0.8899999999999999, +/- 0.011401754250991386
Auroc: 0.8699999999999999, +/- 0.009354143466934835
top_value: 1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:01<00:00,  2.18it/s]


Accuracy: 0.89, +/- 0.0068920243760451005
F1: 0.8920922203363615, +/- 0.006386478573034399
Precision: 0.8746721510044972, +/- 0.012121899113899286
Recall: 0.9139999999999999, +/- 0.008124038404635985
Auroc: 0.89, +/- 0.0068920243760451005
top_value: 0.1
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.36it/s]


Accuracy: 0.9730000000000001, +/- 0.003741657386773921
F1: 0.9722066203657989, +/- 0.004029041006400041
Precision: 0.9824661654135338, +/- 0.0020087777928829054
Recall: 0.9640000000000001, +/- 0.006000000000000026
Auroc: 0.9730000000000001, +/- 0.003741657386773921
top_value: 0.1
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:01<00:00,  2.38it/s]


Accuracy: 0.9110000000000001, +/- 0.007314369419163908
F1: 0.9141093343844922, +/- 0.00656979181704988
Precision: 0.8972180860075598, +/- 0.008781913854283644
Recall: 0.9359999999999999, +/- 0.005999999999999999
Auroc: 0.9110000000000001, +/- 0.007314369419163908
top_value: 0.01
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:01<00:00,  2.16it/s]


Accuracy: 0.8880000000000001, +/- 0.010559356040971447
F1: 0.8888389592714159, +/- 0.010469412981457813
Precision: 0.8807473142672227, +/- 0.010279360508321638
Recall: 0.906, +/- 0.012489995996796783
Auroc: 0.8879999999999999, +/- 0.010559356040971421
top_value: 0.01
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


Accuracy: 0.8380000000000001, +/- 0.0090277350426339
F1: 0.8376978077064932, +/- 0.009128033630793205
Precision: 0.8377137606863008, +/- 0.0090549334521281
Recall: 0.844, +/- 0.01363818169698587
Auroc: 0.8380000000000001, +/- 0.0090277350426339
top_value: 50
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.03s/it]


Accuracy: 0.898, +/- 0.002549509756796386
F1: 0.9017580826044866, +/- 0.0021699608826791774
Precision: 0.8801386172942237, +/- 0.005162689356926266
Recall: 0.9279999999999999, +/- 0.0020000000000000183
Auroc: 0.898, +/- 0.002549509756796386
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.11s/it]


Accuracy: 0.9800000000000001, +/- 0.002236067977499804
F1: 0.979619598664528, +/- 0.0023945596061138584
Precision: 0.9864761904761906, +/- 0.002355097717881563
Recall: 0.974, +/- 0.004000000000000028
Auroc: 0.9800000000000001, +/- 0.002236067977499804
top_value: 50
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.24s/it]


Accuracy: 0.8800000000000001, +/- 0.007905694150420965
F1: 0.8850541683009068, +/- 0.008547545720657414
Precision: 0.8518456378099397, +/- 0.003936112537348574
Recall: 0.9259999999999999, +/- 0.012083045973594579
Auroc: 0.8800000000000001, +/- 0.007905694150420944
top_value: 50
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.37s/it]


Accuracy: 0.852, +/- 0.006244997998398396
F1: 0.84471014867934, +/- 0.005771750339906213
Precision: 0.8861669369502186, +/- 0.011028848657791376
Recall: 0.8099999999999999, +/- 0.0031622776601683646
Auroc: 0.852, +/- 0.006244997998398389
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.52s/it]


Accuracy: 0.834, +/- 0.010049875621120889
F1: 0.8359246160283039, +/- 0.010484206678392432
Precision: 0.826802842435081, +/- 0.010160255317194982
Recall: 0.8539999999999999, +/- 0.011224972160321832
Auroc: 0.834, +/- 0.010049875621120908
top_value: 100
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.68s/it]


Accuracy: 0.8639999999999999, +/- 0.002915475947422645
F1: 0.8706624151997897, +/- 0.0030554376035916934
Precision: 0.8371579811582504, +/- 0.0022479028837225174
Recall: 0.9120000000000001, +/- 0.0048989794855663635
Auroc: 0.8639999999999999, +/- 0.002915475947422645
top_value: 25
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.33s/it]


Accuracy: 0.9690000000000001, +/- 0.002915475947422662
F1: 0.9684557542395558, +/- 0.0032023675511491058
Precision: 0.9813049752180187, +/- 0.0026730389429245557
Recall: 0.958, +/- 0.00734846922834955
Auroc: 0.9690000000000001, +/- 0.002915475947422662
top_value: 5
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


Accuracy: 0.876, +/- 0.005787918451395109
F1: 0.879019337448749, +/- 0.0062303637428715395
Precision: 0.8658119715704048, +/- 0.004930531806662625
Recall: 0.8979999999999999, +/- 0.013190905958272941
Auroc: 0.876, +/- 0.005787918451395109
top_value: 50
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.49s/it]


Accuracy: 0.865, +/- 0.005477225575051651
F1: 0.8649951177043753, +/- 0.006057187318782943
Precision: 0.8648094695391505, +/- 0.005341608202855882
Recall: 0.8720000000000001, +/- 0.0073484692283495405
Auroc: 0.865, +/- 0.005477225575051651
top_value: 100
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Accuracy: 0.866, +/- 0.005567764362830031
F1: 0.8690578539618155, +/- 0.0049494715354859815
Precision: 0.8564739755301606, +/- 0.005944192543309613
Recall: 0.8880000000000001, +/- 0.0048989794855663765
Auroc: 0.866, +/- 0.005567764362830031
top_value: 1
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  3.03it/s]


Accuracy: 0.9040000000000001, +/- 0.007648529270389173
F1: 0.9056746964081352, +/- 0.007843393264955981
Precision: 0.8975008866038616, +/- 0.006234290983292537
Recall: 0.9179999999999999, +/- 0.009695359714832668
Auroc: 0.9040000000000001, +/- 0.007648529270389154
top_value: 1
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  2.58it/s]


Accuracy: 0.9630000000000001, +/- 0.0033911649915626175
F1: 0.9626226880013785, +/- 0.003513410670837859
Precision: 0.9629611908559277, +/- 0.004061160247651695
Recall: 0.9640000000000001, +/- 0.004000000000000026
Auroc: 0.9630000000000001, +/- 0.0033911649915626175
top_value: 5
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  2.60it/s]


Accuracy: 0.921, +/- 0.009273618495495727
F1: 0.9224958930770797, +/- 0.009333006270271125
Precision: 0.9039165964978322, +/- 0.009420938672226768
Recall: 0.944, +/- 0.012083045973594572
Auroc: 0.921, +/- 0.009273618495495722
top_value: 1
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.52it/s]


Accuracy: 0.892, +/- 0.0071763500472036374
F1: 0.8924283320558575, +/- 0.007016188704275597
Precision: 0.8877846027456338, +/- 0.009139232405730516
Recall: 0.906, +/- 0.008717797887081312
Auroc: 0.892, +/- 0.007176350047203639
top_value: 1
Low-N, LazBF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.97s/it]


Accuracy: 0.859, +/- 0.006000000000000024
F1: 0.8607325890086136, +/- 0.005792915934238346
Precision: 0.8504569869997788, +/- 0.007096176464746478
Recall: 0.8780000000000001, +/- 0.007348469228349534
Auroc: 0.859, +/- 0.006000000000000011
top_value: 100
Low-N, LazBF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:09<00:00,  2.45s/it]


Accuracy: 0.858, +/- 0.004898979485566354
F1: 0.858391364817015, +/- 0.005864757659881678
Precision: 0.8556519522863579, +/- 0.008460924736385175
Recall: 0.868, +/- 0.014282856857085705
Auroc: 0.858, +/- 0.004898979485566354
top_value: 100
Low-N, LazBF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.76s/it]


Accuracy: 0.969, +/- 0.005787918451395114
F1: 0.9685549521082255, +/- 0.005899447232791721
Precision: 0.9801804511278196, +/- 0.00553092232947518
Recall: 0.958, +/- 0.006633249580710839
Auroc: 0.969, +/- 0.005787918451395114
top_value: 200
Low-N, LazBF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:08<00:00,  2.18s/it]


Accuracy: 0.861, +/- 0.009669539802906895
F1: 0.8631975649467727, +/- 0.009378931281250572
Precision: 0.8512529037197231, +/- 0.010723084279995895
Recall: 0.8800000000000001, +/- 0.010000000000000031
Auroc: 0.861, +/- 0.009669539802906895
top_value: 500
Low-N, LazBF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:10<00:00,  2.67s/it]


Accuracy: 0.8699999999999999, +/- 0.0031622776601684085
F1: 0.8745671409180666, +/- 0.0036724927042470655
Precision: 0.8540864611032466, +/- 0.003632626567489437
Recall: 0.9019999999999999, +/- 0.005830951894845296
Auroc: 0.8699999999999999, +/- 0.0031622776601684085
top_value: 500
[[0.8700000000000001, 0.8380000000000001, 0.834, 0.866, 0.859], [0.89, 0.898, 0.8639999999999999, 0.9040000000000001, 0.858], [0.9730000000000001, 0.9800000000000001, 0.9690000000000001, 0.9630000000000001, 0.969], [0.9110000000000001, 0.8800000000000001, 0.876, 0.921, 0.861], [0.8880000000000001, 0.852, 0.865, 0.892, 0.8699999999999999]]
[[0.009354143466934847, 0.0090277350426339, 0.010049875621120889, 0.005567764362830031, 0.006000000000000024], [0.0068920243760451005, 0.002549509756796386, 0.002915475947422645, 0.007648529270389173, 0.004898979485566354], [0.003741657386773921, 0.002236067977499804, 0.002915475947422662, 0.0033911649915626175, 0.005787918451395114], [0.007314369419163908, 0.00790569415042

---

In [23]:
#@title LazDEF LowN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 200, 5)

y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazDEF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_none[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazDEF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_pa[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazDEF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazbf[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazDEF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazbcdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)
# for model, param, grid in zip(model_list, param_list, value_list):

#   # Lazdef prediction
#   print(f"Low-N, LazDEF-task, Vanilla-ESM-Embeddings {model}")
#   optimize(lazdef_mlm_none[idxs], LazDEF_sample_labels[idxs], model, param, grid)

#   print(f"Low-N, LazDEF-task, Peptide-ESM-Embeddings {model}")
#   optimize(lazdef_mlm_pa[idxs], LazDEF_sample_labels[idxs], model, param, grid)

#   print(f"Low-N, LazDEF-task, LazBF-ESM-Embeddings {model}")
#   optimize(lazdef_mlm_lazbf[idxs], LazDEF_sample_labels[idxs], model, param, grid)

#   print(f"Low-N, LazDEF-task, LazDEF-ESM-Embeddings {model}")
#   optimize(lazdef_mlm_lazdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)

Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.12it/s]


Accuracy: 0.767, +/- 0.006633249580710823
F1: 0.7710076583314395, +/- 0.006257862992320949
Precision: 0.7611296091533539, +/- 0.009555103638299763
Recall: 0.7879999999999999, +/- 0.01157583690279022
Auroc: 0.767, +/- 0.006633249580710803
top_value: 1
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.87it/s]


Accuracy: 0.779, +/- 0.006595452979136473
F1: 0.7812015240222528, +/- 0.008836712740705417
Precision: 0.7742616338254109, +/- 0.003563318980619485
Recall: 0.798, +/- 0.01593737745050924
Auroc: 0.779, +/- 0.006595452979136465
top_value: 0.1
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.41it/s]


Accuracy: 0.8280000000000001, +/- 0.006442049363362577
F1: 0.825672156071974, +/- 0.005776949530736092
Precision: 0.8344598388778628, +/- 0.00898627535855867
Recall: 0.8240000000000001, +/- 0.007483314773547891
Auroc: 0.8280000000000001, +/- 0.006442049363362577
top_value: 0.01
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.13it/s]


Accuracy: 0.9710000000000001, +/- 0.0009999999999999898
F1: 0.9701349527665318, +/- 0.0009248056198065844
Precision: 0.99, +/- 0.0031622776601683646
Recall: 0.952, +/- 0.0020000000000000018
Auroc: 0.9710000000000001, +/- 0.0009999999999999898
top_value: 0.1
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:01<00:00,  2.79it/s]


Accuracy: 0.8170000000000002, +/- 0.009027735042633914
F1: 0.8179161319888039, +/- 0.009072787947792472
Precision: 0.8126145424198628, +/- 0.007486334183633206
Recall: 0.8299999999999998, +/- 0.009486832980505122
Auroc: 0.8170000000000002, +/- 0.009027735042633876
top_value: 0.01
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.02s/it]


Accuracy: 0.681, +/- 0.010653637876331263
F1: 0.6898655895755688, +/- 0.010811732489624947
Precision: 0.6714466251655644, +/- 0.014245875137255747
Recall: 0.716, +/- 0.014696938456699058
Auroc: 0.681, +/- 0.010653637876331263
top_value: 25
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.16s/it]


Accuracy: 0.7470000000000001, +/- 0.011895377253370297
F1: 0.741899557550392, +/- 0.013009241253768355
Precision: 0.7571880092387833, +/- 0.011195384380124428
Recall: 0.732, +/- 0.017720045146669368
Auroc: 0.7470000000000001, +/- 0.011895377253370297
top_value: 100
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.24s/it]


Accuracy: 0.744, +/- 0.023473389188610996
F1: 0.7394693315068275, +/- 0.02326060683145888
Precision: 0.7555842421289991, +/- 0.025893174628821573
Recall: 0.73, +/- 0.0244948974278318
Auroc: 0.744, +/- 0.023473389188610996
top_value: 50
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


Accuracy: 0.951, +/- 0.0033166247903553743
F1: 0.9500761451826921, +/- 0.0032847475956509914
Precision: 0.9651747550694919, +/- 0.005685759036845261
Recall: 0.938, +/- 0.003741657386773932
Auroc: 0.951, +/- 0.0033166247903553743
top_value: 25
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.16s/it]


Accuracy: 0.7219999999999999, +/- 0.01488287606613722
F1: 0.710230799241873, +/- 0.016575841608298256
Precision: 0.7407629532639628, +/- 0.02086413504032581
Recall: 0.6899999999999998, +/- 0.023021728866442687
Auroc: 0.7219999999999999, +/- 0.01488287606613722
top_value: 50
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.28s/it]


Accuracy: 0.7130000000000001, +/- 0.015459624833740295
F1: 0.7125109274009201, +/- 0.013438814999327454
Precision: 0.7187298727760029, +/- 0.019608776925416658
Recall: 0.7139999999999999, +/- 0.014696938456699062
Auroc: 0.7130000000000001, +/- 0.015459624833740308
top_value: 50
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.57s/it]


Accuracy: 0.6979999999999998, +/- 0.008000000000000007
F1: 0.7000462263103865, +/- 0.008559776193719127
Precision: 0.7003797112862491, +/- 0.010582815454618826
Recall: 0.706, +/- 0.01691153452528778
Auroc: 0.6979999999999998, +/- 0.008000000000000007
top_value: 50
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.61s/it]


Accuracy: 0.7310000000000001, +/- 0.008717797887081357
F1: 0.7268841637778557, +/- 0.012271820222331843
Precision: 0.7407530410735822, +/- 0.005884393530181435
Recall: 0.72, +/- 0.020248456731316596
Auroc: 0.7310000000000001, +/- 0.008717797887081345
top_value: 25
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:04<00:00,  1.24s/it]


Accuracy: 0.958, +/- 0.00406201920231799
F1: 0.9563981359514093, +/- 0.003825863995149525
Precision: 0.9762166021113389, +/- 0.006150706583037851
Recall: 0.9400000000000001, +/- 0.0031622776601683646
Auroc: 0.958, +/- 0.00406201920231799
top_value: 25
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.37s/it]


Accuracy: 0.7500000000000001, +/- 0.013416407864998718
F1: 0.7506015368460364, +/- 0.012874785130960386
Precision: 0.7480536777695614, +/- 0.016567275211644885
Recall: 0.758, +/- 0.01392838827718412
Auroc: 0.7500000000000001, +/- 0.013416407864998717
top_value: 100
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  2.21it/s]


Accuracy: 0.7529999999999999, +/- 0.008000000000000009
F1: 0.7320538216351427, +/- 0.010963841664939621
Precision: 0.8027252235208892, +/- 0.006176624745075194
Recall: 0.6839999999999999, +/- 0.020149441679609886
Auroc: 0.7529999999999999, +/- 0.008000000000000009
top_value: 0.01
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  2.41it/s]


Accuracy: 0.7779999999999999, +/- 0.011135528725660069
F1: 0.7696571584683041, +/- 0.013554169020662773
Precision: 0.8022300404789569, +/- 0.012069251040041792
Recall: 0.746, +/- 0.02039607805437113
Auroc: 0.7780000000000001, +/- 0.011135528725660034
top_value: 5
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  2.51it/s]


Accuracy: 0.8140000000000001, +/- 0.007648529270389165
F1: 0.8138480141410105, +/- 0.006549840481048612
Precision: 0.8110249956247669, +/- 0.011867490666876377
Recall: 0.826, +/- 0.008124038404635948
Auroc: 0.8140000000000001, +/- 0.00764852927038916
top_value: 1
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.64it/s]


Accuracy: 0.9489999999999998, +/- 0.0018708286933869424
F1: 0.9483533749438756, +/- 0.001963956833394818
Precision: 0.9589411407306143, +/- 0.0015591685042518142
Recall: 0.9400000000000001, +/- 0.0031622776601683646
Auroc: 0.9489999999999998, +/- 0.0018708286933869546
top_value: 5
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:01<00:00,  2.45it/s]


Accuracy: 0.8009999999999999, +/- 0.0018708286933869693
F1: 0.7758847272680118, +/- 0.003611539007418303
Precision: 0.8793613739201975, +/- 0.008978695447158179
Recall: 0.7, +/- 0.010954451150103347
Auroc: 0.8009999999999999, +/- 0.0018708286933869635
top_value: 0.01
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.87s/it]


Accuracy: 0.7779999999999999, +/- 0.0033911649915626388
F1: 0.783369435009761, +/- 0.0037722434206348895
Precision: 0.7697459278087491, +/- 0.0022303221515658474
Recall: 0.8039999999999999, +/- 0.007483314773547864
Auroc: 0.7779999999999999, +/- 0.003391164991562617
top_value: 500
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.84s/it]


Accuracy: 0.7700000000000001, +/- 0.0070710678118654805
F1: 0.7728100845741596, +/- 0.00726046356181786
Precision: 0.7659552915967238, +/- 0.008590603104684683
Recall: 0.7859999999999999, +/- 0.011661903789690595
Auroc: 0.7700000000000001, +/- 0.0070710678118654805
top_value: 200
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.35s/it]


Accuracy: 0.7929999999999999, +/- 0.004636809247747867
F1: 0.7925999251695081, +/- 0.007436808601572019
Precision: 0.7961736361881738, +/- 0.008800917393502458
Recall: 0.796, +/- 0.01964688270438852
Auroc: 0.7929999999999999, +/- 0.004636809247747867
top_value: 200
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:10<00:00,  2.61s/it]


Accuracy: 0.953, +/- 0.002000000000000035
F1: 0.951203806512282, +/- 0.0021345450768818375
Precision: 0.9753653435301033, +/- 0.007842520138292763
Recall: 0.932, +/- 0.008000000000000004
Auroc: 0.953, +/- 0.002000000000000035
top_value: 500
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:09<00:00,  2.37s/it]


Accuracy: 0.825, +/- 0.0031622776601683555
F1: 0.8259528333088639, +/- 0.003685554815380428
Precision: 0.8252195343699988, +/- 0.005255851708816717
Recall: 0.8320000000000001, +/- 0.007348469228349525
Auroc: 0.825, +/- 0.0031622776601683555
top_value: 500
[[0.767, 0.681, 0.7130000000000001, 0.7529999999999999, 0.7779999999999999], [0.779, 0.7470000000000001, 0.6979999999999998, 0.7779999999999999, 0.7700000000000001], [0.8280000000000001, 0.744, 0.7310000000000001, 0.8140000000000001, 0.7929999999999999], [0.9710000000000001, 0.951, 0.958, 0.9489999999999998, 0.953], [0.8170000000000002, 0.7219999999999999, 0.7500000000000001, 0.8009999999999999, 0.825]]
[[0.006633249580710823, 0.010653637876331263, 0.015459624833740295, 0.008000000000000009, 0.0033911649915626388], [0.006595452979136473, 0.011895377253370297, 0.008000000000000007, 0.011135528725660069, 0.0070710678118654805], [0.006442049363362577, 0.023473389188610996, 0.008717797887081357, 0.007648529270389165, 0.004636809247747867]

In [18]:
#@title LazDEF MedN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 500, 1)

y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazDEF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_none[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazDEF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_pa[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazDEF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazbf[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazDEF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazbcdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)

Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.50it/s]


Accuracy: 0.8084000000000001, +/- 0.0022271057451320173
F1: 0.8106541607600484, +/- 0.0026564954409745906
Precision: 0.8025775640187744, +/- 0.00219796256519915
Recall: 0.8208, +/- 0.005425863986500221
Auroc: 0.8084000000000001, +/- 0.0022271057451320173
top_value: 0.01
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.65it/s]


Accuracy: 0.8256, +/- 0.0036551333764993986
F1: 0.8298321502739443, +/- 0.0030618013936664175
Precision: 0.812936728292956, +/- 0.004714192507868749
Recall: 0.8503999999999999, +/- 0.0029933259094191526
Auroc: 0.8256, +/- 0.0036551333764993986
top_value: 0.1
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.39it/s]


Accuracy: 0.8428000000000001, +/- 0.006019966777316978
F1: 0.8427485240696531, +/- 0.006054782479910954
Precision: 0.8433032013809569, +/- 0.0058559873873022675
Recall: 0.8440000000000001, +/- 0.007155417527999314
Auroc: 0.8427999999999999, +/- 0.006019966777316948
top_value: 0.1
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:02<00:00,  1.87it/s]


Accuracy: 0.976, +/- 0.0010954451150103736
F1: 0.9759112660488247, +/- 0.0011197471140261324
Precision: 0.9788155442147831, +/- 0.0010416752591003993
Recall: 0.9736, +/- 0.0024000000000000167
Auroc: 0.976, +/- 0.0010954451150103736
top_value: 0.01
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:01<00:00,  2.21it/s]


Accuracy: 0.8392000000000002, +/- 0.0036110940170535344
F1: 0.8390462496611578, +/- 0.003551776743547237
Precision: 0.8392828418123583, +/- 0.005321871021973519
Recall: 0.8423999999999999, +/- 0.007756287771866144
Auroc: 0.8392, +/- 0.0036110940170535747
top_value: 1
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.93s/it]


Accuracy: 0.7652, +/- 0.007337574531137656
F1: 0.7663068715662176, +/- 0.00803608837095016
Precision: 0.7655427834546632, +/- 0.006103311047274312
Recall: 0.7704, +/- 0.010628264204469133
Auroc: 0.7652, +/- 0.007337574531137642
top_value: 100
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.66s/it]


Accuracy: 0.778, +/- 0.00419523539268062
F1: 0.7787805780301976, +/- 0.004227289046167459
Precision: 0.7777464474187658, +/- 0.0030974386891240205
Recall: 0.7840000000000001, +/- 0.005366563145999487
Auroc: 0.778, +/- 0.00419523539268061
top_value: 100
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:05<00:00,  1.43s/it]


Accuracy: 0.7904000000000001, +/- 0.007110555533852492
F1: 0.7931560651606677, +/- 0.007689228916738918
Precision: 0.784229852834738, +/- 0.006934514117098924
Recall: 0.8048, +/- 0.010229369482035521
Auroc: 0.7904, +/- 0.0071105555338524615
top_value: 100
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.77s/it]


Accuracy: 0.976, +/- 0.001264911064067388
F1: 0.9760496965894421, +/- 0.001271592370976085
Precision: 0.9757344546526884, +/- 0.0015104781334794475
Recall: 0.9768000000000001, +/- 0.0014966629547096195
Auroc: 0.976, +/- 0.001264911064067388
top_value: 50
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.65s/it]


Accuracy: 0.7735999999999998, +/- 0.006675327707311456
F1: 0.7705648363882085, +/- 0.006802939294950921
Precision: 0.7819205551441779, +/- 0.0077808233390668086
Recall: 0.7624000000000001, +/- 0.008352245207128432
Auroc: 0.7735999999999998, +/- 0.006675327707311443
top_value: 100
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:08<00:00,  2.09s/it]


Accuracy: 0.7556, +/- 0.007756287771866114
F1: 0.756128995263894, +/- 0.008453740844964525
Precision: 0.7526353841459634, +/- 0.008327115929460461
Recall: 0.7615999999999999, +/- 0.011356055653262703
Auroc: 0.7556, +/- 0.007756287771866102
top_value: 100
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.97s/it]


Accuracy: 0.7487999999999999, +/- 0.002799999999999984
F1: 0.7509465988150337, +/- 0.0043952194320758965
Precision: 0.7440952881230282, +/- 0.0041350169556476565
Recall: 0.7608, +/- 0.012092973166264778
Auroc: 0.7487999999999999, +/- 0.002799999999999984
top_value: 50
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:08<00:00,  2.10s/it]


Accuracy: 0.7836000000000001, +/- 0.004707440918375938
F1: 0.7853523729572286, +/- 0.004390701919443982
Precision: 0.7798766276113461, +/- 0.006472546905975871
Recall: 0.7935999999999999, +/- 0.003919183588453067
Auroc: 0.7836000000000001, +/- 0.00470744091837596
top_value: 100
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:06<00:00,  1.74s/it]


Accuracy: 0.9616, +/- 0.0025612496949731422
F1: 0.9608763656383108, +/- 0.0026516518653806782
Precision: 0.9726703630922995, +/- 0.0034869285228275658
Recall: 0.9503999999999999, +/- 0.004118252056394808
Auroc: 0.9616, +/- 0.0025612496949731422
top_value: 5
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:07<00:00,  1.90s/it]


Accuracy: 0.766, +/- 0.00807465169527452
F1: 0.7667609326907996, +/- 0.008421090377401106
Precision: 0.7644905996653778, +/- 0.006710753520965178
Recall: 0.7712, +/- 0.012547509713086508
Auroc: 0.766, +/- 0.00807465169527452
top_value: 100
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.77it/s]


Accuracy: 0.7936, +/- 0.006462197768561403
F1: 0.7962743520208251, +/- 0.005270878201348073
Precision: 0.7886504730397761, +/- 0.010141208691752114
Recall: 0.8064, +/- 0.0037094473981983
Auroc: 0.7936, +/- 0.006462197768561403
top_value: 5
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.43it/s]


Accuracy: 0.8208, +/- 0.005535341001239229
F1: 0.8239172697423957, +/- 0.005805919688057859
Precision: 0.8102325945911947, +/- 0.004595358260222631
Recall: 0.8400000000000001, +/- 0.0074833147735478694
Auroc: 0.8208, +/- 0.005535341001239212
top_value: 5
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.67it/s]


Accuracy: 0.8400000000000001, +/- 0.007402702209328691
F1: 0.8446723462436811, +/- 0.00713674645371096
Precision: 0.8232434868925784, +/- 0.008036486190973875
Recall: 0.8696000000000002, +/- 0.007652450587883606
Auroc: 0.8400000000000001, +/- 0.0074027022093286834
top_value: 1
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:04<00:00,  1.11s/it]


Accuracy: 0.9756, +/- 0.0013266499161421945
F1: 0.9756849871045891, +/- 0.001395943478600382
Precision: 0.9733742994440157, +/- 0.0008846940860221113
Recall: 0.9783999999999999, +/- 0.0020396078054371416
Auroc: 0.9756, +/- 0.0013266499161421945
top_value: 5
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:02<00:00,  1.62it/s]


Accuracy: 0.8448, +/- 0.004127953488110054
F1: 0.8471251451728085, +/- 0.0038145199529509076
Precision: 0.8392220342381906, +/- 0.005556593223065233
Recall: 0.8584000000000002, +/- 0.004995998398718712
Auroc: 0.8448, +/- 0.004127953488110068
top_value: 5
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:23<00:00,  5.87s/it]


Accuracy: 0.806, +/- 0.005513619500836073
F1: 0.80617245005384, +/- 0.005032633311566367
Precision: 0.8067571095674675, +/- 0.008018857198525176
Recall: 0.8088000000000001, +/- 0.0049638694583963305
Auroc: 0.806, +/- 0.005513619500836093
top_value: 50
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:21<00:00,  5.35s/it]


Accuracy: 0.8276, +/- 0.0044452221541785904
F1: 0.8302562784517539, +/- 0.004528077486775041
Precision: 0.8209053336533018, +/- 0.004286297726793895
Recall: 0.8424000000000001, +/- 0.0060133185513491574
Auroc: 0.8276, +/- 0.0044452221541785904
top_value: 500
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:23<00:00,  5.87s/it]


Accuracy: 0.8343999999999999, +/- 0.0057061370470748605
F1: 0.8357262143663935, +/- 0.005618605522575245
Precision: 0.830691284307683, +/- 0.006362752364077714
Recall: 0.8432000000000001, +/- 0.005571355310873643
Auroc: 0.8343999999999999, +/- 0.0057061370470748544
top_value: 500
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:18<00:00,  4.57s/it]


Accuracy: 0.9704, +/- 0.001720465053408532
F1: 0.9700284450515543, +/- 0.0017701317331647952
Precision: 0.9786423168025818, +/- 0.001547947650362011
Recall: 0.9624, +/- 0.003249615361854376
Auroc: 0.9704, +/- 0.001720465053408532
top_value: 100
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:22<00:00,  5.64s/it]


Accuracy: 0.8383999999999998, +/- 0.005306599664568684
F1: 0.8394006235575672, +/- 0.005668985954637245
Precision: 0.836310261607012, +/- 0.0050920431669243486
Recall: 0.8448, +/- 0.007939773296511674
Auroc: 0.8383999999999998, +/- 0.0053065996645686505
top_value: 50
[[0.8084000000000001, 0.7652, 0.7556, 0.7936, 0.806], [0.8256, 0.778, 0.7487999999999999, 0.8208, 0.8276], [0.8428000000000001, 0.7904000000000001, 0.7836000000000001, 0.8400000000000001, 0.8343999999999999], [0.976, 0.976, 0.9616, 0.9756, 0.9704], [0.8392000000000002, 0.7735999999999998, 0.766, 0.8448, 0.8383999999999998]]
[[0.0022271057451320173, 0.007337574531137656, 0.007756287771866114, 0.006462197768561403, 0.005513619500836073], [0.0036551333764993986, 0.00419523539268062, 0.002799999999999984, 0.005535341001239229, 0.0044452221541785904], [0.006019966777316978, 0.007110555533852492, 0.004707440918375938, 0.007402702209328691, 0.0057061370470748605], [0.0010954451150103736, 0.001264911064067388, 0.002561249694973142

In [19]:
#@title LazDEF HighN
idxs = balanced_sample_np(lazbf_mlm_none, LazBF_sample_labels, 1000, 1)

y_values = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]
y_errors = [
    [0, 0, 0, 0, 0], # vanilla-esm
    [0, 0, 0, 0, 0], # peptide-esm
    [0, 0, 0, 0, 0], # LazBF-esm
    [0, 0, 0, 0, 0], # LazDEF-esm
    [0, 0, 0, 0, 0], # LazBCDEF-esm
]

i = 0
for model, param, grid in zip(model_list, param_list, value_list):

  # Lazbf prediction
  print(f"Low-N, LazDEF-task, Vanilla-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_none[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[0][i] = m
  y_errors[0][i] = e

  print(f"Low-N, LazDEF-task, Peptide-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_pa[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[1][i] = m
  y_errors[1][i] = e

  print(f"Low-N, LazDEF-task, LazBF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazbf[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[2][i] = m
  y_errors[2][i] = e

  print(f"Low-N, LazDEF-task, LazDEF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[3][i] = m
  y_errors[3][i] = e

  print(f"Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings {model}")
  m, e = optimize(lazdef_mlm_lazbcdef[idxs], LazDEF_sample_labels[idxs], model, param, grid)
  y_values[4][i] = m
  y_errors[4][i] = e

  i += 1
print(y_values)
print(y_errors)

Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.03it/s]


Accuracy: 0.8026, +/- 0.001503329637837292
F1: 0.8041688854963777, +/- 0.0011139568430028198
Precision: 0.8000366633920054, +/- 0.00214785640005595
Recall: 0.8099999999999999, +/- 0.0008944271909998918
Auroc: 0.8026, +/- 0.0015033296378372898
top_value: 0.1
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:05<00:00,  1.45s/it]


Accuracy: 0.8106, +/- 0.003108054053583994
F1: 0.811123943122737, +/- 0.0031752698120635576
Precision: 0.8100937753628727, +/- 0.003201394101653392
Recall: 0.8135999999999999, +/- 0.003969886648255866
Auroc: 0.8106, +/- 0.003108054053584024
top_value: 5
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.07it/s]


Accuracy: 0.8311999999999999, +/- 0.0020346989949375635
F1: 0.8318771977559024, +/- 0.0021212875656153977
Precision: 0.8294625126923562, +/- 0.0018465290862842321
Recall: 0.8348000000000001, +/- 0.0025768197453449894
Auroc: 0.8311999999999999, +/- 0.0020346989949375626
top_value: 0.01
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:03<00:00,  1.03it/s]


Accuracy: 0.9639999999999999, +/- 0.001788854381999855
F1: 0.9635632666269881, +/- 0.0017874723922371295
Precision: 0.9730555137050005, +/- 0.001832040987159815
Recall: 0.9548, +/- 0.0019595917942265314
Auroc: 0.9639999999999999, +/- 0.0017888543819998364
top_value: 0.01
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.linear_model._logistic.LogisticRegression'>


100%|██████████| 4/4 [00:06<00:00,  1.55s/it]


Accuracy: 0.8234, +/- 0.00256124969497318
F1: 0.8240367200464934, +/- 0.002497276741487905
Precision: 0.8207002389363705, +/- 0.003389315203205123
Recall: 0.8292000000000002, +/- 0.0028705400188814515
Auroc: 0.8234, +/- 0.002561249694973143
top_value: 5
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.84s/it]


Accuracy: 0.7605999999999999, +/- 0.003414674215792751
F1: 0.7623492144814887, +/- 0.003648583474478859
Precision: 0.7580452917142514, +/- 0.002721757106588655
Recall: 0.7687999999999999, +/- 0.004923413450036472
Auroc: 0.7605999999999999, +/- 0.003414674215792768
top_value: 100
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.81s/it]


Accuracy: 0.7614000000000001, +/- 0.0049658836071740575
F1: 0.7582665424169549, +/- 0.0061122944765716145
Precision: 0.7700475398499854, +/- 0.003916868101400566
Recall: 0.7487999999999999, +/- 0.009645724441430003
Auroc: 0.7614000000000001, +/- 0.004965883607174039
top_value: 100
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:11<00:00,  2.85s/it]


Accuracy: 0.7912, +/- 0.003929376540877685
F1: 0.7935295426227137, +/- 0.003199364807419359
Precision: 0.7848559876602651, +/- 0.005488388956921185
Recall: 0.8036000000000001, +/- 0.0021354156504062465
Auroc: 0.7912000000000001, +/- 0.0039293765408776994
top_value: 100
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:10<00:00,  2.67s/it]


Accuracy: 0.9650000000000001, +/- 0.0008944271909998918
F1: 0.9649320996398162, +/- 0.000989592815723742
Precision: 0.9647676894758627, +/- 0.001578775215620227
Recall: 0.9655999999999999, +/- 0.0026381811916545354
Auroc: 0.9650000000000001, +/- 0.0008944271909999104
top_value: 25
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._forest.RandomForestClassifier'>


100%|██████████| 4/4 [00:10<00:00,  2.60s/it]


Accuracy: 0.7592000000000001, +/- 0.0024166091947188935
F1: 0.7553310668566944, +/- 0.002898040385384188
Precision: 0.7687997074739704, +/- 0.00196267070599759
Recall: 0.744, +/- 0.004427188724235735
Auroc: 0.7592000000000001, +/- 0.0024166091947189047
top_value: 100
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.27s/it]


Accuracy: 0.7454, +/- 0.0032031234756093804
F1: 0.7463406344249218, +/- 0.0030036826885463485
Precision: 0.7451979624450059, +/- 0.003156757689097665
Recall: 0.75, +/- 0.00456070170039655
Auroc: 0.7453999999999998, +/- 0.003203123475609374
top_value: 100
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:12<00:00,  3.10s/it]


Accuracy: 0.7576, +/- 0.006087692502089765
F1: 0.7586251135790729, +/- 0.005449319428976082
Precision: 0.7562676986525924, +/- 0.0072357843106312094
Recall: 0.7619999999999999, +/- 0.004147288270665548
Auroc: 0.7576, +/- 0.006087692502089765
top_value: 100
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.32s/it]


Accuracy: 0.7737999999999999, +/- 0.004152107898405342
F1: 0.7727128547024504, +/- 0.0037000909952055244
Precision: 0.7770270694365721, +/- 0.005934974381376685
Recall: 0.7712000000000001, +/- 0.0044988887516808004
Auroc: 0.7737999999999999, +/- 0.004152107898405339
top_value: 100
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.33s/it]


Accuracy: 0.9596, +/- 0.002111871208194265
F1: 0.9592591550122478, +/- 0.0022053955178830097
Precision: 0.9649527254867891, +/- 0.002106340767845896
Recall: 0.954, +/- 0.003999999999999967
Auroc: 0.9596, +/- 0.002111871208194265
top_value: 5
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>


100%|██████████| 4/4 [00:13<00:00,  3.32s/it]


Accuracy: 0.7572, +/- 0.003484250278036872
F1: 0.75628856582398, +/- 0.003196102804169619
Precision: 0.761178935389853, +/- 0.00436960714679284
Recall: 0.7532, +/- 0.0024166091947188965
Auroc: 0.7572, +/- 0.0034842502780368712
top_value: 100
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:06<00:00,  1.70s/it]


Accuracy: 0.7895999999999999, +/- 0.00305941170815566
F1: 0.7914552912437471, +/- 0.0028046436666241997
Precision: 0.7846362112180623, +/- 0.0038857794667436856
Recall: 0.7996000000000001, +/- 0.002638181191654581
Auroc: 0.7895999999999999, +/- 0.00305941170815566
top_value: 1
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.42s/it]


Accuracy: 0.8068, +/- 0.002653299832284285
F1: 0.8080303869124773, +/- 0.002803460887917699
Precision: 0.802883228226921, +/- 0.003097969828509205
Recall: 0.8144, +/- 0.0035999999999999795
Auroc: 0.8068, +/- 0.002653299832284285
top_value: 1
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.38s/it]


Accuracy: 0.837, +/- 0.001549193338482925
F1: 0.8395407757153389, +/- 0.0015012856336884966
Precision: 0.8273510971289154, +/- 0.001950205938655182
Recall: 0.8535999999999999, +/- 0.0019390719429665398
Auroc: 0.8370000000000001, +/- 0.0015491933384829321
top_value: 1
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.33s/it]


Accuracy: 0.9654, +/- 0.0014352700094407429
F1: 0.965214070543787, +/- 0.0014862670075661696
Precision: 0.9699902316616951, +/- 0.0009231498887758393
Recall: 0.9607999999999999, +/- 0.002154065922853812
Auroc: 0.9654, +/- 0.0014352700094407429
top_value: 1
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.svm._classes.SVC'>


100%|██████████| 4/4 [00:05<00:00,  1.38s/it]


Accuracy: 0.8018000000000001, +/- 0.002477902338672788
F1: 0.7956800961554462, +/- 0.0029962053228522914
Precision: 0.8200056452112344, +/- 0.003942301259038273
Recall: 0.776, +/- 0.005513619500836093
Auroc: 0.8017999999999998, +/- 0.002477902338672765
top_value: 0.01
Low-N, LazDEF-task, Vanilla-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:41<00:00, 10.39s/it]


Accuracy: 0.7874000000000001, +/- 0.0031717503054307056
F1: 0.7892442819136771, +/- 0.003553551231795305
Precision: 0.7830848970998588, +/- 0.002765029439127988
Recall: 0.7968, +/- 0.005276362383309153
Auroc: 0.7874, +/- 0.003171750305430732
top_value: 200
Low-N, LazDEF-task, Peptide-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:40<00:00, 10.12s/it]


Accuracy: 0.7948000000000001, +/- 0.0027820855486486986
F1: 0.7957524114200083, +/- 0.003082110978820135
Precision: 0.7920288093327843, +/- 0.002361831926007821
Recall: 0.8012, +/- 0.003720215047547632
Auroc: 0.7948000000000002, +/- 0.002782085548648741
top_value: 500
Low-N, LazDEF-task, LazBF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:42<00:00, 10.52s/it]


Accuracy: 0.8277999999999999, +/- 0.0019849433241279375
F1: 0.8289993268428468, +/- 0.0024721232847361636
Precision: 0.8244714908502144, +/- 0.0016947976999039998
Recall: 0.8348000000000001, +/- 0.005161395160225574
Auroc: 0.8278000000000001, +/- 0.001984943324127919
top_value: 500
Low-N, LazDEF-task, LazDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:31<00:00,  7.87s/it]


Accuracy: 0.9654, +/- 0.0016000000000000061
F1: 0.9650510193511854, +/- 0.0015572874820099422
Precision: 0.9749216610979866, +/- 0.0029671733606685116
Recall: 0.9555999999999999, +/- 0.0009797958971132607
Auroc: 0.9654, +/- 0.0016000000000000187
top_value: 100
Low-N, LazDEF-task, LazBCDEF-ESM-Embeddings <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>


100%|██████████| 4/4 [00:41<00:00, 10.47s/it]


Accuracy: 0.807, +/- 0.0029832867780352546
F1: 0.8084860513313525, +/- 0.002840785496555139
Precision: 0.8030939033690256, +/- 0.003654775923320908
Recall: 0.8148000000000002, +/- 0.0037202150475476336
Auroc: 0.807, +/- 0.0029832867780352546
top_value: 200
[[0.8026, 0.7605999999999999, 0.7454, 0.7895999999999999, 0.7874000000000001], [0.8106, 0.7614000000000001, 0.7576, 0.8068, 0.7948000000000001], [0.8311999999999999, 0.7912, 0.7737999999999999, 0.837, 0.8277999999999999], [0.9639999999999999, 0.9650000000000001, 0.9596, 0.9654, 0.9654], [0.8234, 0.7592000000000001, 0.7572, 0.8018000000000001, 0.807]]
[[0.001503329637837292, 0.003414674215792751, 0.0032031234756093804, 0.00305941170815566, 0.0031717503054307056], [0.003108054053583994, 0.0049658836071740575, 0.006087692502089765, 0.002653299832284285, 0.0027820855486486986], [0.0020346989949375635, 0.003929376540877685, 0.004152107898405342, 0.001549193338482925, 0.0019849433241279375], [0.001788854381999855, 0.0008944271909998918, 0.