In [None]:
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, roc_auc_score, average_precision_score
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns
import umap.umap_ as umap

  from .autonotebook import tqdm as notebook_tqdm


We first ran the `llm-embeddings.sh` script on the cluster.

In [2]:
# Load the saved feature arrays (X_*) and target arrays (y_*) for the
# train / validation / test splits, one (X, y) pair per split.
X_train, y_train = (
    np.load('llama_input_train_embeddings.npy'),
    np.load('y_train.npy'),
)
X_val, y_val = (
    np.load('llama_input_val_embeddings.npy'),
    np.load('y_val.npy'),
)
X_test, y_test = (
    np.load('llama_input_test_embeddings.npy'),
    np.load('y_test.npy'),
)


In [3]:
# === Hyperparameter grid ===
# Manual grid search over LogisticRegression settings. Each candidate is fit on
# (X_train, y_train) and scored on (X_val, y_val); the candidate with the highest
# validation AUROC is then evaluated once on (X_test, y_test).
C_values = [0.001, 0.01, 0.1, 1, 10]
solvers = ['lbfgs', 'saga']
penalties = ['l1', 'l2']
class_weights = [None, 'balanced']

# Penalties (among those searched here) that each solver accepts. Solvers missing
# from this table are not pre-filtered; scikit-learn validates them at fit time.
SUPPORTED_PENALTIES = {
    'lbfgs': {'l2'},
    'liblinear': {'l1', 'l2'},
    'saga': {'l1', 'l2'},
}

# Fixed seed so the stochastic solvers (saga, liblinear) give the same scores
# after a kernel restart; lbfgs ignores it.
SEED = 42

best_auc = -1
best_model = None
best_params = {}

print("🔍 Tuning hyperparameters using validation AUROC:\n")

for solver in solvers:
    allowed_penalties = SUPPORTED_PENALTIES.get(solver)
    for penalty in penalties:
        # Skip unsupported solver/penalty pairs once, before the inner loops.
        if allowed_penalties is not None and penalty not in allowed_penalties:
            continue

        for C in C_values:
            for cw in class_weights:
                # NOTE(review): the recorded output shows lbfgs stopping at the
                # iteration limit for C=10; consider scaling the features or
                # raising max_iter if those scores matter.
                try:
                    model = LogisticRegression(
                        C=C,
                        penalty=penalty,
                        solver=solver,
                        class_weight=cw,
                        max_iter=1000,
                        random_state=SEED
                    )
                    model.fit(X_train, y_train)
                except ValueError as e:
                    # Only configuration problems reported by scikit-learn are
                    # skipped; any other failure surfaces instead of being
                    # mislabelled as an invalid config.
                    print(f"⚠️ Skipping invalid config: {e}")
                    continue

                # Probability of the second class in model.classes_ is used as the score.
                val_probs = model.predict_proba(X_val)[:, 1]
                val_auc = roc_auc_score(y_val, val_probs)
                val_ap = average_precision_score(y_val, val_probs)

                print(f"C={C}, solver={solver}, penalty={penalty}, class_weight={cw} | AUROC: {val_auc:.4f} | AP: {val_ap:.4f}")

                # Strict '>' keeps the first configuration seen when AUROCs tie.
                if val_auc > best_auc:
                    best_auc = val_auc
                    best_model = model
                    best_params = {
                        'C': C,
                        'solver': solver,
                        'penalty': penalty,
                        'class_weight': cw
                    }

# Fail with a clear message rather than an AttributeError on None below.
if best_model is None:
    raise RuntimeError("No hyperparameter configuration could be fitted; check the grid and the training data.")

# === Evaluate best model ===
test_probs = best_model.predict_proba(X_test)[:, 1]
test_auc = roc_auc_score(y_test, test_probs)
test_ap = average_precision_score(y_test, test_probs)

print("\n✅ Best parameters found:")
for k, v in best_params.items():
    print(f"  {k}: {v}")
print(f"\n📊 Test AUROC: {test_auc:.4f}")
print(f"📊 Test Average Precision: {test_ap:.4f}")

🔍 Tuning hyperparameters using validation AUROC:

C=0.001, solver=lbfgs, penalty=l2, class_weight=None | AUROC: 0.7474 | AP: 0.3383
C=0.001, solver=lbfgs, penalty=l2, class_weight=balanced | AUROC: 0.7518 | AP: 0.3428
C=0.01, solver=lbfgs, penalty=l2, class_weight=None | AUROC: 0.7674 | AP: 0.3713
C=0.01, solver=lbfgs, penalty=l2, class_weight=balanced | AUROC: 0.7753 | AP: 0.3765
C=0.1, solver=lbfgs, penalty=l2, class_weight=None | AUROC: 0.7915 | AP: 0.4062
C=0.1, solver=lbfgs, penalty=l2, class_weight=balanced | AUROC: 0.7942 | AP: 0.4079
C=1, solver=lbfgs, penalty=l2, class_weight=None | AUROC: 0.7966 | AP: 0.4215
C=1, solver=lbfgs, penalty=l2, class_weight=balanced | AUROC: 0.7910 | AP: 0.4096


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


C=10, solver=lbfgs, penalty=l2, class_weight=None | AUROC: 0.7625 | AP: 0.3780


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


C=10, solver=lbfgs, penalty=l2, class_weight=balanced | AUROC: 0.7531 | AP: 0.3635
C=0.001, solver=saga, penalty=l1, class_weight=None | AUROC: 0.5000 | AP: 0.1420
C=0.001, solver=saga, penalty=l1, class_weight=balanced | AUROC: 0.5000 | AP: 0.1420




C=0.01, solver=saga, penalty=l1, class_weight=None | AUROC: 0.5541 | AP: 0.1626




C=0.01, solver=saga, penalty=l1, class_weight=balanced | AUROC: 0.6750 | AP: 0.2660




C=0.1, solver=saga, penalty=l1, class_weight=None | AUROC: 0.7372 | AP: 0.3306




C=0.1, solver=saga, penalty=l1, class_weight=balanced | AUROC: 0.7594 | AP: 0.3491




C=1, solver=saga, penalty=l1, class_weight=None | AUROC: 0.7850 | AP: 0.3962




C=1, solver=saga, penalty=l1, class_weight=balanced | AUROC: 0.7918 | AP: 0.4033




C=10, solver=saga, penalty=l1, class_weight=None | AUROC: 0.7960 | AP: 0.4148




C=10, solver=saga, penalty=l1, class_weight=balanced | AUROC: 0.7960 | AP: 0.4135
C=0.001, solver=saga, penalty=l2, class_weight=None | AUROC: 0.7472 | AP: 0.3393
C=0.001, solver=saga, penalty=l2, class_weight=balanced | AUROC: 0.7517 | AP: 0.3427
C=0.01, solver=saga, penalty=l2, class_weight=None | AUROC: 0.7675 | AP: 0.3717
C=0.01, solver=saga, penalty=l2, class_weight=balanced | AUROC: 0.7752 | AP: 0.3766
C=0.1, solver=saga, penalty=l2, class_weight=None | AUROC: 0.7908 | AP: 0.4050
C=0.1, solver=saga, penalty=l2, class_weight=balanced | AUROC: 0.7939 | AP: 0.4065




C=1, solver=saga, penalty=l2, class_weight=None | AUROC: 0.7966 | AP: 0.4156




C=1, solver=saga, penalty=l2, class_weight=balanced | AUROC: 0.7963 | AP: 0.4134




C=10, solver=saga, penalty=l2, class_weight=None | AUROC: 0.7966 | AP: 0.4155
C=10, solver=saga, penalty=l2, class_weight=balanced | AUROC: 0.7960 | AP: 0.4136

✅ Best parameters found:
  C: 10
  solver: saga
  penalty: l2
  class_weight: None

📊 Test AUROC: 0.8143
📊 Test Average Precision: 0.4464




Hyperparameter tuning on validation AUROC with the `liblinear` solver (the program crashed during this run; the results obtained before the crash are listed below):

* C=0.001, solver=liblinear, penalty=l1, class_weight=None | AUROC: 0.5541 | AP: 0.1626
* C=0.001, solver=liblinear, penalty=l1, class_weight=balanced | AUROC: 0.5000 | AP: 0.1420
* C=0.01, solver=liblinear, penalty=l1, class_weight=None | AUROC: 0.5743 | AP: 0.1788
* C=0.01, solver=liblinear, penalty=l1, class_weight=balanced | AUROC: 0.6832 | AP: 0.2741
* C=0.1, solver=liblinear, penalty=l1, class_weight=None | AUROC: 0.7354 | AP: 0.3276
* C=0.1, solver=liblinear, penalty=l1, class_weight=balanced | AUROC: 0.7614 | AP: 0.3505
* C=1, solver=liblinear, penalty=l1, class_weight=None | AUROC: 0.7928 | AP: 0.4057
* C=1, solver=liblinear, penalty=l1, class_weight=balanced | AUROC: 0.7924 | AP: 0.4084
* C=10, solver=liblinear, penalty=l1, class_weight=None | AUROC: 0.7551 | AP: 0.3685