# Model Testing

In [1]:
def test_on_clinical_dataset(
    model_name,
):
    import pandas as pd
    import joblib
    import lightgbm as lgb
    from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score
    import numpy as np
    import os

    model_dir = f"../models/{model_name}"
    os.makedirs(model_dir, exist_ok=True)

    trained_on = pd.read_csv(os.path.join(model_dir, "training_variants.txt"), sep="\t")
    trained_features = pd.read_csv(os.path.join(model_dir, "training_features.txt"), sep="\t")["Feature"].tolist()

    lgb_model = joblib.load(os.path.join(model_dir, "model.pkl"))

    data_path = "../data/final/clinical_labels_model_input.txt"
    target_column = "clinical_label"

    df_test = pd.read_csv(data_path, sep="\t")

    df_test = df_test[~df_test["ID"].isin(trained_on["ID"])]

    id_column = "ID"
    df_test = df_test.dropna(subset=[target_column])

    # Convert target labels
    df_test[target_column] = df_test[target_column].replace({"P": 1, "B": 0})

    df_test.columns = df_test.columns.str.replace(" ", "_")

    # Ensure only trained features are used
    trained_features = lgb_model.feature_name_
    feature_columns = df_test.columns.difference([id_column, target_column])

    # Convert feature columns to numeric
    df_test[feature_columns] = df_test[feature_columns].apply(pd.to_numeric, errors='coerce')

    # Drop rows with missing target values
    df_test = df_test.dropna(subset=[target_column])

    # Select only features used in training
    X_new = df_test[trained_features]
    y_new = df_test[target_column]

    # Get predicted probabilities
    y_pred_proba_new = lgb_model.predict_proba(X_new)[:, 1]

    # Calculate optimal cutoff
    fpr, tpr, thresholds = roc_curve(y_new, y_pred_proba_new)
    optimal_idx = np.argmax(tpr - fpr)  # Maximize (sensitivity - (1 - specificity))
    optimal_cutoff = thresholds[optimal_idx]

    # Apply optimal cutoff to get binary predictions
    y_pred_new = (y_pred_proba_new >= optimal_cutoff).astype(int)

    # Evaluate model performance
    auc_score_new = roc_auc_score(y_new, y_pred_proba_new)
    accuracy_new = accuracy_score(y_new, y_pred_new)

    print(f"AUC on testing data: {auc_score_new}")
    print(f"Optimal Cutoff for Maximized Sensitivity and Specificity: {optimal_cutoff}")
    print(f"Accuracy with optimal cutoff: {accuracy_new * 100:.2f}%")

    # Store predictions
    df_test[model_name] = y_pred_proba_new
    df_test = df_test[["ID", target_column, model_name]]

    result_dir = f"../results/predictions/clinical"
    os.makedirs(result_dir, exist_ok=True)
    df_test.to_csv(os.path.join(result_dir, f"{model_name}.txt"), sep="\t", index=False)


In [6]:
def test_on_functional_dataset(model_name):
    import pandas as pd
    import joblib
    import lightgbm as lgb
    from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score
    import numpy as np
    import os

    model_dir = f"../models/{model_name}"
    os.makedirs(model_dir, exist_ok=True)

    trained_on = pd.read_csv(os.path.join(model_dir, "training_variants.txt"), sep="\t")
    trained_features = pd.read_csv(os.path.join(model_dir, "training_features.txt"), sep="\t")["Feature"].tolist()

    lgb_model = joblib.load(os.path.join(model_dir, "model.pkl"))

    df_test = pd.read_csv("../data/final/functional_labels_model_input.txt", sep="\t")

    df_test = df_test[df_test["weight"] == 1]

    df_test = df_test[~df_test["ID"].isin(trained_on["ID"])]

    target_column = "functional_label"
    id_column = "ID"
    df_test = df_test.dropna(subset=[target_column])

    # Convert target labels
    df_test[target_column] = df_test[target_column].replace({"PS3": 1, "BS3": 0})

    df_test.columns = df_test.columns.str.replace(" ", "_")

    # Ensure only trained features are used
    trained_features = lgb_model.feature_name_
    feature_columns = df_test.columns.difference([id_column, target_column])

    # Convert feature columns to numeric
    df_test[feature_columns] = df_test[feature_columns].apply(pd.to_numeric, errors='coerce')

    # Drop rows with missing target values
    df_test = df_test.dropna(subset=[target_column])

    # Select only features used in training
    X_new = df_test[trained_features]
    y_new = df_test[target_column]

    # Get predicted probabilities
    y_pred_proba_new = lgb_model.predict_proba(X_new)[:, 1]

    # Calculate optimal cutoff
    fpr, tpr, thresholds = roc_curve(y_new, y_pred_proba_new)
    optimal_idx = np.argmax(tpr - fpr)  # Maximize (sensitivity - (1 - specificity))
    optimal_cutoff = thresholds[optimal_idx]

    # Apply optimal cutoff to get binary predictions
    y_pred_new = (y_pred_proba_new >= optimal_cutoff).astype(int)

    # Evaluate model performance
    auc_score_new = roc_auc_score(y_new, y_pred_proba_new)
    accuracy_new = accuracy_score(y_new, y_pred_new)

    print(f"AUC on testing data: {auc_score_new}")
    print(f"Optimal Cutoff for Maximized Sensitivity and Specificity: {optimal_cutoff}")
    print(f"Accuracy with optimal cutoff: {accuracy_new * 100:.2f}%")

    # Store predictions
    df_test[model_name] = y_pred_proba_new
    df_test = df_test[["ID", target_column, model_name]]

    result_dir = f"../results/predictions/functional"
    os.makedirs(result_dir, exist_ok=True)
    df_test.to_csv(os.path.join(result_dir, f"{model_name}.txt"), sep="\t", index=False)


### FuncVEP-CTI Testing on Clinical Data

In [3]:
test_on_clinical_dataset("FuncVEP_CTI")

  df_test[target_column] = df_test[target_column].replace({"P": 1, "B": 0})


AUC on testing data: 0.9929964364441541
Optimal Cutoff for Maximized Sensitivity and Specificity: 0.5215514558527333
Accuracy with optimal cutoff: 96.64%


### FuncVEP-CTE Testing on Clinical Data

In [4]:
test_on_clinical_dataset("FuncVEP_CTE")

  df_test[target_column] = df_test[target_column].replace({"P": 1, "B": 0})


AUC on testing data: 0.9719892955414221
Optimal Cutoff for Maximized Sensitivity and Specificity: 0.6024811404090825
Accuracy with optimal cutoff: 92.11%


### FuncVEP-SP Testing on Clinical Data

In [5]:
test_on_clinical_dataset("FuncVEP_SP")

  df_test[target_column] = df_test[target_column].replace({"P": 1, "B": 0})


AUC on testing data: 0.9476421931736309
Optimal Cutoff for Maximized Sensitivity and Specificity: 0.5835815336239117
Accuracy with optimal cutoff: 88.10%


### ClinVEP-CTI Testing on Functional Data

In [7]:
test_on_functional_dataset("ClinVEP_CTI")

  df_test[target_column] = df_test[target_column].replace({"PS3": 1, "BS3": 0})


AUC on testing data: 0.9117513505588414
Optimal Cutoff for Maximized Sensitivity and Specificity: 0.9838078962063843
Accuracy with optimal cutoff: 84.27%


### ClinVEP-CTE Testing on Functional Data

In [8]:
test_on_functional_dataset("ClinVEP_CTE")

  df_test[target_column] = df_test[target_column].replace({"PS3": 1, "BS3": 0})


AUC on testing data: 0.8487754130408951
Optimal Cutoff for Maximized Sensitivity and Specificity: 0.7691221471629278
Accuracy with optimal cutoff: 82.03%


### ClinVEP-SP Testing on Functional Data

In [10]:
test_on_functional_dataset("ClinVEP_SP")

  df_test[target_column] = df_test[target_column].replace({"PS3": 1, "BS3": 0})


AUC on testing data: 0.8613656675279157
Optimal Cutoff for Maximized Sensitivity and Specificity: 0.5638242534176116
Accuracy with optimal cutoff: 82.02%
