# Analyze repertoire stats model performance on validation set

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Union

%matplotlib inline
import seaborn as sns
import genetools
from IPython.display import display, Markdown

In [3]:
from malid import config, helpers, logger
from malid.external import model_evaluation
from malid.trained_model_wrappers import RepertoireClassifier
from malid.datamodels import (
    combine_classification_option_names,
)

# Analyze

In [5]:
# For every (gene locus, classification target) combination:
#   1. load the saved "train_smaller_model" experiment set from the models directory,
#   2. drop the global-fold outputs so only cross-validation folds are scored,
#   3. export classification reports, confusion matrices and a model comparison table,
#   4. for binary targets, re-summarize against the expanded ground truth column,
#   5. plot per-model feature importance variability across folds.
# A failure for one combination is logged (with traceback) and does not stop the others.
for gene_locus in config.gene_loci_used:
    for target_obs_col in config.classification_targets:
        models_base_dir = RepertoireClassifier._get_model_base_dir(
            gene_locus=gene_locus, target_obs_column=target_obs_col
        )  # should already exist

        output_base_dir = (
            config.paths.repertoire_stats_classifier_output_dir
            / gene_locus.name
            / combine_classification_option_names(target_obs_col)
        )  # might not yet exist
        output_base_dir.mkdir(parents=True, exist_ok=True)  # create if needed

        # Same file-name stem is used for reading models and for writing results.
        model_output_prefix = models_base_dir / "train_smaller_model"
        results_output_prefix = output_base_dir / "train_smaller_model"

        try:
            logger.info(
                f"{gene_locus}, {target_obs_col} from {model_output_prefix} to {results_output_prefix}"
            )

            ## Load and summarize
            experiment_set = model_evaluation.ExperimentSet.load_from_disk(
                output_prefix=model_output_prefix
            )

            # Remove global fold (we trained global fold model, but now get evaluation scores on cross-validation folds only)
            # TODO: make kdict support: del self.model_outputs[:, fold_id]
            # Snapshot the keys before deleting: we must not depend on whether the
            # slice is an independent copy or a live view of model_outputs, because
            # mutating a mapping while iterating over it is an error.
            global_fold_keys = list(experiment_set.model_outputs[:, -1].keys())
            for key in global_fold_keys:
                logger.debug(f"Removing {key} (global fold)")
                del experiment_set.model_outputs[key]

            experiment_set_global_performance = experiment_set.summarize()
            # The lambdas below are invoked during this call, so closing over the
            # loop variable results_output_prefix is safe.
            experiment_set_global_performance.export_all_models(
                func_generate_classification_report_fname=lambda model_name: f"{results_output_prefix}.classification_report.{model_name}.txt",
                func_generate_confusion_matrix_fname=lambda model_name: f"{results_output_prefix}.confusion_matrix.{model_name}.png",
                dpi=72,
            )
            combined_stats = (
                experiment_set_global_performance.get_model_comparison_stats(sort=True)
            )
            combined_stats.to_csv(
                f"{results_output_prefix}.compare_model_scores.tsv",
                sep="\t",
            )
            display(
                Markdown(
                    f"## {gene_locus}, {target_obs_col} from {model_output_prefix} to {results_output_prefix}"
                )
            )
            display(combined_stats)

            ## Review binary misclassifications: Binary prediction vs ground truth
            # For binary case, make new confusion matrix of actual disease label (y) vs predicted y_binary
            # (But this changes global score metrics)
            if (
                target_obs_col.value.is_target_binary_for_repertoire_composition_classifier
            ):
                # this is a binary healthy/sick classifier
                # re-summarize with different ground truth label
                experiment_set.summarize(
                    global_evaluation_column_name=target_obs_col.value.confusion_matrix_expanded_column_name
                ).export_all_models(
                    func_generate_classification_report_fname=lambda model_name: f"{results_output_prefix}.classification_report.{model_name}.binary_vs_ground_truth.txt",
                    func_generate_confusion_matrix_fname=lambda model_name: f"{results_output_prefix}.confusion_matrix.{model_name}.binary_vs_ground_truth.png",
                    confusion_matrix_pred_label="Predicted binary label",
                    dpi=72,
                )

            ## also create the "coefficient variability" plot, over all the CV folds
            for (
                model_name,
                model_global_performance,
            ) in experiment_set_global_performance.model_global_performances.items():
                # get feature importances for each fold
                feature_importances: Union[
                    pd.DataFrame, None
                ] = model_global_performance.feature_importances

                if feature_importances is None:
                    # feature importances are not available for this model
                    continue

                fig = plt.figure(figsize=(9, 9))
                try:
                    # One horizontal box per column, on absolute values so that
                    # sign does not matter.
                    sns.boxplot(data=feature_importances.abs(), orient="h")
                    plt.title(
                        f"Feature importance (absolute value) variability: {model_name}"
                    )
                    plt.tight_layout()
                    genetools.plots.savefig(
                        fig,
                        f"{results_output_prefix}.feature_importances.{model_name}.png",
                        dpi=72,
                    )
                finally:
                    # Always release the figure: errors are swallowed by the outer
                    # handler and the loop continues, so an unclosed figure would
                    # otherwise stay open (and be rendered inline) for the rest
                    # of the run.
                    plt.close(fig)

        except Exception as err:
            # Deliberate best-effort: log with traceback and move on to the next target.
            logger.exception(f"{gene_locus}, {target_obs_col} failed with error: {err}")

2023-01-07 04:08:58,592 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.disease from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/disease/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/disease/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.disease from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/disease/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/disease/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.014 (in 3 folds),0.940 +/- 0.016 (in 3 folds),0.937 +/- 0.017 (in 3 folds),0.941 +/- 0.018 (in 3 folds),0.805 +/- 0.023 (in 3 folds),0.719 +/- 0.031 (in 3 folds),0.805,0.718,323,0,323,0.0,False
rf_multiclass,0.922 +/- 0.008 (in 3 folds),0.923 +/- 0.009 (in 3 folds),0.929 +/- 0.008 (in 3 folds),0.932 +/- 0.010 (in 3 folds),0.768 +/- 0.051 (in 3 folds),0.659 +/- 0.068 (in 3 folds),0.768,0.658,323,0,323,0.0,False
linearsvm_ovr,0.918 +/- 0.010 (in 3 folds),0.918 +/- 0.014 (in 3 folds),0.916 +/- 0.014 (in 3 folds),0.920 +/- 0.016 (in 3 folds),0.783 +/- 0.005 (in 3 folds),0.685 +/- 0.011 (in 3 folds),0.783,0.683,323,0,323,0.0,False
xgboost,0.910 +/- 0.029 (in 3 folds),0.909 +/- 0.031 (in 3 folds),0.918 +/- 0.029 (in 3 folds),0.922 +/- 0.031 (in 3 folds),0.783 +/- 0.019 (in 3 folds),0.682 +/- 0.031 (in 3 folds),0.783,0.681,323,0,323,0.0,False
ridge_cv,0.907 +/- 0.019 (in 3 folds),0.900 +/- 0.024 (in 3 folds),0.907 +/- 0.022 (in 3 folds),0.906 +/- 0.027 (in 3 folds),0.575 +/- 0.202 (in 3 folds),0.238 +/- 0.412 (in 3 folds),0.576,0.358,323,0,323,0.0,False
elasticnet_cv,0.907 +/- 0.016 (in 3 folds),0.900 +/- 0.017 (in 3 folds),0.906 +/- 0.013 (in 3 folds),0.906 +/- 0.015 (in 3 folds),0.755 +/- 0.046 (in 3 folds),0.641 +/- 0.059 (in 3 folds),0.755,0.638,323,0,323,0.0,False
lasso_cv,0.896 +/- 0.019 (in 3 folds),0.889 +/- 0.019 (in 3 folds),0.892 +/- 0.016 (in 3 folds),0.892 +/- 0.018 (in 3 folds),0.762 +/- 0.072 (in 3 folds),0.650 +/- 0.100 (in 3 folds),0.762,0.646,323,0,323,0.0,False
dummy_stratified,0.509 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.512 +/- 0.015 (in 3 folds),0.515 +/- 0.016 (in 3 folds),0.341 +/- 0.052 (in 3 folds),0.015 +/- 0.067 (in 3 folds),0.341,0.014,323,0,323,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.019 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,323,0,323,0.0,True


2023-01-07 04:09:04,130 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/disease_all_demographics_present/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/disease_all_demographics_present/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/disease_all_demographics_present/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/disease_all_demographics_present/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.012 (in 3 folds),0.939 +/- 0.012 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.941 +/- 0.014 (in 3 folds),0.800 +/- 0.008 (in 3 folds),0.708 +/- 0.007 (in 3 folds),0.8,0.707,285,0,285,0.0,False
rf_multiclass,0.918 +/- 0.004 (in 3 folds),0.918 +/- 0.006 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.003 (in 3 folds),0.748 +/- 0.062 (in 3 folds),0.629 +/- 0.075 (in 3 folds),0.747,0.626,285,0,285,0.0,False
xgboost,0.913 +/- 0.018 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.925 +/- 0.015 (in 3 folds),0.927 +/- 0.014 (in 3 folds),0.754 +/- 0.032 (in 3 folds),0.640 +/- 0.038 (in 3 folds),0.754,0.635,285,0,285,0.0,False
linearsvm_ovr,0.912 +/- 0.010 (in 3 folds),0.910 +/- 0.010 (in 3 folds),0.908 +/- 0.007 (in 3 folds),0.910 +/- 0.010 (in 3 folds),0.772 +/- 0.048 (in 3 folds),0.668 +/- 0.064 (in 3 folds),0.772,0.664,285,0,285,0.0,False
ridge_cv,0.910 +/- 0.015 (in 3 folds),0.903 +/- 0.022 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.910 +/- 0.024 (in 3 folds),0.654 +/- 0.159 (in 3 folds),0.421 +/- 0.375 (in 3 folds),0.656,0.498,285,0,285,0.0,False
elasticnet_cv,0.904 +/- 0.009 (in 3 folds),0.898 +/- 0.008 (in 3 folds),0.908 +/- 0.013 (in 3 folds),0.909 +/- 0.011 (in 3 folds),0.780 +/- 0.076 (in 3 folds),0.677 +/- 0.101 (in 3 folds),0.779,0.671,285,0,285,0.0,False
lasso_cv,0.891 +/- 0.021 (in 3 folds),0.885 +/- 0.021 (in 3 folds),0.896 +/- 0.017 (in 3 folds),0.896 +/- 0.017 (in 3 folds),0.759 +/- 0.090 (in 3 folds),0.644 +/- 0.123 (in 3 folds),0.758,0.639,285,0,285,0.0,False
dummy_stratified,0.530 +/- 0.012 (in 3 folds),0.536 +/- 0.011 (in 3 folds),0.524 +/- 0.005 (in 3 folds),0.529 +/- 0.005 (in 3 folds),0.379 +/- 0.006 (in 3 folds),0.059 +/- 0.030 (in 3 folds),0.379,0.059,285,0,285,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.463,0.0,285,0,285,0.0,True


2023-01-07 04:09:09,389 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/covid_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/covid_vs_healthy/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/covid_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/covid_vs_healthy/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.963 +/- 0.007 (in 3 folds),0.963 +/- 0.007 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.948 +/- 0.010 (in 3 folds),0.844 +/- 0.040 (in 3 folds),0.948,0.844,191,0,191,0.0,False
elasticnet_cv,0.954 +/- 0.023 (in 3 folds),0.954 +/- 0.023 (in 3 folds),0.983 +/- 0.011 (in 3 folds),0.983 +/- 0.011 (in 3 folds),0.822 +/- 0.058 (in 3 folds),0.227 +/- 0.394 (in 3 folds),0.822,0.394,191,0,191,0.0,False
linearsvm_ovr,0.950 +/- 0.024 (in 3 folds),0.950 +/- 0.024 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.947 +/- 0.025 (in 3 folds),0.843 +/- 0.084 (in 3 folds),0.948,0.844,191,0,191,0.0,False
rf_multiclass,0.948 +/- 0.031 (in 3 folds),0.948 +/- 0.031 (in 3 folds),0.972 +/- 0.019 (in 3 folds),0.972 +/- 0.019 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.760 +/- 0.061 (in 3 folds),0.921,0.762,191,0,191,0.0,False
xgboost,0.946 +/- 0.026 (in 3 folds),0.946 +/- 0.026 (in 3 folds),0.967 +/- 0.035 (in 3 folds),0.967 +/- 0.035 (in 3 folds),0.942 +/- 0.010 (in 3 folds),0.829 +/- 0.031 (in 3 folds),0.942,0.827,191,0,191,0.0,False
ridge_cv,0.935 +/- 0.011 (in 3 folds),0.935 +/- 0.011 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191,0,191,0.0,True
lasso_cv,0.929 +/- 0.026 (in 3 folds),0.929 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191,0,191,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191,0,191,0.0,True
dummy_stratified,0.415 +/- 0.006 (in 3 folds),0.415 +/- 0.006 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.607 +/- 0.017 (in 3 folds),-0.175 +/- 0.016 (in 3 folds),0.607,-0.175,191,0,191,0.0,False


2023-01-07 04:09:19,147 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/hiv_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/hiv_vs_healthy/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/hiv_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/hiv_vs_healthy/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
xgboost,0.966 +/- 0.010 (in 3 folds),0.966 +/- 0.010 (in 3 folds),0.987 +/- 0.003 (in 3 folds),0.987 +/- 0.003 (in 3 folds),0.897 +/- 0.014 (in 3 folds),0.753 +/- 0.037 (in 3 folds),0.897,0.752,213,0,213,0.0,False
lasso_cv,0.965 +/- 0.027 (in 3 folds),0.965 +/- 0.027 (in 3 folds),0.986 +/- 0.010 (in 3 folds),0.986 +/- 0.010 (in 3 folds),0.907 +/- 0.052 (in 3 folds),0.771 +/- 0.134 (in 3 folds),0.906,0.772,213,0,213,0.0,False
rf_multiclass,0.964 +/- 0.016 (in 3 folds),0.964 +/- 0.016 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.902 +/- 0.012 (in 3 folds),0.761 +/- 0.033 (in 3 folds),0.901,0.761,213,0,213,0.0,False
elasticnet_cv,0.963 +/- 0.024 (in 3 folds),0.963 +/- 0.024 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.902 +/- 0.036 (in 3 folds),0.759 +/- 0.096 (in 3 folds),0.901,0.76,213,0,213,0.0,False
lasso_multiclass,0.961 +/- 0.030 (in 3 folds),0.961 +/- 0.030 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.925 +/- 0.031 (in 3 folds),0.824 +/- 0.073 (in 3 folds),0.925,0.823,213,0,213,0.0,False
ridge_cv,0.949 +/- 0.029 (in 3 folds),0.949 +/- 0.029 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.765 +/- 0.069 (in 3 folds),0.327 +/- 0.297 (in 3 folds),0.765,0.382,213,0,213,0.0,False
linearsvm_ovr,0.942 +/- 0.022 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.975 +/- 0.010 (in 3 folds),0.975 +/- 0.010 (in 3 folds),0.901 +/- 0.026 (in 3 folds),0.771 +/- 0.059 (in 3 folds),0.901,0.772,213,0,213,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.7,0.0,213,0,213,0.0,True
dummy_stratified,0.424 +/- 0.015 (in 3 folds),0.424 +/- 0.015 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.530 +/- 0.010 (in 3 folds),-0.159 +/- 0.031 (in 3 folds),0.531,-0.159,213,0,213,0.0,False


2023-01-07 04:09:28,749 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/lupus_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/lupus_vs_healthy/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/lupus_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/lupus_vs_healthy/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.027 (in 3 folds),0.938 +/- 0.027 (in 3 folds),0.904 +/- 0.040 (in 3 folds),0.904 +/- 0.040 (in 3 folds),0.885 +/- 0.008 (in 3 folds),0.734 +/- 0.012 (in 3 folds),0.885,0.733,217,0,217,0.0,False
linearsvm_ovr,0.936 +/- 0.030 (in 3 folds),0.936 +/- 0.030 (in 3 folds),0.893 +/- 0.048 (in 3 folds),0.893 +/- 0.048 (in 3 folds),0.876 +/- 0.025 (in 3 folds),0.717 +/- 0.043 (in 3 folds),0.876,0.715,217,0,217,0.0,False
elasticnet_cv,0.934 +/- 0.022 (in 3 folds),0.934 +/- 0.022 (in 3 folds),0.905 +/- 0.036 (in 3 folds),0.905 +/- 0.036 (in 3 folds),0.885 +/- 0.021 (in 3 folds),0.725 +/- 0.044 (in 3 folds),0.885,0.725,217,0,217,0.0,False
ridge_cv,0.933 +/- 0.028 (in 3 folds),0.933 +/- 0.028 (in 3 folds),0.894 +/- 0.062 (in 3 folds),0.894 +/- 0.062 (in 3 folds),0.812 +/- 0.134 (in 3 folds),0.491 +/- 0.426 (in 3 folds),0.811,0.558,217,0,217,0.0,False
lasso_cv,0.930 +/- 0.023 (in 3 folds),0.930 +/- 0.023 (in 3 folds),0.894 +/- 0.040 (in 3 folds),0.894 +/- 0.040 (in 3 folds),0.880 +/- 0.015 (in 3 folds),0.715 +/- 0.027 (in 3 folds),0.88,0.715,217,0,217,0.0,False
xgboost,0.909 +/- 0.002 (in 3 folds),0.909 +/- 0.002 (in 3 folds),0.868 +/- 0.013 (in 3 folds),0.868 +/- 0.013 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.647 +/- 0.035 (in 3 folds),0.848,0.638,217,0,217,0.0,False
rf_multiclass,0.899 +/- 0.027 (in 3 folds),0.899 +/- 0.027 (in 3 folds),0.852 +/- 0.046 (in 3 folds),0.852 +/- 0.046 (in 3 folds),0.857 +/- 0.044 (in 3 folds),0.660 +/- 0.091 (in 3 folds),0.857,0.657,217,0,217,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.313 +/- 0.028 (in 3 folds),0.313 +/- 0.028 (in 3 folds),0.687 +/- 0.028 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.687,0.0,217,0,217,0.0,True
dummy_stratified,0.394 +/- 0.034 (in 3 folds),0.394 +/- 0.034 (in 3 folds),0.294 +/- 0.028 (in 3 folds),0.294 +/- 0.028 (in 3 folds),0.498 +/- 0.040 (in 3 folds),-0.223 +/- 0.075 (in 3 folds),0.498,-0.223,217,0,217,0.0,False


2023-01-07 04:09:38,488 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/ethnicity_condensed_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/ethnicity_condensed_healthy_only/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/ethnicity_condensed_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/ethnicity_condensed_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.746 +/- 0.009 (in 3 folds),0.739 +/- 0.009 (in 3 folds),0.771 +/- 0.012 (in 3 folds),0.769 +/- 0.006 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
lasso_cv,0.741 +/- 0.009 (in 3 folds),0.735 +/- 0.010 (in 3 folds),0.772 +/- 0.015 (in 3 folds),0.772 +/- 0.008 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
linearsvm_ovr,0.716 +/- 0.111 (in 3 folds),0.691 +/- 0.127 (in 3 folds),0.719 +/- 0.089 (in 3 folds),0.698 +/- 0.099 (in 3 folds),0.637 +/- 0.051 (in 3 folds),0.454 +/- 0.125 (in 3 folds),0.636,0.435,132,0,132,0.0,False
lasso_multiclass,0.696 +/- 0.143 (in 3 folds),0.671 +/- 0.165 (in 3 folds),0.738 +/- 0.113 (in 3 folds),0.721 +/- 0.133 (in 3 folds),0.542 +/- 0.126 (in 3 folds),0.288 +/- 0.201 (in 3 folds),0.545,0.309,132,0,132,0.0,False
rf_multiclass,0.670 +/- 0.036 (in 3 folds),0.616 +/- 0.022 (in 3 folds),0.692 +/- 0.008 (in 3 folds),0.653 +/- 0.006 (in 3 folds),0.658 +/- 0.094 (in 3 folds),0.517 +/- 0.069 (in 3 folds),0.659,0.513,132,0,132,0.0,True
xgboost,0.648 +/- 0.110 (in 3 folds),0.634 +/- 0.125 (in 3 folds),0.682 +/- 0.048 (in 3 folds),0.667 +/- 0.051 (in 3 folds),0.643 +/- 0.048 (in 3 folds),0.452 +/- 0.033 (in 3 folds),0.644,0.452,132,0,132,0.0,True
ridge_cv,0.602 +/- 0.089 (in 3 folds),0.564 +/- 0.057 (in 3 folds),0.625 +/- 0.109 (in 3 folds),0.605 +/- 0.092 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
dummy_stratified,0.457 +/- 0.015 (in 3 folds),0.467 +/- 0.014 (in 3 folds),0.493 +/- 0.003 (in 3 folds),0.495 +/- 0.002 (in 3 folds),0.318 +/- 0.073 (in 3 folds),-0.102 +/- 0.053 (in 3 folds),0.318,-0.103,132,0,132,0.0,False


2023-01-07 04:09:43,846 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/age_group_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/age_group_healthy_only/train_smaller_model


























































































































































































































## GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/age_group_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/age_group_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.647 +/- 0.043 (in 3 folds),0.644 +/- 0.043 (in 3 folds),0.690 +/- 0.040 (in 3 folds),0.688 +/- 0.041 (in 3 folds),0.191 +/- 0.047 (in 3 folds),0.148 +/- 0.070 (in 3 folds),0.189,0.019,132,0,132,0.0,True
lasso_cv,0.642 +/- 0.036 (in 3 folds),0.638 +/- 0.038 (in 3 folds),0.688 +/- 0.035 (in 3 folds),0.685 +/- 0.038 (in 3 folds),0.214 +/- 0.058 (in 3 folds),0.166 +/- 0.083 (in 3 folds),0.212,0.051,132,0,132,0.0,True
ridge_cv,0.633 +/- 0.065 (in 3 folds),0.628 +/- 0.072 (in 3 folds),0.680 +/- 0.047 (in 3 folds),0.675 +/- 0.051 (in 3 folds),0.152 +/- 0.020 (in 3 folds),-0.003 +/- 0.006 (in 3 folds),0.152,-0.045,132,0,132,0.0,True
lasso_multiclass,0.624 +/- 0.062 (in 3 folds),0.616 +/- 0.076 (in 3 folds),0.677 +/- 0.059 (in 3 folds),0.669 +/- 0.070 (in 3 folds),0.281 +/- 0.059 (in 3 folds),0.148 +/- 0.063 (in 3 folds),0.28,0.144,132,0,132,0.0,False
rf_multiclass,0.610 +/- 0.038 (in 3 folds),0.604 +/- 0.037 (in 3 folds),0.669 +/- 0.035 (in 3 folds),0.665 +/- 0.038 (in 3 folds),0.260 +/- 0.061 (in 3 folds),0.133 +/- 0.069 (in 3 folds),0.258,0.106,132,0,132,0.0,True
linearsvm_ovr,0.605 +/- 0.071 (in 3 folds),0.601 +/- 0.082 (in 3 folds),0.651 +/- 0.066 (in 3 folds),0.649 +/- 0.077 (in 3 folds),0.235 +/- 0.036 (in 3 folds),0.094 +/- 0.035 (in 3 folds),0.235,0.092,132,0,132,0.0,False
xgboost,0.552 +/- 0.031 (in 3 folds),0.550 +/- 0.040 (in 3 folds),0.631 +/- 0.025 (in 3 folds),0.633 +/- 0.035 (in 3 folds),0.205 +/- 0.013 (in 3 folds),0.058 +/- 0.007 (in 3 folds),0.205,0.05,132,0,132,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.167 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.167,-0.046,132,0,132,0.0,True
dummy_stratified,0.479 +/- 0.043 (in 3 folds),0.479 +/- 0.041 (in 3 folds),0.519 +/- 0.014 (in 3 folds),0.519 +/- 0.014 (in 3 folds),0.126 +/- 0.068 (in 3 folds),-0.044 +/- 0.099 (in 3 folds),0.129,-0.047,132,0,132,0.0,True


2023-01-07 04:09:51,671 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/age_group_binary_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/age_group_binary_healthy_only/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/age_group_binary_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/age_group_binary_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.684 +/- 0.097 (in 3 folds),0.684 +/- 0.097 (in 3 folds),0.793 +/- 0.104 (in 3 folds),0.793 +/- 0.104 (in 3 folds),0.691 +/- 0.071 (in 3 folds),0.274 +/- 0.255 (in 3 folds),0.689,0.247,132,0,132,0.0,False
xgboost,0.673 +/- 0.077 (in 3 folds),0.673 +/- 0.077 (in 3 folds),0.802 +/- 0.073 (in 3 folds),0.802 +/- 0.073 (in 3 folds),0.685 +/- 0.074 (in 3 folds),0.276 +/- 0.162 (in 3 folds),0.682,0.263,132,0,132,0.0,False
lasso_multiclass,0.621 +/- 0.062 (in 3 folds),0.621 +/- 0.062 (in 3 folds),0.765 +/- 0.100 (in 3 folds),0.765 +/- 0.100 (in 3 folds),0.576 +/- 0.097 (in 3 folds),0.137 +/- 0.193 (in 3 folds),0.576,0.115,132,0,132,0.0,False
linearsvm_ovr,0.593 +/- 0.059 (in 3 folds),0.593 +/- 0.059 (in 3 folds),0.768 +/- 0.090 (in 3 folds),0.768 +/- 0.090 (in 3 folds),0.547 +/- 0.061 (in 3 folds),0.023 +/- 0.123 (in 3 folds),0.545,0.01,132,0,132,0.0,False
elasticnet_cv,0.539 +/- 0.080 (in 3 folds),0.539 +/- 0.080 (in 3 folds),0.706 +/- 0.070 (in 3 folds),0.706 +/- 0.070 (in 3 folds),0.654 +/- 0.077 (in 3 folds),0.037 +/- 0.142 (in 3 folds),0.652,0.025,132,0,132,0.0,False
lasso_cv,0.532 +/- 0.084 (in 3 folds),0.532 +/- 0.084 (in 3 folds),0.700 +/- 0.075 (in 3 folds),0.700 +/- 0.075 (in 3 folds),0.632 +/- 0.099 (in 3 folds),-0.001 +/- 0.085 (in 3 folds),0.629,0.012,132,0,132,0.0,False
dummy_stratified,0.511 +/- 0.050 (in 3 folds),0.511 +/- 0.050 (in 3 folds),0.668 +/- 0.052 (in 3 folds),0.668 +/- 0.052 (in 3 folds),0.520 +/- 0.085 (in 3 folds),0.025 +/- 0.101 (in 3 folds),0.523,0.007,132,0,132,0.0,False
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.659,0.0,132,0,132,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.659,0.0,132,0,132,0.0,True


2023-01-07 04:10:01,491 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.age_group_pediatric_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/age_group_pediatric_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/age_group_pediatric_healthy_only/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.age_group_pediatric_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/age_group_pediatric_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/age_group_pediatric_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.942 +/- 0.060 (in 3 folds),0.942 +/- 0.060 (in 3 folds),0.815 +/- 0.164 (in 3 folds),0.815 +/- 0.164 (in 3 folds),0.856 +/- 0.053 (in 3 folds),0.142 +/- 0.246 (in 3 folds),0.856,0.207,132,0,132,0.0,False
lasso_multiclass,0.938 +/- 0.055 (in 3 folds),0.938 +/- 0.055 (in 3 folds),0.825 +/- 0.175 (in 3 folds),0.825 +/- 0.175 (in 3 folds),0.901 +/- 0.020 (in 3 folds),0.647 +/- 0.085 (in 3 folds),0.902,0.625,132,0,132,0.0,False
linearsvm_ovr,0.928 +/- 0.078 (in 3 folds),0.928 +/- 0.078 (in 3 folds),0.846 +/- 0.110 (in 3 folds),0.846 +/- 0.110 (in 3 folds),0.909 +/- 0.022 (in 3 folds),0.654 +/- 0.065 (in 3 folds),0.909,0.632,132,0,132,0.0,False
rf_multiclass,0.890 +/- 0.065 (in 3 folds),0.890 +/- 0.065 (in 3 folds),0.629 +/- 0.207 (in 3 folds),0.629 +/- 0.207 (in 3 folds),0.865 +/- 0.053 (in 3 folds),0.182 +/- 0.316 (in 3 folds),0.864,0.294,132,0,132,0.0,False
elasticnet_cv,0.869 +/- 0.113 (in 3 folds),0.869 +/- 0.113 (in 3 folds),0.693 +/- 0.171 (in 3 folds),0.693 +/- 0.171 (in 3 folds),0.871 +/- 0.077 (in 3 folds),0.252 +/- 0.436 (in 3 folds),0.871,0.361,132,0,132,0.0,False
xgboost,0.841 +/- 0.167 (in 3 folds),0.841 +/- 0.167 (in 3 folds),0.644 +/- 0.261 (in 3 folds),0.644 +/- 0.261 (in 3 folds),0.889 +/- 0.055 (in 3 folds),0.391 +/- 0.425 (in 3 folds),0.886,0.466,132,0,132,0.0,False
lasso_cv,0.816 +/- 0.100 (in 3 folds),0.816 +/- 0.100 (in 3 folds),0.568 +/- 0.217 (in 3 folds),0.568 +/- 0.217 (in 3 folds),0.849 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.848,0.0,132,0,132,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.151 +/- 0.042 (in 3 folds),0.151 +/- 0.042 (in 3 folds),0.849 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.848,0.0,132,0,132,0.0,True
dummy_stratified,0.452 +/- 0.034 (in 3 folds),0.452 +/- 0.034 (in 3 folds),0.147 +/- 0.040 (in 3 folds),0.147 +/- 0.040 (in 3 folds),0.704 +/- 0.026 (in 3 folds),-0.087 +/- 0.056 (in 3 folds),0.705,-0.083,132,0,132,0.0,False


2023-01-07 04:10:11,337 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/sex_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/sex_healthy_only/train_smaller_model


## GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/BCR/sex_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/sex_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
dummy_stratified,0.518 +/- 0.048 (in 3 folds),0.518 +/- 0.048 (in 3 folds),0.474 +/- 0.120 (in 3 folds),0.474 +/- 0.120 (in 3 folds),0.538 +/- 0.063 (in 3 folds),0.037 +/- 0.096 (in 3 folds),0.538,0.063,132,0,132,0.0,False
lasso_cv,0.516 +/- 0.123 (in 3 folds),0.516 +/- 0.123 (in 3 folds),0.502 +/- 0.213 (in 3 folds),0.502 +/- 0.213 (in 3 folds),0.441 +/- 0.063 (in 3 folds),0.045 +/- 0.084 (in 3 folds),0.439,-0.101,132,0,132,0.0,False
xgboost,0.514 +/- 0.139 (in 3 folds),0.514 +/- 0.139 (in 3 folds),0.475 +/- 0.175 (in 3 folds),0.475 +/- 0.175 (in 3 folds),0.513 +/- 0.096 (in 3 folds),0.058 +/- 0.179 (in 3 folds),0.515,0.049,132,0,132,0.0,False
rf_multiclass,0.507 +/- 0.142 (in 3 folds),0.507 +/- 0.142 (in 3 folds),0.499 +/- 0.203 (in 3 folds),0.499 +/- 0.203 (in 3 folds),0.468 +/- 0.060 (in 3 folds),0.007 +/- 0.157 (in 3 folds),0.47,-0.043,132,0,132,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.137 (in 3 folds),0.463 +/- 0.137 (in 3 folds),0.397 +/- 0.069 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.394,-0.199,132,0,132,0.0,False
ridge_cv,0.480 +/- 0.034 (in 3 folds),0.480 +/- 0.034 (in 3 folds),0.454 +/- 0.151 (in 3 folds),0.454 +/- 0.151 (in 3 folds),0.418 +/- 0.039 (in 3 folds),-0.015 +/- 0.026 (in 3 folds),0.417,-0.157,132,0,132,0.0,False
lasso_multiclass,0.475 +/- 0.125 (in 3 folds),0.475 +/- 0.125 (in 3 folds),0.496 +/- 0.214 (in 3 folds),0.496 +/- 0.214 (in 3 folds),0.491 +/- 0.114 (in 3 folds),0.002 +/- 0.207 (in 3 folds),0.492,-0.001,132,0,132,0.0,False
elasticnet_cv,0.458 +/- 0.040 (in 3 folds),0.458 +/- 0.040 (in 3 folds),0.464 +/- 0.154 (in 3 folds),0.464 +/- 0.154 (in 3 folds),0.373 +/- 0.060 (in 3 folds),-0.145 +/- 0.128 (in 3 folds),0.371,-0.247,132,0,132,0.0,False
linearsvm_ovr,0.439 +/- 0.118 (in 3 folds),0.439 +/- 0.118 (in 3 folds),0.465 +/- 0.204 (in 3 folds),0.465 +/- 0.204 (in 3 folds),0.487 +/- 0.083 (in 3 folds),-0.053 +/- 0.215 (in 3 folds),0.485,-0.029,132,0,132,0.0,False


2023-01-07 04:10:20,558 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.disease from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/disease/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/disease/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.disease from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/disease/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/disease/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.945 +/- 0.008 (in 3 folds),0.946 +/- 0.006 (in 3 folds),0.934 +/- 0.008 (in 3 folds),0.937 +/- 0.005 (in 3 folds),0.759 +/- 0.036 (in 3 folds),0.669 +/- 0.047 (in 3 folds),0.759,0.667,274,0,274,0.0,False
ridge_cv,0.944 +/- 0.018 (in 3 folds),0.945 +/- 0.016 (in 3 folds),0.928 +/- 0.016 (in 3 folds),0.934 +/- 0.012 (in 3 folds),0.474 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.474,0.0,274,0,274,0.0,True
linearsvm_ovr,0.943 +/- 0.016 (in 3 folds),0.941 +/- 0.015 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.932 +/- 0.010 (in 3 folds),0.777 +/- 0.031 (in 3 folds),0.686 +/- 0.043 (in 3 folds),0.777,0.684,274,0,274,0.0,False
elasticnet_cv,0.941 +/- 0.015 (in 3 folds),0.941 +/- 0.013 (in 3 folds),0.912 +/- 0.010 (in 3 folds),0.917 +/- 0.007 (in 3 folds),0.730 +/- 0.060 (in 3 folds),0.595 +/- 0.103 (in 3 folds),0.73,0.594,274,0,274,0.0,False
rf_multiclass,0.936 +/- 0.013 (in 3 folds),0.938 +/- 0.011 (in 3 folds),0.926 +/- 0.009 (in 3 folds),0.931 +/- 0.008 (in 3 folds),0.752 +/- 0.040 (in 3 folds),0.629 +/- 0.059 (in 3 folds),0.752,0.627,274,0,274,0.0,False
lasso_cv,0.923 +/- 0.025 (in 3 folds),0.921 +/- 0.022 (in 3 folds),0.883 +/- 0.037 (in 3 folds),0.885 +/- 0.032 (in 3 folds),0.734 +/- 0.050 (in 3 folds),0.599 +/- 0.087 (in 3 folds),0.734,0.598,274,0,274,0.0,False
xgboost,0.916 +/- 0.008 (in 3 folds),0.916 +/- 0.008 (in 3 folds),0.916 +/- 0.003 (in 3 folds),0.918 +/- 0.004 (in 3 folds),0.748 +/- 0.034 (in 3 folds),0.627 +/- 0.048 (in 3 folds),0.748,0.625,274,0,274,0.0,False
dummy_stratified,0.502 +/- 0.017 (in 3 folds),0.509 +/- 0.012 (in 3 folds),0.512 +/- 0.006 (in 3 folds),0.516 +/- 0.005 (in 3 folds),0.336 +/- 0.029 (in 3 folds),-0.003 +/- 0.046 (in 3 folds),0.336,-0.003,274,0,274,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.474 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.474,0.0,274,0,274,0.0,True


2023-01-07 04:10:24,098 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/disease_all_demographics_present/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/disease_all_demographics_present/train_smaller_model


2023-01-07 04:10:24,667 - malid.external.model_evaluation - INFO - Removing ('lasso_cv', 0) because lasso_cv is incomplete.


2023-01-07 04:10:24,669 - malid.external.model_evaluation - INFO - Removing ('lasso_cv', 1) because lasso_cv is incomplete.


## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/disease_all_demographics_present/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/disease_all_demographics_present/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.946 +/- 0.027 (in 3 folds),0.947 +/- 0.028 (in 3 folds),0.942 +/- 0.027 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.808 +/- 0.033 (in 3 folds),0.723 +/- 0.057 (in 3 folds),0.808,0.724,240,0,240,0.0,False
ridge_cv,0.944 +/- 0.026 (in 3 folds),0.944 +/- 0.025 (in 3 folds),0.932 +/- 0.029 (in 3 folds),0.936 +/- 0.028 (in 3 folds),0.483 +/- 0.009 (in 3 folds),0.039 +/- 0.068 (in 3 folds),0.483,0.07,240,0,240,0.0,True
lasso_multiclass,0.944 +/- 0.024 (in 3 folds),0.946 +/- 0.022 (in 3 folds),0.940 +/- 0.016 (in 3 folds),0.944 +/- 0.015 (in 3 folds),0.817 +/- 0.033 (in 3 folds),0.741 +/- 0.051 (in 3 folds),0.817,0.741,240,0,240,0.0,False
elasticnet_cv,0.936 +/- 0.031 (in 3 folds),0.938 +/- 0.029 (in 3 folds),0.922 +/- 0.029 (in 3 folds),0.926 +/- 0.028 (in 3 folds),0.758 +/- 0.065 (in 3 folds),0.634 +/- 0.118 (in 3 folds),0.758,0.636,240,0,240,0.0,False
rf_multiclass,0.925 +/- 0.029 (in 3 folds),0.928 +/- 0.024 (in 3 folds),0.910 +/- 0.030 (in 3 folds),0.918 +/- 0.025 (in 3 folds),0.771 +/- 0.052 (in 3 folds),0.655 +/- 0.085 (in 3 folds),0.771,0.656,240,0,240,0.0,False
xgboost,0.919 +/- 0.013 (in 3 folds),0.921 +/- 0.011 (in 3 folds),0.912 +/- 0.013 (in 3 folds),0.917 +/- 0.012 (in 3 folds),0.762 +/- 0.048 (in 3 folds),0.649 +/- 0.079 (in 3 folds),0.762,0.649,240,0,240,0.0,False
dummy_stratified,0.531 +/- 0.035 (in 3 folds),0.523 +/- 0.032 (in 3 folds),0.529 +/- 0.023 (in 3 folds),0.526 +/- 0.020 (in 3 folds),0.405 +/- 0.062 (in 3 folds),0.088 +/- 0.087 (in 3 folds),0.404,0.086,240,0,240,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.479 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.479,0.0,240,0,240,0.0,True


2023-01-07 04:10:27,221 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/covid_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/covid_vs_healthy/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/covid_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/covid_vs_healthy/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.803 +/- 0.049 (in 3 folds),0.190 +/- 0.329 (in 3 folds),0.804,0.324,168,0,168,0.0,False
lasso_multiclass,0.991 +/- 0.005 (in 3 folds),0.991 +/- 0.005 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.952 +/- 0.028 (in 3 folds),0.861 +/- 0.086 (in 3 folds),0.952,0.862,168,0,168,0.0,False
rf_multiclass,0.990 +/- 0.004 (in 3 folds),0.990 +/- 0.004 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.868 +/- 0.069 (in 3 folds),0.578 +/- 0.233 (in 3 folds),0.869,0.595,168,0,168,0.0,False
elasticnet_cv,0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.874 +/- 0.084 (in 3 folds),0.577 +/- 0.320 (in 3 folds),0.875,0.613,168,0,168,0.0,False
xgboost,0.981 +/- 0.020 (in 3 folds),0.981 +/- 0.020 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.910 +/- 0.048 (in 3 folds),0.727 +/- 0.156 (in 3 folds),0.911,0.732,168,0,168,0.0,False
lasso_cv,0.980 +/- 0.025 (in 3 folds),0.980 +/- 0.025 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.904 +/- 0.090 (in 3 folds),0.670 +/- 0.355 (in 3 folds),0.905,0.713,168,0,168,0.0,False
linearsvm_ovr,0.974 +/- 0.026 (in 3 folds),0.974 +/- 0.026 (in 3 folds),0.991 +/- 0.010 (in 3 folds),0.991 +/- 0.010 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.850 +/- 0.044 (in 3 folds),0.946,0.844,168,0,168,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.774,0.0,168,0,168,0.0,True
dummy_stratified,0.407 +/- 0.032 (in 3 folds),0.407 +/- 0.032 (in 3 folds),0.743 +/- 0.018 (in 3 folds),0.743 +/- 0.018 (in 3 folds),0.572 +/- 0.025 (in 3 folds),-0.180 +/- 0.063 (in 3 folds),0.571,-0.181,168,0,168,0.0,False


2023-01-07 04:10:32,460 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/hiv_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/hiv_vs_healthy/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/hiv_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/hiv_vs_healthy/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.932 +/- 0.036 (in 3 folds),0.932 +/- 0.036 (in 3 folds),0.972 +/- 0.015 (in 3 folds),0.972 +/- 0.015 (in 3 folds),0.851 +/- 0.017 (in 3 folds),0.698 +/- 0.055 (in 3 folds),0.851,0.695,194,0,194,0.0,False
linearsvm_ovr,0.932 +/- 0.028 (in 3 folds),0.932 +/- 0.028 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.861 +/- 0.030 (in 3 folds),0.709 +/- 0.078 (in 3 folds),0.861,0.708,194,0,194,0.0,False
lasso_cv,0.924 +/- 0.022 (in 3 folds),0.924 +/- 0.022 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.680 +/- 0.056 (in 3 folds),0.845,0.677,194,0,194,0.0,False
elasticnet_cv,0.922 +/- 0.026 (in 3 folds),0.922 +/- 0.026 (in 3 folds),0.967 +/- 0.011 (in 3 folds),0.967 +/- 0.011 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.680 +/- 0.056 (in 3 folds),0.845,0.677,194,0,194,0.0,False
ridge_cv,0.921 +/- 0.036 (in 3 folds),0.921 +/- 0.036 (in 3 folds),0.967 +/- 0.014 (in 3 folds),0.967 +/- 0.014 (in 3 folds),0.753 +/- 0.081 (in 3 folds),0.348 +/- 0.304 (in 3 folds),0.753,0.4,194,0,194,0.0,False
xgboost,0.911 +/- 0.018 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.960 +/- 0.008 (in 3 folds),0.960 +/- 0.008 (in 3 folds),0.830 +/- 0.028 (in 3 folds),0.614 +/- 0.053 (in 3 folds),0.83,0.611,194,0,194,0.0,False
rf_multiclass,0.899 +/- 0.034 (in 3 folds),0.899 +/- 0.034 (in 3 folds),0.957 +/- 0.017 (in 3 folds),0.957 +/- 0.017 (in 3 folds),0.794 +/- 0.039 (in 3 folds),0.524 +/- 0.080 (in 3 folds),0.794,0.521,194,0,194,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.67,0.0,194,0,194,0.0,True
dummy_stratified,0.467 +/- 0.016 (in 3 folds),0.467 +/- 0.016 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.541 +/- 0.012 (in 3 folds),-0.068 +/- 0.032 (in 3 folds),0.541,-0.067,194,0,194,0.0,False


2023-01-07 04:10:36,276 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/lupus_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/lupus_vs_healthy/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/lupus_vs_healthy/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/lupus_vs_healthy/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.985 +/- 0.010 (in 3 folds),0.985 +/- 0.010 (in 3 folds),0.960 +/- 0.023 (in 3 folds),0.960 +/- 0.023 (in 3 folds),0.918 +/- 0.037 (in 3 folds),0.773 +/- 0.105 (in 3 folds),0.919,0.773,172,0,172,0.0,False
rf_multiclass,0.984 +/- 0.013 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.962 +/- 0.029 (in 3 folds),0.962 +/- 0.029 (in 3 folds),0.913 +/- 0.047 (in 3 folds),0.755 +/- 0.135 (in 3 folds),0.913,0.756,172,0,172,0.0,False
lasso_cv,0.984 +/- 0.008 (in 3 folds),0.984 +/- 0.008 (in 3 folds),0.954 +/- 0.022 (in 3 folds),0.954 +/- 0.022 (in 3 folds),0.913 +/- 0.047 (in 3 folds),0.753 +/- 0.143 (in 3 folds),0.913,0.755,172,0,172,0.0,False
ridge_cv,0.983 +/- 0.008 (in 3 folds),0.983 +/- 0.008 (in 3 folds),0.959 +/- 0.012 (in 3 folds),0.959 +/- 0.012 (in 3 folds),0.872 +/- 0.103 (in 3 folds),0.537 +/- 0.468 (in 3 folds),0.872,0.633,172,0,172,0.0,False
lasso_multiclass,0.979 +/- 0.009 (in 3 folds),0.979 +/- 0.009 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.924 +/- 0.009 (in 3 folds),0.796 +/- 0.021 (in 3 folds),0.924,0.794,172,0,172,0.0,False
linearsvm_ovr,0.973 +/- 0.012 (in 3 folds),0.973 +/- 0.012 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.919 +/- 0.010 (in 3 folds),0.777 +/- 0.026 (in 3 folds),0.919,0.774,172,0,172,0.0,False
xgboost,0.952 +/- 0.029 (in 3 folds),0.952 +/- 0.029 (in 3 folds),0.874 +/- 0.059 (in 3 folds),0.874 +/- 0.059 (in 3 folds),0.889 +/- 0.037 (in 3 folds),0.697 +/- 0.118 (in 3 folds),0.89,0.694,172,0,172,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.244 +/- 0.002 (in 3 folds),0.244 +/- 0.002 (in 3 folds),0.756 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.756,0.0,172,0,172,0.0,True
dummy_stratified,0.401 +/- 0.026 (in 3 folds),0.401 +/- 0.026 (in 3 folds),0.231 +/- 0.002 (in 3 folds),0.231 +/- 0.002 (in 3 folds),0.558 +/- 0.018 (in 3 folds),-0.197 +/- 0.053 (in 3 folds),0.558,-0.197,172,0,172,0.0,False


2023-01-07 04:10:40,015 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/ethnicity_condensed_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/ethnicity_condensed_healthy_only/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/ethnicity_condensed_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/ethnicity_condensed_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.726 +/- 0.021 (in 3 folds),0.745 +/- 0.033 (in 3 folds),0.761 +/- 0.004 (in 3 folds),0.776 +/- 0.010 (in 3 folds),0.618 +/- 0.169 (in 3 folds),0.375 +/- 0.326 (in 3 folds),0.617,0.415,115,0,115,0.0,True
ridge_cv,0.723 +/- 0.009 (in 3 folds),0.750 +/- 0.018 (in 3 folds),0.746 +/- 0.021 (in 3 folds),0.762 +/- 0.032 (in 3 folds),0.504 +/- 0.098 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.0,115,0,115,0.0,True
elasticnet_cv,0.721 +/- 0.017 (in 3 folds),0.738 +/- 0.018 (in 3 folds),0.759 +/- 0.007 (in 3 folds),0.775 +/- 0.006 (in 3 folds),0.567 +/- 0.114 (in 3 folds),0.178 +/- 0.308 (in 3 folds),0.565,0.296,115,0,115,0.0,True
lasso_multiclass,0.689 +/- 0.033 (in 3 folds),0.713 +/- 0.045 (in 3 folds),0.731 +/- 0.022 (in 3 folds),0.750 +/- 0.039 (in 3 folds),0.592 +/- 0.114 (in 3 folds),0.370 +/- 0.119 (in 3 folds),0.591,0.362,115,0,115,0.0,False
linearsvm_ovr,0.683 +/- 0.019 (in 3 folds),0.704 +/- 0.028 (in 3 folds),0.726 +/- 0.019 (in 3 folds),0.744 +/- 0.041 (in 3 folds),0.584 +/- 0.110 (in 3 folds),0.326 +/- 0.170 (in 3 folds),0.583,0.32,115,0,115,0.0,False
rf_multiclass,0.656 +/- 0.043 (in 3 folds),0.655 +/- 0.050 (in 3 folds),0.691 +/- 0.024 (in 3 folds),0.679 +/- 0.029 (in 3 folds),0.687 +/- 0.052 (in 3 folds),0.532 +/- 0.023 (in 3 folds),0.687,0.532,115,0,115,0.0,True
xgboost,0.610 +/- 0.080 (in 3 folds),0.619 +/- 0.088 (in 3 folds),0.695 +/- 0.038 (in 3 folds),0.695 +/- 0.043 (in 3 folds),0.653 +/- 0.079 (in 3 folds),0.431 +/- 0.094 (in 3 folds),0.652,0.431,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.504 +/- 0.098 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.0,115,0,115,0.0,True
dummy_stratified,0.482 +/- 0.031 (in 3 folds),0.481 +/- 0.030 (in 3 folds),0.502 +/- 0.008 (in 3 folds),0.501 +/- 0.008 (in 3 folds),0.401 +/- 0.082 (in 3 folds),-0.034 +/- 0.088 (in 3 folds),0.4,-0.035,115,0,115,0.0,True


2023-01-07 04:10:43,370 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/age_group_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/age_group_healthy_only/train_smaller_model


















































































































































## GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/age_group_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/age_group_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.699 +/- 0.066 (in 3 folds),0.677 +/- 0.089 (in 3 folds),0.731 +/- 0.069 (in 3 folds),0.715 +/- 0.086 (in 3 folds),0.427 +/- 0.038 (in 3 folds),0.316 +/- 0.048 (in 3 folds),0.426,0.315,115,0,115,0.0,True
linearsvm_ovr,0.692 +/- 0.066 (in 3 folds),0.668 +/- 0.093 (in 3 folds),0.730 +/- 0.064 (in 3 folds),0.713 +/- 0.084 (in 3 folds),0.427 +/- 0.053 (in 3 folds),0.313 +/- 0.058 (in 3 folds),0.426,0.309,115,0,115,0.0,True
xgboost,0.689 +/- 0.053 (in 3 folds),0.675 +/- 0.057 (in 3 folds),0.722 +/- 0.057 (in 3 folds),0.709 +/- 0.063 (in 3 folds),0.384 +/- 0.067 (in 3 folds),0.272 +/- 0.075 (in 3 folds),0.383,0.258,115,0,115,0.0,True
rf_multiclass,0.687 +/- 0.118 (in 3 folds),0.668 +/- 0.141 (in 3 folds),0.731 +/- 0.097 (in 3 folds),0.717 +/- 0.112 (in 3 folds),0.453 +/- 0.065 (in 3 folds),0.362 +/- 0.076 (in 3 folds),0.452,0.34,115,0,115,0.0,True
ridge_cv,0.672 +/- 0.151 (in 3 folds),0.669 +/- 0.149 (in 3 folds),0.698 +/- 0.172 (in 3 folds),0.696 +/- 0.170 (in 3 folds),0.225 +/- 0.034 (in 3 folds),0.046 +/- 0.080 (in 3 folds),0.226,0.047,115,0,115,0.0,True
lasso_cv,0.667 +/- 0.020 (in 3 folds),0.654 +/- 0.028 (in 3 folds),0.722 +/- 0.019 (in 3 folds),0.713 +/- 0.028 (in 3 folds),0.340 +/- 0.062 (in 3 folds),0.257 +/- 0.077 (in 3 folds),0.339,0.205,115,0,115,0.0,True
elasticnet_cv,0.665 +/- 0.020 (in 3 folds),0.652 +/- 0.030 (in 3 folds),0.720 +/- 0.023 (in 3 folds),0.711 +/- 0.032 (in 3 folds),0.305 +/- 0.048 (in 3 folds),0.237 +/- 0.074 (in 3 folds),0.304,0.162,115,0,115,0.0,True
dummy_stratified,0.515 +/- 0.025 (in 3 folds),0.511 +/- 0.027 (in 3 folds),0.535 +/- 0.009 (in 3 folds),0.534 +/- 0.009 (in 3 folds),0.201 +/- 0.045 (in 3 folds),0.042 +/- 0.051 (in 3 folds),0.2,0.035,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.208 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.209,0.012,115,0,115,0.0,True


2023-01-07 04:10:49,136 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/age_group_binary_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/age_group_binary_healthy_only/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/age_group_binary_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/age_group_binary_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.804 +/- 0.085 (in 3 folds),0.804 +/- 0.085 (in 3 folds),0.896 +/- 0.053 (in 3 folds),0.896 +/- 0.053 (in 3 folds),0.706 +/- 0.092 (in 3 folds),0.353 +/- 0.252 (in 3 folds),0.704,0.315,115,0,115,0.0,False
lasso_multiclass,0.786 +/- 0.136 (in 3 folds),0.786 +/- 0.136 (in 3 folds),0.893 +/- 0.067 (in 3 folds),0.893 +/- 0.067 (in 3 folds),0.705 +/- 0.059 (in 3 folds),0.393 +/- 0.164 (in 3 folds),0.704,0.373,115,0,115,0.0,False
linearsvm_ovr,0.776 +/- 0.159 (in 3 folds),0.776 +/- 0.159 (in 3 folds),0.884 +/- 0.084 (in 3 folds),0.884 +/- 0.084 (in 3 folds),0.723 +/- 0.059 (in 3 folds),0.419 +/- 0.162 (in 3 folds),0.722,0.402,115,0,115,0.0,False
xgboost,0.734 +/- 0.117 (in 3 folds),0.734 +/- 0.117 (in 3 folds),0.850 +/- 0.069 (in 3 folds),0.850 +/- 0.069 (in 3 folds),0.671 +/- 0.051 (in 3 folds),0.270 +/- 0.139 (in 3 folds),0.67,0.239,115,0,115,0.0,False
elasticnet_cv,0.721 +/- 0.123 (in 3 folds),0.721 +/- 0.123 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.670 +/- 0.031 (in 3 folds),0.093 +/- 0.161 (in 3 folds),0.67,0.182,115,0,115,0.0,False
lasso_cv,0.720 +/- 0.121 (in 3 folds),0.720 +/- 0.121 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.670 +/- 0.031 (in 3 folds),0.093 +/- 0.161 (in 3 folds),0.67,0.182,115,0,115,0.0,False
ridge_cv,0.682 +/- 0.201 (in 3 folds),0.682 +/- 0.201 (in 3 folds),0.795 +/- 0.189 (in 3 folds),0.795 +/- 0.189 (in 3 folds),0.644 +/- 0.057 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.643,-0.005,115,0,115,0.0,False
dummy_stratified,0.549 +/- 0.015 (in 3 folds),0.549 +/- 0.015 (in 3 folds),0.677 +/- 0.049 (in 3 folds),0.677 +/- 0.049 (in 3 folds),0.565 +/- 0.025 (in 3 folds),0.096 +/- 0.035 (in 3 folds),0.565,0.085,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.652,0.0,115,0,115,0.0,True


2023-01-07 04:10:52,864 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/age_group_pediatric_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/age_group_pediatric_healthy_only/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/age_group_pediatric_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/age_group_pediatric_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.978 +/- 0.022 (in 3 folds),0.978 +/- 0.022 (in 3 folds),0.937 +/- 0.063 (in 3 folds),0.937 +/- 0.063 (in 3 folds),0.826 +/- 0.052 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.826,0.0,115,0,115,0.0,True
linearsvm_ovr,0.971 +/- 0.030 (in 3 folds),0.971 +/- 0.030 (in 3 folds),0.925 +/- 0.072 (in 3 folds),0.925 +/- 0.072 (in 3 folds),0.948 +/- 0.026 (in 3 folds),0.838 +/- 0.061 (in 3 folds),0.948,0.827,115,0,115,0.0,False
elasticnet_cv,0.971 +/- 0.030 (in 3 folds),0.971 +/- 0.030 (in 3 folds),0.945 +/- 0.049 (in 3 folds),0.945 +/- 0.049 (in 3 folds),0.912 +/- 0.069 (in 3 folds),0.553 +/- 0.482 (in 3 folds),0.913,0.673,115,0,115,0.0,False
lasso_multiclass,0.968 +/- 0.038 (in 3 folds),0.968 +/- 0.038 (in 3 folds),0.932 +/- 0.064 (in 3 folds),0.932 +/- 0.064 (in 3 folds),0.965 +/- 0.016 (in 3 folds),0.888 +/- 0.046 (in 3 folds),0.965,0.885,115,0,115,0.0,False
rf_multiclass,0.958 +/- 0.019 (in 3 folds),0.958 +/- 0.019 (in 3 folds),0.809 +/- 0.138 (in 3 folds),0.809 +/- 0.138 (in 3 folds),0.922 +/- 0.024 (in 3 folds),0.702 +/- 0.097 (in 3 folds),0.922,0.706,115,0,115,0.0,False
lasso_cv,0.937 +/- 0.032 (in 3 folds),0.937 +/- 0.032 (in 3 folds),0.860 +/- 0.096 (in 3 folds),0.860 +/- 0.096 (in 3 folds),0.886 +/- 0.057 (in 3 folds),0.285 +/- 0.493 (in 3 folds),0.887,0.555,115,0,115,0.0,False
xgboost,0.936 +/- 0.037 (in 3 folds),0.936 +/- 0.037 (in 3 folds),0.846 +/- 0.082 (in 3 folds),0.846 +/- 0.082 (in 3 folds),0.939 +/- 0.014 (in 3 folds),0.761 +/- 0.118 (in 3 folds),0.939,0.778,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.174 +/- 0.052 (in 3 folds),0.174 +/- 0.052 (in 3 folds),0.826 +/- 0.052 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.826,0.0,115,0,115,0.0,True
dummy_stratified,0.425 +/- 0.106 (in 3 folds),0.425 +/- 0.106 (in 3 folds),0.171 +/- 0.046 (in 3 folds),0.171 +/- 0.046 (in 3 folds),0.633 +/- 0.115 (in 3 folds),-0.127 +/- 0.165 (in 3 folds),0.635,-0.138,115,0,115,0.0,False


2023-01-07 04:10:56,440 - analyze_repertoire_stats_models.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/sex_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/sex_healthy_only/train_smaller_model


## GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/repertoire_stats/TCR/sex_healthy_only/train_smaller_model to /users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/sex_healthy_only/train_smaller_model

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.569 +/- 0.023 (in 3 folds),0.569 +/- 0.023 (in 3 folds),0.613 +/- 0.148 (in 3 folds),0.613 +/- 0.148 (in 3 folds),0.496 +/- 0.053 (in 3 folds),0.003 +/- 0.063 (in 3 folds),0.496,-0.011,115,0,115,0.0,False
rf_multiclass,0.558 +/- 0.046 (in 3 folds),0.558 +/- 0.046 (in 3 folds),0.590 +/- 0.197 (in 3 folds),0.590 +/- 0.197 (in 3 folds),0.539 +/- 0.001 (in 3 folds),0.153 +/- 0.072 (in 3 folds),0.539,0.075,115,0,115,0.0,False
linearsvm_ovr,0.552 +/- 0.023 (in 3 folds),0.552 +/- 0.023 (in 3 folds),0.607 +/- 0.158 (in 3 folds),0.607 +/- 0.158 (in 3 folds),0.496 +/- 0.054 (in 3 folds),-0.004 +/- 0.077 (in 3 folds),0.496,-0.012,115,0,115,0.0,False
lasso_cv,0.521 +/- 0.036 (in 3 folds),0.521 +/- 0.036 (in 3 folds),0.542 +/- 0.216 (in 3 folds),0.542 +/- 0.216 (in 3 folds),0.461 +/- 0.092 (in 3 folds),0.017 +/- 0.030 (in 3 folds),0.461,-0.132,115,0,115,0.0,False
ridge_cv,0.519 +/- 0.032 (in 3 folds),0.519 +/- 0.032 (in 3 folds),0.536 +/- 0.207 (in 3 folds),0.536 +/- 0.207 (in 3 folds),0.384 +/- 0.092 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.383,-0.257,115,0,115,0.0,False
xgboost,0.517 +/- 0.030 (in 3 folds),0.517 +/- 0.030 (in 3 folds),0.565 +/- 0.204 (in 3 folds),0.565 +/- 0.204 (in 3 folds),0.522 +/- 0.064 (in 3 folds),0.133 +/- 0.145 (in 3 folds),0.522,0.041,115,0,115,0.0,False
elasticnet_cv,0.502 +/- 0.062 (in 3 folds),0.502 +/- 0.062 (in 3 folds),0.566 +/- 0.184 (in 3 folds),0.566 +/- 0.184 (in 3 folds),0.461 +/- 0.092 (in 3 folds),0.017 +/- 0.030 (in 3 folds),0.461,-0.132,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.513 +/- 0.168 (in 3 folds),0.513 +/- 0.168 (in 3 folds),0.384 +/- 0.092 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.383,-0.257,115,0,115,0.0,False
dummy_stratified,0.497 +/- 0.052 (in 3 folds),0.497 +/- 0.052 (in 3 folds),0.513 +/- 0.152 (in 3 folds),0.513 +/- 0.152 (in 3 folds),0.522 +/- 0.016 (in 3 folds),-0.006 +/- 0.102 (in 3 folds),0.522,0.041,115,0,115,0.0,False
