In [1]:
##################
# Libraries
##################
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC, SVC
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from loguru import logger

In [2]:
##################
# Constants
##################
# Single seed for everything that is stochastic on the modelling side: the
# train/test split and the tree-based estimators below. Without it the decision
# tree and random forest give different scores on every kernel restart.
MODEL_SEED = 42
# One experiment is run per seed; the seed decides which training rows have
# their label hidden.
ROWS_SEED = [24, 42, 206, 602, 412, 214, 754, 457, 2023, 3202]
# Fractions of the training set whose labels are hidden.
SIZE_OF_UNLABELLED_DATA = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9]
# Base estimators that get wrapped in SelfTrainingClassifier.
MODELS = [
    GaussianNB(var_smoothing=0.01519911082952933),
    DecisionTreeClassifier(
        criterion='entropy',
        max_depth=9,
        min_samples_leaf=3,
        min_samples_split=3,
        random_state=MODEL_SEED,
    ),
    KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance'),
    RandomForestClassifier(
        criterion='gini',
        max_depth=15,
        min_samples_leaf=1,
        min_samples_split=9,
        n_estimators=400,
        random_state=MODEL_SEED,
    ),
]

In [3]:
##################
# Functions
##################
def encode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Label-encode every column of ``df``.

    Each column is overwritten with the codes produced by fitting a
    ``LabelEncoder`` on that column alone, so codes are not shared between
    columns. The frame passed in is modified in place and that same object
    is returned.
    """
    encoder = LabelEncoder()

    for column_name in df.columns:
        # fit_transform re-fits the encoder, discarding the previous column's classes.
        encoded_values = encoder.fit_transform(df[column_name])
        df[column_name] = encoded_values

    return df

def get_metrics() -> dict:
    """Return the scoring functions used in the experiments, keyed by short name.

    Every value is a callable taking ``(y_true, y_pred)``.
    """
    return dict(
        accuracy=accuracy_score,
        precision=precision_score,
        recall=recall_score,
        f1=f1_score,
        roc=roc_auc_score,
    )


def get_metrics_df(y_true, y_pred) -> pd.DataFrame:
    """Score ``y_pred`` against ``y_true`` with every metric from ``get_metrics``.

    Returns a one-row DataFrame with one column per metric name, in the
    order ``get_metrics`` lists them.
    """
    scores = {
        name: [scorer(y_true, y_pred)]
        for name, scorer in get_metrics().items()
    }
    return pd.DataFrame(scores)

def run_pipeline(X_train, X_test, y_train, y_test, size_of_unlabelled_data=0.5, model=None, rows_seed=42) -> dict:
    """Run one self-training experiment and return its scores.

    A fraction of the training labels is replaced by -1, the features are
    scaled with a ``RobustScaler`` fitted on the training set only, ``model``
    is wrapped in a ``SelfTrainingClassifier`` and fitted, and the predictions
    on the test set are scored with ``get_metrics_df``.

    Args:
        X_train, X_test: feature frames (indexed like ``y_train`` / ``y_test``).
        y_train, y_test: label series.
        size_of_unlabelled_data: fraction of training rows whose label is hidden.
        model: base estimator to wrap; required.
        rows_seed: seed of the RNG that picks the rows to hide.

    Returns:
        dict with keys ``rows_seed``, ``model`` (class name of ``model``),
        ``unlabeled``, ``acc``, ``precision``, ``recall``, ``f1`` and ``roc``.

    Raises:
        ValueError: if ``model`` is None.
    """
    if model is None:
        raise ValueError("Model is None")

    # Work on copies so the caller's frames and labels are left untouched.
    X_train, X_test = X_train.copy(), X_test.copy()
    y_train, y_test = y_train.copy(), y_test.copy()

    # Pick the rows to hide (without replacement) and overwrite their label with -1.
    n_hidden = int(len(X_train) * size_of_unlabelled_data)
    hidden_rows = np.random.RandomState(rows_seed).choice(X_train.index, size=n_hidden, replace=False)
    y_train.loc[hidden_rows] = -1

    # Fit the scaler on the training features only, then apply it to both sets.
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    classifier = SelfTrainingClassifier(model, verbose=False)
    classifier.fit(X_train_scaled, y_train)
    y_pred = classifier.predict(X_test_scaled)

    # Flatten the one-row metrics frame into the result record.
    scores = get_metrics_df(y_test, y_pred)
    result = {"rows_seed": rows_seed, "model": type(model).__name__, "unlabeled": size_of_unlabelled_data}
    for result_key, metric_column in (
        ("acc", "accuracy"),
        ("precision", "precision"),
        ("recall", "recall"),
        ("f1", "f1"),
        ("roc", "roc"),
    ):
        result[result_key] = scores[metric_column].values[0]

    return result

def run_pipeline_self(X_train, X_test, y_train, y_test) -> pd.DataFrame:
    """Run ``run_pipeline`` for every model / unlabelled fraction / row seed.

    Iterates over ``MODELS``, ``SIZE_OF_UNLABELLED_DATA`` and ``ROWS_SEED``
    (in that nesting order), logging each run, and collects the result dicts.

    Args:
        X_train, X_test, y_train, y_test: the split passed unchanged to
            every ``run_pipeline`` call.

    Returns:
        DataFrame with one row per run and the columns ``rows_seed``,
        ``model``, ``unlabeled``, ``acc``, ``precision``, ``recall``, ``f1``
        and ``roc``.
    """
    run_counter = 0
    results = []

    for model in MODELS:
        for value in SIZE_OF_UNLABELLED_DATA:
            logger.info(f'Model: {model}')
            logger.info(f'Size of unlabelled data: {value}')
            for row_seed in ROWS_SEED:
                run_counter += 1
                logger.info(f"%{run_counter} - Running pipeline for row_seed: {row_seed} and size_of_unlabelled_data: {value} and model: {model}")
                result = run_pipeline(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, size_of_unlabelled_data=value, model=model, rows_seed=row_seed)
                results.append(result)
            print('\n')

    # Build the frame straight from the list of result dicts. Passing the
    # column list fixes the column order and keeps the columns present even
    # when no run was executed.
    result_columns = ['rows_seed', 'model', 'unlabeled', 'acc', 'precision', 'recall', 'f1', 'roc']
    return pd.DataFrame(results, columns=result_columns)

    

In [6]:
# Load the water-potability data and discard every row with a missing value.
df = pd.read_csv('../../datasets/water_potability.csv').dropna()

# 'Potability' is the target; all remaining columns are the features.
y = df['Potability']
X = df.drop(columns='Potability')

# Hold out 20% of the rows for testing; the split is seeded with MODEL_SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=MODEL_SEED,
)

In [7]:
# Full sweep: one run per model in MODELS, per fraction in
# SIZE_OF_UNLABELLED_DATA and per seed in ROWS_SEED.
results_df = run_pipeline_self(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

[32m2023-10-22 14:34:21.235[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.235[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.1[0m
[32m2023-10-22 14:34:21.236[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%1 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.252[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%2 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.266[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%3 - Running pipeline for row_seed: 206 and size_of_unlabel







[32m2023-10-22 14:34:21.603[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%23 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.622[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%24 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.643[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%25 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.661[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%26 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.015199110829529





[32m2023-10-22 14:34:21.961[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.962[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.5[0m
[32m2023-10-22 14:34:21.962[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%41 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:21.996[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%42 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.020[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%43 - Running pipeline for row_seed: 206 and size_of_unla





[32m2023-10-22 14:34:22.170[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%50 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.188[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.188[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.6[0m
[32m2023-10-22 14:34:22.189[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%51 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.209[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%52 - Running pipeline for row_seed: 42 and size_of_unl







[32m2023-10-22 14:34:22.394[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%62 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.412[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%63 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.429[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%64 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.447[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%65 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.0151991108295293







[32m2023-10-22 14:34:22.762[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%83 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.779[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%84 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.798[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%85 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:34:22.815[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%86 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082







[32m2023-10-22 14:34:23.137[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%102 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:23.202[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%103 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:23.294[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%104 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 1





[32m2023-10-22 14:34:24.100[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%115 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:24.172[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%116 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:24.301[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%117 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:24.962[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%124 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:25.030[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%125 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:25.101[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%126 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:25.700[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%134 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:25.791[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%135 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:25.879[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%136 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:26.548[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%144 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:26.631[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%145 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:26.687[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%146 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:27.425[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%155 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:27.527[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%156 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:27.605[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%157 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:28.140[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%165 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:28.218[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%166 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:28.301[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%167 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:28.804[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%175 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:28.852[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%176 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:28.915[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%177 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:29.516[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%185 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:29.588[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%186 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:29.651[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%187 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-





[32m2023-10-22 14:34:30.074[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%195 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:30.148[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%196 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 14:34:30.199[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%197 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(criterion='entropy', max_depth=9, min_samples_leaf=3,
                       min_samples_split=3)[0m
[32m2023-10-22 





[32m2023-10-22 14:34:30.642[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%207 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:30.674[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%208 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:30.705[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%209 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:30.735[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%210 - Running pipeline for ro





[32m2023-10-22 14:34:30.988[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%216 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:31.044[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%217 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:31.103[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%218 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:31.148[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%219 - Running pipeline for row





[32m2023-10-22 14:34:31.493[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%225 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:31.572[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%226 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:31.654[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%227 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:31.732[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%228 - Running pipeline for row





[32m2023-10-22 14:34:32.207[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%234 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:32.292[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%235 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:32.388[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%236 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:32.485[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%237 - Running pipeline for row





[32m2023-10-22 14:34:33.021[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%243 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:33.141[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%244 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:33.242[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%245 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:33.322[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%246 - Running pipeline for row





[32m2023-10-22 14:34:34.146[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%254 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:34.249[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%255 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:34.335[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%256 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:34.439[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%257 - Running pipeline for row





[32m2023-10-22 14:34:35.048[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%264 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:35.148[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%265 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:35.214[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%266 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:35.303[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%267 - Running pipeline for row





[32m2023-10-22 14:34:35.873[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%275 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:35.947[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%276 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:36.009[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%277 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 14:34:36.077[0m | [1





  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:36.492[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%285 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:36.544[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%286 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:36.593[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%287 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weig





  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:37.048[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%296 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:37.094[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%297 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m2023-10-22 14:34:37.149[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%298 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights





[32m2023-10-22 14:34:42.293[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%302 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:34:49.771[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%303 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:34:55.892[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%304 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:35:03.218[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%305 - Running pipeline for row_see





[32m2023-10-22 14:35:58.783[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%312 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:36:09.979[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%313 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:36:21.001[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%314 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:36:29.513[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%315 - Running pipeline for row_see





[32m2023-10-22 14:37:45.839[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%322 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:37:57.644[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%323 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:38:09.427[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%324 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:38:20.492[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%325 - Running pipeline for row_see





[32m2023-10-22 14:39:32.403[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%332 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:39:43.934[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%333 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:39:55.174[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%334 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:40:05.701[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%335 - Running pipeline for row_see





[32m2023-10-22 14:41:20.192[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%342 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:41:30.973[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%343 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:41:41.348[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%344 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:41:51.093[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%345 - Running pipeline for row_see





[32m2023-10-22 14:43:03.975[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%352 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:43:13.810[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%353 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:43:22.634[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%354 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:43:32.451[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%355 - Running pipeline for row_see





[32m2023-10-22 14:44:39.325[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%362 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:44:49.913[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%363 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:45:00.090[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%364 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:45:09.281[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%365 - Running pipeline for row_see





[32m2023-10-22 14:46:21.705[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%372 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:46:32.876[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%373 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:46:43.064[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%374 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:46:53.008[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%375 - Running pipeline for row_see





[32m2023-10-22 14:48:05.864[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%382 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:48:16.390[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%383 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:48:28.563[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%384 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:48:38.138[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%385 - Running pipeline for row_





[32m2023-10-22 14:49:53.671[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%392 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:50:04.030[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%393 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:50:13.293[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%394 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=15, min_samples_split=9, n_estimators=400)[0m
[32m2023-10-22 14:50:22.300[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%395 - Running pipeline for row_see





In [8]:
# Summarise every metric as mean and standard deviation for each
# (model, unlabeled) group of results_df.
results_df_with_mean_std = (
    results_df
    .groupby(['model', 'unlabeled'])
    .agg({metric: ['mean', 'std'] for metric in ('acc', 'f1', 'precision', 'recall', 'roc')})
)
# The aggregation yields two-level column labels (metric, statistic);
# flatten them into single "<metric>_<statistic>" names.
results_df_with_mean_std.columns = [
    f"{metric}_{stat}" for metric, stat in results_df_with_mean_std.columns
]
# Turn the group keys ('model', 'unlabeled') back into ordinary leading columns.
results_df_with_mean_std = results_df_with_mean_std.reset_index()
results_df_with_mean_std.columns

Index(['model', 'unlabeled', 'acc_mean', 'acc_std', 'f1_mean', 'f1_std',
       'precision_mean', 'precision_std', 'recall_mean', 'recall_std',
       'roc_mean', 'roc_std'],
      dtype='object')

In [9]:
# Print the aggregated metrics as a plain-text table: column names are used
# as headers, the 'psql' table format is applied and the row index is omitted.
from tabulate import tabulate

print(
    tabulate(
        results_df_with_mean_std,
        headers='keys',
        tablefmt='psql',
        showindex=False,
    )
)

+------------------------+-------------+------------+-------------+------------+------------+------------------+-----------------+---------------+--------------+------------+-------------+
| model                  |   unlabeled |   acc_mean |     acc_std |    f1_mean |     f1_std |   precision_mean |   precision_std |   recall_mean |   recall_std |   roc_mean |     roc_std |
|------------------------+-------------+------------+-------------+------------+------------+------------------+-----------------+---------------+--------------+------------+-------------|
| DecisionTreeClassifier |        0.1  |   0.623573 | 0.0190078   | 0.473598   | 0.0603724  |         0.592388 |       0.0432474 |   0.404651    |   0.0858507  |   0.595616 | 0.020058    |
| DecisionTreeClassifier |        0.2  |   0.616873 | 0.0183541   | 0.465505   | 0.0421481  |         0.579696 |       0.0374585 |   0.395349    |   0.0679125  |   0.588584 | 0.016758    |
| DecisionTreeClassifier |        0.3  |   0.613151 | 0