In [1]:
##################
# Libraries
##################
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC, SVC
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from loguru import logger

In [2]:
##################
# Constants
##################
MODEL_SEED=42
ROWS_SEED=[24, 42, 206, 602, 412, 214, 754, 457, 2023, 3202]
SIZE_OF_UNLABELLED_DATA = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9]
MODELS = [
    GaussianNB(var_smoothing=0.001),
    DecisionTreeClassifier(criterion='gini', max_depth=7, min_samples_leaf=1, min_samples_split=2),
    KNeighborsClassifier(metric='euclidean', n_neighbors=3, weights='uniform'),
    RandomForestClassifier(criterion='gini', max_depth=9, min_samples_leaf=1, min_samples_split=2, n_estimators=100),
]

In [3]:
##################
# Functions
##################
def encode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Encode dataframe using LabelEncoder"""
    labelencoder = LabelEncoder()

    for col in df.columns:
        df[col] = labelencoder.fit_transform(df[col])

    return df

def get_metrics() -> dict:
    metrics = {
        'accuracy': accuracy_score,
        'precision': precision_score,
        'recall': recall_score,
        'f1': f1_score,
        'roc': roc_auc_score
    }
    return metrics


def get_metrics_df(y_true, y_pred) -> pd.DataFrame:
    metrics = get_metrics()
    df = pd.DataFrame()
    for metric_name, metric in metrics.items():
        df[metric_name] = [metric(y_true, y_pred)]

    return df

def run_pipeline(X_train, X_test, y_train, y_test, size_of_unlabelled_data=0.5, model=None, rows_seed=42) -> dict:
    if model is None:
        raise ValueError("Model is None")
    
    X_train = X_train.copy()
    X_test = X_test.copy()
    y_train = y_train.copy()
    y_test = y_test.copy()

    # Unlabelled data
    rng = np.random.RandomState(rows_seed)
    random_rows_with_rng = rng.choice(X_train.index, size=int(len(X_train)*size_of_unlabelled_data), replace=False)
    y_train.loc[random_rows_with_rng] = -1

    # Scale data
    scaler = RobustScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    self_training_model = SelfTrainingClassifier(model, verbose=False)
    self_training_model.fit(X_train, y_train)

    # Predict
    y_pred = self_training_model.predict(X_test)

    # Evaluate - Get Metrics
    metrics = get_metrics_df(y_test, y_pred)
    accuracy = metrics["accuracy"].values[0]
    precision = metrics["precision"].values[0]
    recall = metrics["recall"].values[0]
    f1 = metrics["f1"].values[0]
    roc = metrics["roc"].values[0]
    

    return {"rows_seed": rows_seed, "model": type(model).__name__, "unlabeled": size_of_unlabelled_data, "acc": accuracy, "precision": precision, "recall": recall, "f1": f1, "roc": roc}

def run_pipeline_self(X_train, X_test, y_train, y_test) -> pd.DataFrame:
    run_counter = 0
    results = []

    for model in MODELS:
        for value in SIZE_OF_UNLABELLED_DATA:
            logger.info(f'Model: {model}')
            logger.info(f'Size of unlabelled data: {value}')
            for row_seed in ROWS_SEED:
                run_counter += 1
                logger.info(f"%{run_counter} - Running pipeline for row_seed: {row_seed} and size_of_unlabelled_data: {value} and model: {model}")
                result = run_pipeline(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, size_of_unlabelled_data=value, model=model, rows_seed=row_seed)
                results.append(result)
            print('\n')

    results_rows_seed = []
    results_models = []
    results_unlabeled = []
    results_acc = []
    results_precision = []
    results_recall = []
    results_f1 = []
    results_roc = []

    for result in results:
        results_rows_seed.append(result.get('rows_seed'))
        results_models.append(result.get('model'))
        results_unlabeled.append(result.get('unlabeled'))
        results_acc.append(result.get('acc'))
        results_precision.append(result.get('precision'))
        results_recall.append(result.get('recall'))
        results_f1.append(result.get('f1'))
        results_roc.append(result.get('roc'))


    results_df = pd.DataFrame({'rows_seed': results_rows_seed, 'model': results_models, 'unlabeled': results_unlabeled, 'acc': results_acc, 'precision': results_precision, 'recall': results_recall, 'f1': results_f1, 'roc': results_roc})

    return results_df

    

In [4]:
df = pd.read_csv('../../datasets/mushrooms.csv')
df = encode_dataframe(df)


X = df.drop('class', axis=1)
y = df['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=MODEL_SEED)

In [5]:
results_df = run_pipeline_self(X_train, X_test, y_train, y_test)

[32m2023-10-22 12:47:18.660[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:18.661[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.1[0m
[32m2023-10-22 12:47:18.662[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%1 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:18.690[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%2 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:18.716[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%3 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_sm





[32m2023-10-22 12:47:19.134[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%19 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.2 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.161[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%20 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.2 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.190[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.190[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.3[0m
[32m2023-10-22 12:47:19.191[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%21 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.3 and model: GaussianNB(





[32m2023-10-22 12:47:19.394[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%29 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.420[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%30 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.447[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.448[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.4[0m
[32m2023-10-22 12:47:19.448[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%31 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.4 and model: GaussianNB(





[32m2023-10-22 12:47:19.654[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%39 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.4 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.682[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%40 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.4 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.707[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.707[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.5[0m
[32m2023-10-22 12:47:19.708[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%41 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.5 and model: GaussianNB(





[32m2023-10-22 12:47:19.931[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%49 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.961[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%50 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.993[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:19.994[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.6[0m
[32m2023-10-22 12:47:19.994[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%51 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.6 and model: GaussianNB(





[32m2023-10-22 12:47:20.202[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%58 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.235[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%59 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.261[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%60 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.296[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.297[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of





[32m2023-10-22 12:47:20.528[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%69 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.563[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%70 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.600[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.600[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.8[0m
[32m2023-10-22 12:47:20.601[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%71 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.8 and model: GaussianNB(





[32m2023-10-22 12:47:20.828[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%79 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.8 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.850[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%80 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.8 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.880[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:20.881[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.85[0m
[32m2023-10-22 12:47:20.881[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%81 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.85 and model: GaussianN





[32m2023-10-22 12:47:21.082[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%88 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.128[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%89 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.153[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%90 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.180[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.181[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize





[32m2023-10-22 12:47:21.386[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%98 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.414[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%99 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.442[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%100 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.001)[0m
[32m2023-10-22 12:47:21.468[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:21.468[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSi





[32m2023-10-22 12:47:21.681[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%109 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:21.710[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%110 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:21.736[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:21.737[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.2[0m
[32m2023-10-22 12:47:21.738[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%111 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.2 and mod





[32m2023-10-22 12:47:21.947[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%118 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:21.973[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%119 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:21.999[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%120 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.025[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.026[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m7





[32m2023-10-22 12:47:22.244[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%129 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.270[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%130 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.312[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.313[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.4[0m
[32m2023-10-22 12:47:22.314[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%131 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.4 and mod





[32m2023-10-22 12:47:22.518[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%138 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.543[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%139 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.569[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%140 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.593[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.594[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m7





[32m2023-10-22 12:47:22.812[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%149 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.836[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%150 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.861[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:22.861[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.6[0m
[32m2023-10-22 12:47:22.862[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%151 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.6 and mod





[32m2023-10-22 12:47:23.082[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%160 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.113[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.114[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.7[0m
[32m2023-10-22 12:47:23.114[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%161 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.139[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%162 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model





[32m2023-10-22 12:47:23.335[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%170 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.371[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.371[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.8[0m
[32m2023-10-22 12:47:23.372[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%171 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.397[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%172 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model





[32m2023-10-22 12:47:23.597[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%180 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.625[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.626[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.85[0m
[32m2023-10-22 12:47:23.626[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%181 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.650[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%182 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and mo





[32m2023-10-22 12:47:23.840[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%190 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.868[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.869[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.9[0m
[32m2023-10-22 12:47:23.869[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%191 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:23.891[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%192 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and mode





[32m2023-10-22 12:47:24.074[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%200 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=7)[0m
[32m2023-10-22 12:47:24.102[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:24.103[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.1[0m
[32m2023-10-22 12:47:24.103[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%201 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:24.187[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%202 - Running pipeline for row_seed: 42 an





[32m2023-10-22 12:47:24.330[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%204 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:24.403[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%205 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:24.468[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%206 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:24.529[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%207 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.1 and model: KNeighbo





[32m2023-10-22 12:47:25.053[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%214 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:25.136[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%215 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:25.235[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%216 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:25.332[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%217 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.2 and model: KNeighbo





[32m2023-10-22 12:47:25.867[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%223 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:25.954[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%224 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:26.025[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%225 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:26.098[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%226 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: KNeighbo





[32m2023-10-22 12:47:26.734[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%234 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:26.832[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%235 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:26.903[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%236 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:26.984[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%237 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.4 and model: KNeighbo





[32m2023-10-22 12:47:27.670[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%244 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:27.769[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%245 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:27.940[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%246 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:28.036[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%247 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.5 and model: KNeighbo





[32m2023-10-22 12:47:28.655[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%254 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:28.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%255 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:28.825[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%256 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:28.901[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%257 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.6 and model: KNeighbo





[32m2023-10-22 12:47:29.446[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%264 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:29.550[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%265 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:29.697[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%266 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:29.779[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%267 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.7 and model: KNeighbo





[32m2023-10-22 12:47:30.305[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%274 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:30.457[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%275 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:30.529[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%276 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:30.599[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%277 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.8 and model: KNeighbo





[32m2023-10-22 12:47:31.154[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%283 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:31.243[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%284 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:31.319[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%285 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:31.407[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%286 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: KNei





[32m2023-10-22 12:47:32.078[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%294 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:32.166[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%295 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:32.234[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%296 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='euclidean', n_neighbors=3)[0m
[32m2023-10-22 12:47:32.309[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%297 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.9 and model: KNeighbo





[32m2023-10-22 12:47:32.982[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%302 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:33.705[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%303 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:34.229[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%304 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:34.587[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%305 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:34.942





[32m2023-10-22 12:47:38.395[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%312 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:38.960[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%313 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:39.818[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%314 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:40.170[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%315 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:40.877





[32m2023-10-22 12:47:44.718[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%322 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:45.216[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%323 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:45.895[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%324 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:46.426[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%325 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:47.008





[32m2023-10-22 12:47:51.119[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%332 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:51.789[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%333 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:52.126[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%334 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:52.616[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%335 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:53.287





[32m2023-10-22 12:47:56.943[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%342 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:57.440[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%343 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:58.118[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%344 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:58.781[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%345 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:47:59.776





[32m2023-10-22 12:48:03.759[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%352 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:04.247[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%353 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:04.917[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%354 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:05.404[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%355 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:05.896





[32m2023-10-22 12:48:10.374[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%362 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:11.015[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%363 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:11.992[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%364 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:13.141[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%365 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:14.318





[32m2023-10-22 12:48:18.408[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%372 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:19.057[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%373 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:19.874[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%374 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:20.849[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%375 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:21.490





[32m2023-10-22 12:48:26.109[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%382 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:26.924[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%383 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:27.766[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%384 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:28.595[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%385 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:30.





[32m2023-10-22 12:48:35.935[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%392 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:36.906[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%393 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:38.070[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%394 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:39.245[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%395 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=9)[0m
[32m2023-10-22 12:48:40.419





In [6]:
# mean and standard deviation for all metrics
results_df_with_mean_std = results_df.groupby(['model', 'unlabeled']).agg({'acc': ['mean', 'std'], 'f1': ['mean', 'std'], 'precision': ['mean', 'std'], 'recall': ['mean', 'std'], 'roc': ['mean', 'std']})
results_df_with_mean_std = results_df_with_mean_std.reset_index(level=1)
results_df_with_mean_std.columns = ["_".join(col) for col in results_df_with_mean_std.columns.values]
results_df_with_mean_std.rename(columns={'unlabeled_': 'unlabeled'}, inplace=True)
results_df_with_mean_std.reset_index(inplace=True)
results_df_with_mean_std.columns

Index(['model', 'unlabeled', 'acc_mean', 'acc_std', 'f1_mean', 'f1_std',
       'precision_mean', 'precision_std', 'recall_mean', 'recall_std',
       'roc_mean', 'roc_std'],
      dtype='object')

In [8]:
# Metrics with tabulate
from tabulate import tabulate

print(tabulate(results_df_with_mean_std, headers='keys', tablefmt='psql', showindex=False))

+------------------------+-------------+------------+-------------+-----------+-------------+------------------+-----------------+---------------+--------------+------------+-------------+
| model                  |   unlabeled |   acc_mean |     acc_std |   f1_mean |      f1_std |   precision_mean |   precision_std |   recall_mean |   recall_std |   roc_mean |     roc_std |
|------------------------+-------------+------------+-------------+-----------+-------------+------------------+-----------------+---------------+--------------+------------+-------------|
| DecisionTreeClassifier |        0.1  |   1        | 0           |  1        | 0           |         1        |     0           |      1        |  0           |   1        | 0           |
| DecisionTreeClassifier |        0.2  |   0.997785 | 0.00286005  |  0.997711 | 0.0029546   |         0.995449 |     0.00587558  |      1        |  0           |   0.997865 | 0.00275657  |
| DecisionTreeClassifier |        0.3  |   0.998338 | 0