In [1]:
##################
# Libraries
##################
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC, SVC
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from loguru import logger

In [2]:
##################
# Constants
##################
MODEL_SEED=42
ROWS_SEED=[24, 42, 206, 602, 412, 214, 754, 457, 2023, 3202]
SIZE_OF_UNLABELLED_DATA = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9]
MODELS = [
    GaussianNB(var_smoothing=0.01519911082952933),
    DecisionTreeClassifier(criterion='gini', max_depth=5, min_samples_leaf=1, min_samples_split=3),
    KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance'),
    RandomForestClassifier(criterion='gini', max_depth=11, min_samples_leaf=1, min_samples_split=2, n_estimators=100),
]

In [3]:
##################
# Functions
##################
def encode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Encode dataframe using LabelEncoder"""
    labelencoder = LabelEncoder()

    for col in df.columns:
        df[col] = labelencoder.fit_transform(df[col])

    return df

def get_metrics() -> dict:
    metrics = {
        'accuracy': accuracy_score,
        'precision': precision_score,
        'recall': recall_score,
        'f1': f1_score,
        'roc': roc_auc_score
    }
    return metrics


def get_metrics_df(y_true, y_pred) -> pd.DataFrame:
    metrics = get_metrics()
    df = pd.DataFrame()
    for metric_name, metric in metrics.items():
        df[metric_name] = [metric(y_true, y_pred)]

    return df

def run_pipeline(X_train, X_test, y_train, y_test, size_of_unlabelled_data=0.5, model=None, rows_seed=42) -> dict:
    if model is None:
        raise ValueError("Model is None")
    
    X_train = X_train.copy()
    X_test = X_test.copy()
    y_train = y_train.copy()
    y_test = y_test.copy()

    # Unlabelled data
    rng = np.random.RandomState(rows_seed)
    random_rows_with_rng = rng.choice(X_train.index, size=int(len(X_train)*size_of_unlabelled_data), replace=False)
    y_train.loc[random_rows_with_rng] = -1

    # Scale data
    scaler = RobustScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    self_training_model = SelfTrainingClassifier(model, verbose=False)
    self_training_model.fit(X_train, y_train)

    # Predict
    y_pred = self_training_model.predict(X_test)

    # Evaluate - Get Metrics
    metrics = get_metrics_df(y_test, y_pred)
    accuracy = metrics["accuracy"].values[0]
    precision = metrics["precision"].values[0]
    recall = metrics["recall"].values[0]
    f1 = metrics["f1"].values[0]
    roc = metrics["roc"].values[0]
    

    return {"rows_seed": rows_seed, "model": type(model).__name__, "unlabeled": size_of_unlabelled_data, "acc": accuracy, "precision": precision, "recall": recall, "f1": f1, "roc": roc}

def run_pipeline_self(X_train, X_test, y_train, y_test) -> pd.DataFrame:
    run_counter = 0
    results = []

    for model in MODELS:
        for value in SIZE_OF_UNLABELLED_DATA:
            logger.info(f'Model: {model}')
            logger.info(f'Size of unlabelled data: {value}')
            for row_seed in ROWS_SEED:
                run_counter += 1
                logger.info(f"%{run_counter} - Running pipeline for row_seed: {row_seed} and size_of_unlabelled_data: {value} and model: {model}")
                result = run_pipeline(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, size_of_unlabelled_data=value, model=model, rows_seed=row_seed)
                results.append(result)
            print('\n')

    results_rows_seed = []
    results_models = []
    results_unlabeled = []
    results_acc = []
    results_precision = []
    results_recall = []
    results_f1 = []
    results_roc = []

    for result in results:
        results_rows_seed.append(result.get('rows_seed'))
        results_models.append(result.get('model'))
        results_unlabeled.append(result.get('unlabeled'))
        results_acc.append(result.get('acc'))
        results_precision.append(result.get('precision'))
        results_recall.append(result.get('recall'))
        results_f1.append(result.get('f1'))
        results_roc.append(result.get('roc'))


    results_df = pd.DataFrame({'rows_seed': results_rows_seed, 'model': results_models, 'unlabeled': results_unlabeled, 'acc': results_acc, 'precision': results_precision, 'recall': results_recall, 'f1': results_f1, 'roc': results_roc})

    return results_df

    

In [4]:
df = pd.read_excel('../../datasets/Pumpkin_Seeds_Dataset.xlsx')

labelencoder = LabelEncoder()
df['Class'] = labelencoder.fit_transform(df['Class'])


X = df.drop('Class', axis=1)
y = df['Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=MODEL_SEED)

In [5]:
results_df = run_pipeline_self(X_train, X_test, y_train, y_test)

[32m2023-10-22 14:25:46.791[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:46.792[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.1[0m
[32m2023-10-22 14:25:46.792[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%1 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:46.808[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%2 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:46.822[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%3 - Running pipeline for row_seed: 206 and size_of_unlabel







[32m2023-10-22 14:25:47.146[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%25 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.164[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%26 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.182[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%27 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.197[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%28 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.015199110829529







[32m2023-10-22 14:25:47.466[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%45 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.487[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%46 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.507[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%47 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.525[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%48 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.015199110829529







[32m2023-10-22 14:25:47.779[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%64 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.796[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%65 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.813[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%66 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:47.835[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%67 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.015199110829529







[32m2023-10-22 14:25:48.115[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%84 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:48.131[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%85 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:48.147[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%86 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082952933)[0m
[32m2023-10-22 14:25:48.164[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%87 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.01519911082







[32m2023-10-22 14:25:48.437[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%102 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:48.480[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%103 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:48.514[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%104 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:48.548[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%105 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.1 and model: Decisi





[32m2023-10-22 14:25:49.029[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%117 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.072[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%118 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.114[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%119 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.150[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%120 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.2 and model: Dec





[32m2023-10-22 14:25:49.424[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%127 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.465[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%128 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.498[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%129 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.531[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%130 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.3 and model: Dec





[32m2023-10-22 14:25:49.770[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%135 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.802[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%136 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.842[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%137 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:49.880[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%138 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.4 and model: Decis





[32m2023-10-22 14:25:50.239[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%146 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:50.281[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%147 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:50.327[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%148 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:50.366[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%149 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.5 and model: Deci





[32m2023-10-22 14:25:50.663[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%156 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:50.720[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%157 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:50.759[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%158 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:50.804[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%159 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.6 and model: Deci





[32m2023-10-22 14:25:51.127[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%166 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.187[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%167 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.225[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%168 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.261[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%169 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.7 and model: Deci





[32m2023-10-22 14:25:51.553[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%177 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.589[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%178 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.624[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%179 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.657[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%180 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.8 and model: Dec





[32m2023-10-22 14:25:51.904[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%185 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.942[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%186 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:51.984[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%187 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:52.020[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%188 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.85 and model: D





[32m2023-10-22 14:25:52.339[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%197 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:52.365[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%198 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:52.391[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%199 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=5, min_samples_split=3)[0m
[32m2023-10-22 14:25:52.429[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%200 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.9 and model: Dec





[32m2023-10-22 14:25:52.686[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%208 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:52.719[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%209 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:52.754[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%210 - Running pipeline for row_seed: 3202 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:52.789[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: KNeighborsClassifier(metr





[32m2023-10-22 14:25:53.000[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%216 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.034[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%217 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.078[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%218 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.116[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%219 - Running pipeline for row_se





[32m2023-10-22 14:25:53.404[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%226 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.461[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%227 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.527[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%228 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.564[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%229 - Running pipeline for row_se





[32m2023-10-22 14:25:53.868[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%235 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.913[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%236 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:53.964[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%237 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:54.005[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%238 - Running pipeline for row_se





[32m2023-10-22 14:25:54.380[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%245 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:54.434[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%246 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:54.488[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%247 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:54.542[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%248 - Running pipeline for row_se





[32m2023-10-22 14:25:54.918[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%255 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:54.978[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%256 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:55.031[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%257 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:55.079[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%258 - Running pipeline for row_se





[32m2023-10-22 14:25:55.479[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%266 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:55.527[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%267 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:55.582[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%268 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:55.630[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%269 - Running pipeline for row_se





[32m2023-10-22 14:25:55.935[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%275 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:55.987[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%276 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:56.035[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%277 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:56.084[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%278 - Running pipeline for row_se





[32m2023-10-22 14:25:56.451[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%285 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:56.498[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%286 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:56.547[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%287 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:56.595[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%288 - Running pipeline for row





[32m2023-10-22 14:25:56.991[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%296 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:57.035[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%297 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:57.085[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%298 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')[0m
[32m2023-10-22 14:25:57.130[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%299 - Running pipeline for row_se





[32m2023-10-22 14:25:59.081[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%302 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:00.579[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%303 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:03.034[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%304 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:04.257[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%305 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:06.





[32m2023-10-22 14:26:18.805[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%312 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:22.046[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%313 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:25.284[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%314 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:27.620[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%315 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:30.





[32m2023-10-22 14:26:41.978[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%322 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:44.440[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%323 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:47.442[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%324 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:50.495[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%325 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:26:53.





[32m2023-10-22 14:27:08.043[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%332 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:11.035[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%333 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:13.928[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%334 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:16.674[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%335 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:19.





[32m2023-10-22 14:27:35.015[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%342 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:37.866[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%343 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:40.790[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%344 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:42.604[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%345 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:27:45.





[32m2023-10-22 14:28:01.859[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%352 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:04.667[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%353 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:07.529[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%354 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:10.283[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%355 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:13.





[32m2023-10-22 14:28:29.843[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%362 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:32.583[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%363 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:35.302[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%364 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:37.981[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%365 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:40.





[32m2023-10-22 14:28:56.263[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%372 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:28:58.722[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%373 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:01.274[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%374 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:03.765[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%375 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:06.





[32m2023-10-22 14:29:20.533[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%382 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:22.896[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%383 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:25.425[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%384 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:27.761[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%385 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29





[32m2023-10-22 14:29:44.814[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%392 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:47.030[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%393 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:49.446[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%394 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:51.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%395 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=11)[0m
[32m2023-10-22 14:29:53.





In [6]:
# mean and standard deviation for all metrics
results_df_with_mean_std = results_df.groupby(['model', 'unlabeled']).agg({'acc': ['mean', 'std'], 'f1': ['mean', 'std'], 'precision': ['mean', 'std'], 'recall': ['mean', 'std'], 'roc': ['mean', 'std']})
results_df_with_mean_std = results_df_with_mean_std.reset_index(level=1)
results_df_with_mean_std.columns = ["_".join(col) for col in results_df_with_mean_std.columns.values]
results_df_with_mean_std.rename(columns={'unlabeled_': 'unlabeled'}, inplace=True)
results_df_with_mean_std.reset_index(inplace=True)
results_df_with_mean_std.columns

Index(['model', 'unlabeled', 'acc_mean', 'acc_std', 'f1_mean', 'f1_std',
       'precision_mean', 'precision_std', 'recall_mean', 'recall_std',
       'roc_mean', 'roc_std'],
      dtype='object')

In [7]:
# Metrics with tabulate
from tabulate import tabulate

print(tabulate(results_df_with_mean_std, headers='keys', tablefmt='psql', showindex=False))

+------------------------+-------------+------------+------------+-----------+------------+------------------+-----------------+---------------+--------------+------------+------------+
| model                  |   unlabeled |   acc_mean |    acc_std |   f1_mean |     f1_std |   precision_mean |   precision_std |   recall_mean |   recall_std |   roc_mean |    roc_std |
|------------------------+-------------+------------+------------+-----------+------------+------------------+-----------------+---------------+--------------+------------+------------|
| DecisionTreeClassifier |        0.1  |     0.855  | 0.00849837 |  0.850007 | 0.00967974 |         0.877062 |      0.0225789  |      0.825703 |   0.0280933  |   0.854883 | 0.0085024  |
| DecisionTreeClassifier |        0.2  |     0.852  | 0.0077746  |  0.845717 | 0.00847551 |         0.879893 |      0.020128   |      0.814859 |   0.0221147  |   0.851852 | 0.00776468 |
| DecisionTreeClassifier |        0.3  |     0.8506 | 0.00760409 |  0.