In [1]:
##################
# Libraries
##################
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC, SVC
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from loguru import logger

In [3]:
##################
# Constants
##################
# Seed for the train/test split and for the estimators that have internal randomness.
MODEL_SEED = 42
# One seed per repetition; each seed selects a different set of training rows to unlabel.
ROWS_SEED = [24, 42, 206, 602, 412, 214, 754, 457, 2023, 3202]
# Shares of the training labels that are hidden (replaced by -1) in each experiment.
SIZE_OF_UNLABELLED_DATA = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9]
# Base estimators wrapped by SelfTrainingClassifier. The tree-based ones receive
# random_state so that a rerun of the notebook reproduces the same metrics.
MODELS = [
    GaussianNB(var_smoothing=0.8111308307896871),
    DecisionTreeClassifier(criterion='gini', max_depth=15, min_samples_leaf=1, min_samples_split=2, random_state=MODEL_SEED),
    KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance'),
    RandomForestClassifier(criterion='gini', max_depth=15, min_samples_leaf=1, min_samples_split=3, n_estimators=200, random_state=MODEL_SEED),
]

In [4]:
##################
# Functions
##################
def encode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a label-encoded copy of ``df``.

    Every column is replaced by the integer codes produced by a
    ``LabelEncoder`` fitted on that column alone. The frame passed in is not
    modified, so calling this function twice on the same input is safe.

    Args:
        df: Frame whose columns should all be encoded.

    Returns:
        A new DataFrame with the same index and columns as ``df``, holding
        the encoded values.
    """
    encoded = df.copy()

    for col in encoded.columns:
        # A fresh encoder per column keeps each fitted mapping independent.
        encoded[col] = LabelEncoder().fit_transform(encoded[col])

    return encoded

def get_metrics() -> dict:
    """Return the evaluation metrics keyed by their short name.

    Returns:
        Dict mapping 'accuracy', 'precision', 'recall', 'f1' and 'roc' to the
        corresponding scikit-learn scoring function.
    """
    return dict(
        accuracy=accuracy_score,
        precision=precision_score,
        recall=recall_score,
        f1=f1_score,
        roc=roc_auc_score,
    )


def get_metrics_df(y_true, y_pred) -> pd.DataFrame:
    """Score predictions with every metric returned by ``get_metrics``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, passed as the second argument to each metric.

    Returns:
        A one-row DataFrame with one column per metric name.
    """
    # Each score is wrapped in a list so the frame gets exactly one row.
    scores = {
        name: [scorer(y_true, y_pred)]
        for name, scorer in get_metrics().items()
    }
    return pd.DataFrame(scores)

def run_pipeline(X_train, X_test, y_train, y_test, size_of_unlabelled_data=0.5, model=None, rows_seed=42) -> dict:
    """Train a self-training classifier on partially unlabelled data and score it.

    A random share of the training labels is replaced by ``-1`` (the marker
    ``SelfTrainingClassifier`` uses for unlabelled samples), the features are
    scaled with a ``RobustScaler`` fitted on the training split, ``model`` is
    wrapped in a ``SelfTrainingClassifier`` and the fitted classifier is
    evaluated on the test split.

    Args:
        X_train: Training features; its index is used to pick the rows to unlabel.
        X_test: Test features.
        y_train: Training labels (pandas Series indexed like ``X_train``).
            The caller's Series is not modified.
        y_test: Test labels.
        size_of_unlabelled_data: Share of training rows whose label is hidden;
            must satisfy ``0 <= share < 1``.
        model: Base estimator handed to ``SelfTrainingClassifier``.
        rows_seed: Seed of the random generator that selects the hidden rows.

    Returns:
        Dict with keys ``rows_seed``, ``model`` (class name of ``model``),
        ``unlabeled``, ``acc``, ``precision``, ``recall``, ``f1`` and ``roc``.

    Raises:
        ValueError: If ``model`` is None or ``size_of_unlabelled_data`` is
            outside ``[0, 1)``.
    """
    if model is None:
        raise ValueError("Model is None")
    if not 0 <= size_of_unlabelled_data < 1:
        raise ValueError("size_of_unlabelled_data must be in the range [0, 1)")

    # Only the labels are modified below, so only they need a defensive copy.
    y_train = y_train.copy()

    # Unlabelled data: hide the labels of a reproducible random subset of rows.
    rng = np.random.RandomState(rows_seed)
    n_unlabelled = int(len(X_train) * size_of_unlabelled_data)
    unlabelled_rows = rng.choice(X_train.index, size=n_unlabelled, replace=False)
    y_train.loc[unlabelled_rows] = -1

    # Scale data: fit on the training split only, then reuse it for the test split.
    scaler = RobustScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    self_training_model = SelfTrainingClassifier(model, verbose=False)
    self_training_model.fit(X_train, y_train)

    # Predict
    y_pred = self_training_model.predict(X_test)

    # Evaluate - label-based metrics come from the shared helper.
    scores = get_metrics_df(y_test, y_pred).iloc[0]

    # ROC AUC is a ranking metric: computing it from hard labels collapses the
    # curve to a single threshold, so score it from the predicted probability
    # of the second class in ``classes_`` instead (binary target assumed, as
    # the default precision/recall/f1 settings above already require).
    y_score = self_training_model.predict_proba(X_test)[:, 1]
    roc = roc_auc_score(y_test, y_score)

    return {
        "rows_seed": rows_seed,
        "model": type(model).__name__,
        "unlabeled": size_of_unlabelled_data,
        "acc": scores["accuracy"],
        "precision": scores["precision"],
        "recall": scores["recall"],
        "f1": scores["f1"],
        "roc": roc,
    }

def run_pipeline_self(X_train, X_test, y_train, y_test) -> pd.DataFrame:
    """Run ``run_pipeline`` for every model / unlabelled share / row seed combination.

    Iterates over ``MODELS``, ``SIZE_OF_UNLABELLED_DATA`` and ``ROWS_SEED``
    (in that nesting order), logs the progress of each run and collects the
    result dicts returned by ``run_pipeline``.

    Args:
        X_train: Training features forwarded to ``run_pipeline``.
        X_test: Test features forwarded to ``run_pipeline``.
        y_train: Training labels forwarded to ``run_pipeline``.
        y_test: Test labels forwarded to ``run_pipeline``.

    Returns:
        DataFrame with one row per run and the columns ``rows_seed``,
        ``model``, ``unlabeled``, ``acc``, ``precision``, ``recall``, ``f1``
        and ``roc``.
    """
    # Fixing the column list keeps the schema stable even when no run was executed.
    result_columns = ['rows_seed', 'model', 'unlabeled', 'acc', 'precision', 'recall', 'f1', 'roc']
    run_counter = 0
    results = []

    for model in MODELS:
        for value in SIZE_OF_UNLABELLED_DATA:
            logger.info(f'Model: {model}')
            logger.info(f'Size of unlabelled data: {value}')
            for row_seed in ROWS_SEED:
                run_counter += 1
                logger.info(f"%{run_counter} - Running pipeline for row_seed: {row_seed} and size_of_unlabelled_data: {value} and model: {model}")
                results.append(
                    run_pipeline(
                        X_train=X_train,
                        X_test=X_test,
                        y_train=y_train,
                        y_test=y_test,
                        size_of_unlabelled_data=value,
                        model=model,
                        rows_seed=row_seed,
                    )
                )
            print('\n')

    # The records are already dicts keyed by column name, so build the frame directly.
    return pd.DataFrame(results, columns=result_columns)

    

In [7]:
# Load the reservations and index them by booking id.
df = pd.read_csv('../../datasets/Hotel Reservations.csv').set_index('Booking_ID')

# Integer-encode the categorical features and the target. The single encoder
# is refitted for every column, so after the loop it only holds the mapping
# of 'booking_status'.
labelencoder = LabelEncoder()
for categorical_column in ('type_of_meal_plan', 'room_type_reserved', 'market_segment_type', 'booking_status'):
    df[categorical_column] = labelencoder.fit_transform(df[categorical_column])

# Separate the target from the features.
y = df['booking_status']
X = df.drop(columns='booking_status')

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=MODEL_SEED)

In [8]:
# Run the full grid (every model x unlabelled share x row seed) and collect one row of metrics per run.
results_df = run_pipeline_self(X_train, X_test, y_train, y_test)

[32m2023-10-22 11:01:35.120[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m75[0m - [1mModel: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:35.121[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m76[0m - [1mSize of unlabelled data: 0.1[0m
[32m2023-10-22 11:01:35.121[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%1 - Running pipeline for row_seed: 24 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:35.185[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%2 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:35.235[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%3 - Running pipeline for row_seed: 206 and size_of_unlabelled





[32m2023-10-22 11:01:35.869[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%16 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.2 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:35.917[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%17 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.2 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:35.964[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%18 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.2 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.009[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%19 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.2 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:36.334[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%26 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.390[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%27 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.443[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%28 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.494[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%29 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.3 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:36.829[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%36 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.4 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.881[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%37 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.4 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.926[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%38 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.4 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:36.970[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%39 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.4 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:37.298[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%46 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:37.346[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%47 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:37.402[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%48 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:37.446[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%49 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.5 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:37.753[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%56 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:37.801[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%57 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:37.845[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%58 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:37.888[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%59 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.6 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:38.204[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%66 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:38.252[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%67 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:38.308[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%68 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:38.361[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%69 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.7 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:38.690[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%76 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.8 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:38.733[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%77 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.8 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:38.776[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%78 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.8 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:38.819[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%79 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.8 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:39.151[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%86 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:39.199[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%87 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:39.245[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%88 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:39.293[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%89 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.85 and model: GaussianNB(var_smoothing=0.8111308307896





[32m2023-10-22 11:01:39.632[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%96 - Running pipeline for row_seed: 214 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:39.680[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%97 - Running pipeline for row_seed: 754 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:39.727[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%98 - Running pipeline for row_seed: 457 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.8111308307896871)[0m
[32m2023-10-22 11:01:39.774[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%99 - Running pipeline for row_seed: 2023 and size_of_unlabelled_data: 0.9 and model: GaussianNB(var_smoothing=0.8111308307896871)





[32m2023-10-22 11:01:40.298[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%102 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:40.782[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%103 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:41.200[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%104 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:41.616[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%105 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.1 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:42.





[32m2023-10-22 11:01:44.815[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%112 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:45.287[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%113 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:45.886[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%114 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:46.613[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%115 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.2 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:47.





[32m2023-10-22 11:01:49.684[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%122 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:50.216[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%123 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:50.825[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%124 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:51.403[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%125 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.3 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:52.





[32m2023-10-22 11:01:54.940[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%132 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:55.402[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%133 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:55.928[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%134 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:56.320[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%135 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.4 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:01:56.





[32m2023-10-22 11:02:00.274[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%142 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:00.659[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%143 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:01.101[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%144 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:01.688[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%145 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.5 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:02.





[32m2023-10-22 11:02:05.576[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%152 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:06.223[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%153 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:06.668[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%154 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:07.263[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%155 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.6 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:07.





[32m2023-10-22 11:02:10.743[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%162 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:11.180[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%163 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:11.614[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%164 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:12.131[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%165 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.7 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:12.





[32m2023-10-22 11:02:15.601[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%172 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:16.160[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%173 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:16.667[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%174 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:17.166[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%175 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.8 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:17.





[32m2023-10-22 11:02:20.493[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%182 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:20.857[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%183 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:21.213[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%184 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:21.581[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%185 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.85 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02





[32m2023-10-22 11:02:24.452[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%192 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:24.809[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%193 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:25.313[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%194 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:25.818[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%195 - Running pipeline for row_seed: 412 and size_of_unlabelled_data: 0.9 and model: DecisionTreeClassifier(max_depth=15)[0m
[32m2023-10-22 11:02:26.





[32m2023-10-22 11:02:28.837[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%202 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:29.628[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%203 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:30.452[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%204 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:31.167[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%205 - Running pipeline for row_





[32m2023-10-22 11:02:36.887[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%212 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:37.911[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%213 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:39.151[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%214 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:40.182[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%215 - Running pipeline for row_





[32m2023-10-22 11:02:48.073[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%222 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:49.426[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%223 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:50.708[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%224 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:02:51.985[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%225 - Running pipeline for row_





[32m2023-10-22 11:03:01.911[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%232 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:03.475[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%233 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:05.158[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%234 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:06.730[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%235 - Running pipeline for row_





[32m2023-10-22 11:03:19.687[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%242 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:21.634[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%243 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:23.671[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%244 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:25.740[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%245 - Running pipeline for row_





[32m2023-10-22 11:03:39.636[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%252 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:41.888[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%253 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:44.058[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%254 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:03:46.287[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%255 - Running pipeline for row_





[32m2023-10-22 11:04:01.472[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%262 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:03.706[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%263 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:05.907[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%264 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:08.193[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%265 - Running pipeline for row_





[32m2023-10-22 11:04:23.954[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%272 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:26.075[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%273 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:28.095[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%274 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:30.366[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%275 - Running pipeline for row_





[32m2023-10-22 11:04:45.828[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%282 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:47.896[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%283 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:49.964[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%284 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:04:52.005[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%285 - Running pipeline for r





[32m2023-10-22 11:05:06.351[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%292 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:05:08.462[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%293 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:05:10.422[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%294 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: KNeighborsClassifier(metric='manhattan', n_neighbors=15, weights='distance')[0m
[32m2023-10-22 11:05:12.337[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%295 - Running pipeline for row_





[32m2023-10-22 11:05:56.506[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%302 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:06:29.086[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%303 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:07:01.348[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%304 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.1 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:07:33.511[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%305 - Running pipeline for row_see





[32m2023-10-22 11:11:19.877[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%312 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:11:51.609[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%313 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:12:23.390[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%314 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.2 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:12:55.587[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%315 - Running pipeline for row_see





[32m2023-10-22 11:16:38.271[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%322 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:17:08.273[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%323 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:17:38.456[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%324 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.3 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:18:08.642[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%325 - Running pipeline for row_see





[32m2023-10-22 11:21:39.601[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%332 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:22:09.212[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%333 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:22:38.702[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%334 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.4 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:23:08.240[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%335 - Running pipeline for row_see





[32m2023-10-22 11:26:53.022[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%342 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:27:21.661[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%343 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:27:50.537[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%344 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.5 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:28:19.337[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%345 - Running pipeline for row_see





[32m2023-10-22 11:31:42.374[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%352 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:32:10.537[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%353 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:32:38.896[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%354 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.6 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:33:07.348[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%355 - Running pipeline for row_see





[32m2023-10-22 11:36:24.759[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%362 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:36:52.275[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%363 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:37:19.881[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%364 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.7 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:37:47.577[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%365 - Running pipeline for row_see





[32m2023-10-22 11:41:04.291[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%372 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:41:31.581[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%373 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:41:58.648[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%374 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.8 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:42:24.754[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%375 - Running pipeline for row_see





[32m2023-10-22 11:45:30.016[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%382 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:45:56.203[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%383 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:46:22.875[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%384 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.85 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:46:49.780[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%385 - Running pipeline for row_





[32m2023-10-22 11:49:57.172[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%392 - Running pipeline for row_seed: 42 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:50:23.184[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%393 - Running pipeline for row_seed: 206 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:50:48.983[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%394 - Running pipeline for row_seed: 602 and size_of_unlabelled_data: 0.9 and model: RandomForestClassifier(max_depth=15, min_samples_split=3, n_estimators=200)[0m
[32m2023-10-22 11:51:14.463[0m | [1mINFO    [0m | [36m__main__[0m:[36mrun_pipeline_self[0m:[36m79[0m - [1m%395 - Running pipeline for row_see





In [9]:
# Summarise each metric as mean and standard deviation per (model, unlabeled) pair.
results_df_with_mean_std = (
    results_df
    .groupby(['model', 'unlabeled'])
    .agg({metric: ['mean', 'std'] for metric in ('acc', 'f1', 'precision', 'recall', 'roc')})
)
# Collapse the two-level (metric, statistic) header into flat "<metric>_<statistic>" labels
# while the grouping keys are still in the row index, so only metric columns get a suffix.
results_df_with_mean_std.columns = [
    f"{metric}_{stat}" for metric, stat in results_df_with_mean_std.columns
]
# Turn both grouping keys back into ordinary leading columns: 'model', then 'unlabeled'.
results_df_with_mean_std = results_df_with_mean_std.reset_index()
results_df_with_mean_std.columns

Index(['model', 'unlabeled', 'acc_mean', 'acc_std', 'f1_mean', 'f1_std',
       'precision_mean', 'precision_std', 'recall_mean', 'recall_std',
       'roc_mean', 'roc_std'],
      dtype='object')