In [1]:
import numpy as np
import pandas as pd
from lazypredict.Supervised import LazyClassifier
import pickle
from sklearn.model_selection import train_test_split
import pathlib
import os
from sklearn.preprocessing import StandardScaler, OneHotEncoder



In [2]:
PROJECT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(PROJECT_PATH, "..")
MODELS_DIR = pathlib.Path(PROJECT_PATH) / "store" / "models"

DATASET_DIR = pathlib.Path(PROJECT_PATH) / "data"

In [13]:
SAMPLE_SIZE = 0.2


def preprocess(X: pd.DataFrame, y: pd.Series, path: pathlib.Path, sample_size: float = SAMPLE_SIZE):
    # Identify categorical and numeric columns
    categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
    numeric_cols = X.select_dtypes(include=['number']).columns.tolist()
    
    # Initialize lists to store processed columns
    processed_columns = []

    # If there are categorical columns, apply one-hot encoding
    if categorical_cols:
        print("Encoding categorical columns...")
        onehot_encoder = OneHotEncoder(categories='auto', sparse=False)
        X_categorical = pd.DataFrame(onehot_encoder.fit_transform(X[categorical_cols]),
                                     columns=onehot_encoder.get_feature_names_out(categorical_cols))
        processed_columns.append(X_categorical)
    
    # Apply standard scaling to the numeric columns
    if numeric_cols:
        print("Scaling numerical columns...")
        scaler = StandardScaler()
        X_numeric = pd.DataFrame(scaler.fit_transform(X[numeric_cols]), columns=numeric_cols)
        processed_columns.append(X_numeric)
    
    # Combine the processed columns
    if processed_columns:
        X_processed = pd.concat(processed_columns, axis=1)
    else:
        X_processed = X.copy()  # If there are no categorical or numeric columns, keep the original dataframe
    
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.1,stratify=y, random_state=42)   
    
    # Scale the training and test sets
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Sample a subset of the training data
    _, X_sample, _, y_sample = train_test_split(X_train, y_train, test_size=sample_size,stratify=y_train, random_state=42)
    
    # Print the shapes of the datasets
    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_train shape:", y_train.shape)
    print("y_test shape:", y_test.shape)
    print("X_sample shape:", X_sample.shape)
    print("y_sample shape:", y_sample.shape)
    
    # Save the processed data
    with open(path / f'dataset_{sample_size}.pkl', 'wb') as f:
        pickle.dump([X_sample, y_sample.values, X_test, y_test.values], f)
        
    return X_train, y_train, X_test, y_test

In [4]:
def train_and_save_models(X_train, y_train, X_test, y_test):
    clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
    _,predictions = clf.fit(X_train, X_test, y_train, y_test)
    models_to_save = "RandomForestClassifier-BernoulliNB-CalibratedClassifierCV-LinearDiscriminantAnalysis-LogisticRegression-AdaBoostClassifier-ExtraTreesClassifier-XGBClassifier-LGBMClassifier".split("-")
    models = []
    for model in models_to_save:
        if model in clf.models:
            models.append((model, clf.models[model]))

    with open(MODELS_DIR / f'{DATASET_NAME}_{SAMPLE_SIZE}_models.pkl', 'wb') as f:
        pickle.dump(models, f)
    
    return predictions
    

# Secondary Mushroom

In [32]:
DATASET_NAME = "secondary_mushroom"

DATASET_PATH = DATASET_DIR / DATASET_NAME
DATASET_PATH.mkdir(parents=True, exist_ok=True)

In [33]:
from ucimlrepo import fetch_ucirepo 

  
path = DATASET_PATH / "dataset.csv"

if path.exists():
    dataset = pd.read_csv(path)
    X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

else:
    # fetch dataset 
    secondary_mushroom = fetch_ucirepo(id=848) 
      
    # data (as pandas dataframes) 
    X = secondary_mushroom.data.features 
    y = secondary_mushroom.data.targets 
     
      
    pd.concat([X, y], axis=1).to_csv(DATASET_PATH / "dataset.csv", index=False)
      
y.value_counts()


class
p        33888
e        27181
Name: count, dtype: int64

In [34]:
y.replace({"p": 0, "e": 1}, inplace=True)
y.value_counts()

class
0        33888
1        27181
Name: count, dtype: int64

In [35]:
X_train, y_train, X_test, y_test = preprocess(X, y, DATASET_PATH)


Encoding categorical columns...
Scaling numerical columns...
X_train shape: (54962, 128)
X_test shape: (6107, 128)
y_train shape: (54962, 1)
y_test shape: (6107, 1)
X_sample shape: (10993, 128)
y_sample shape: (10993, 1)


In [36]:
train_and_save_models(X_train, y_train, X_test, y_test)


 97%|█████████▋| 28/29 [26:46<00:12, 12.26s/it]   

[LightGBM] [Info] Number of positive: 24463, number of negative: 30499
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.015600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1140
[LightGBM] [Info] Number of data points in the train set: 54962, number of used features: 128
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.445089 -> initscore=-0.220532
[LightGBM] [Info] Start training from score -0.220532


100%|██████████| 29/29 [26:47<00:00, 55.45s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreeClassifier,1.0,1.0,1.0,1.0,0.34
ExtraTreesClassifier,1.0,1.0,1.0,1.0,6.21
RandomForestClassifier,1.0,1.0,1.0,1.0,6.06
KNeighborsClassifier,1.0,1.0,1.0,1.0,1.87
LabelPropagation,1.0,1.0,1.0,1.0,306.7
LabelSpreading,1.0,1.0,1.0,1.0,766.66
BaggingClassifier,1.0,1.0,1.0,1.0,5.84
LGBMClassifier,1.0,1.0,1.0,1.0,1.26
XGBClassifier,1.0,1.0,1.0,1.0,1.31
DecisionTreeClassifier,1.0,1.0,1.0,1.0,1.12


# Dry Bean

In [27]:
DATASET_NAME = "dry_bean"

DATASET_PATH = DATASET_DIR / DATASET_NAME
DATASET_PATH.mkdir(parents=True, exist_ok=True)

In [28]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
  
path = DATASET_PATH / "dataset.csv"

if path.exists():
    dataset = pd.read_csv(path)
    X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

else:
    # fetch dataset 
    dry_bean = fetch_ucirepo(id=602) 
      
    # data (as pandas dataframes) 
    X = dry_bean.data.features 
    y = dry_bean.data.targets 
      
    pd.concat([X, y], axis=1).to_csv(DATASET_PATH / "dataset.csv", index=False)
      
y.value_counts()

Class   
DERMASON    3546
SIRA        2636
SEKER       2027
HOROZ       1928
CALI        1630
BARBUNYA    1322
BOMBAY       522
Name: count, dtype: int64

In [29]:
y.replace({"DERMASON": 0, "SIRA": 1, "SEKER": 2, "HOROZ": 3, "CALI":4, "BARBUNYA":5, "BOMBAY": 6}, inplace=True)
y.value_counts()

Class
0        3546
1        2636
2        2027
3        1928
4        1630
5        1322
6         522
Name: count, dtype: int64

In [30]:
X_train, y_train, X_test, y_test = preprocess(X, y, DATASET_PATH)


Scaling numerical columns...
X_train shape: (12249, 16)
X_test shape: (1362, 16)
y_train shape: (12249, 1)
y_test shape: (1362, 1)
X_sample shape: (2450, 16)
y_sample shape: (2450, 1)


In [31]:
train_and_save_models(X_train, y_train, X_test, y_test)


 97%|█████████▋| 28/29 [00:41<00:01,  1.02s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000658 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4080
[LightGBM] [Info] Number of data points in the train set: 12249, number of used features: 16
[LightGBM] [Info] Start training from score -1.345110
[LightGBM] [Info] Start training from score -1.641711
[LightGBM] [Info] Start training from score -1.904412
[LightGBM] [Info] Start training from score -1.954437
[LightGBM] [Info] Start training from score -2.122225
[LightGBM] [Info] Start training from score -2.331491
[LightGBM] [Info] Start training from score -3.260467


100%|██████████| 29/29 [00:42<00:00,  1.48s/it]






Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SVC,0.93,0.94,,0.93,1.0
KNeighborsClassifier,0.92,0.93,,0.92,0.09
RandomForestClassifier,0.93,0.93,,0.93,9.57
LGBMClassifier,0.93,0.93,,0.93,1.54
ExtraTreesClassifier,0.93,0.93,,0.93,1.15
XGBClassifier,0.92,0.93,,0.92,1.53
LogisticRegression,0.92,0.93,,0.92,0.31
LabelSpreading,0.91,0.93,,0.91,13.89
LabelPropagation,0.91,0.92,,0.91,4.29
QuadraticDiscriminantAnalysis,0.91,0.92,,0.91,0.05


# Bank Marketing

In [19]:
DATASET_NAME = "bank_marketing"

DATASET_PATH = DATASET_DIR / DATASET_NAME
DATASET_PATH.mkdir(parents=True, exist_ok=True)

In [23]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
  
path = DATASET_PATH / "dataset.csv"

if path.exists():
    dataset = pd.read_csv(path)
    X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

else:
    # fetch dataset 
    bank_marketing = fetch_ucirepo(id=222) 
      
    # data (as pandas dataframes) 
    X = bank_marketing.data.features
    y = bank_marketing.data.targets 
      
y.value_counts()


y
no     39922
yes     5289
Name: count, dtype: int64

In [24]:
y.replace({"no": 0, "yes": 1}, inplace=True)
y.value_counts()

y
0    39922
1     5289
Name: count, dtype: int64

In [25]:
X_train, y_train, X_test, y_test = preprocess(X, y, DATASET_PATH)


Encoding categorical columns...
Scaling numerical columns...
X_train shape: (40689, 51)
X_test shape: (4522, 51)
y_train shape: (40689,)
y_test shape: (4522,)
X_sample shape: (8138, 51)
y_sample shape: (8138,)


In [26]:
train_and_save_models(X_train, y_train, X_test, y_test)


 97%|█████████▋| 28/29 [05:23<00:06,  6.80s/it]

[LightGBM] [Info] Number of positive: 4760, number of negative: 35929
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1079
[LightGBM] [Info] Number of data points in the train set: 40689, number of used features: 51
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.116985 -> initscore=-2.021297
[LightGBM] [Info] Start training from score -2.021297


100%|██████████| 29/29 [05:23<00:00, 11.17s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NearestCentroid,0.82,0.77,0.77,0.85,0.15
XGBClassifier,0.9,0.71,0.71,0.9,0.53
LGBMClassifier,0.91,0.71,0.71,0.9,0.41
DecisionTreeClassifier,0.88,0.71,0.71,0.88,0.44
GaussianNB,0.85,0.7,0.7,0.86,0.14
BaggingClassifier,0.9,0.69,0.69,0.89,2.62
QuadraticDiscriminantAnalysis,0.57,0.69,0.69,0.65,0.5
RandomForestClassifier,0.91,0.69,0.69,0.9,7.31
LinearDiscriminantAnalysis,0.9,0.68,0.68,0.89,0.66
BernoulliNB,0.85,0.68,0.68,0.85,0.13


# Adult

In [17]:
DATASET_NAME = "adult"

DATASET_PATH = DATASET_DIR / DATASET_NAME
DATASET_PATH.mkdir(parents=True, exist_ok=True)

In [18]:
from ucimlrepo import fetch_ucirepo 

path = DATASET_PATH / "dataset.csv"

if path.exists():
    dataset = pd.read_csv(path)
    X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

else:
    # fetch dataset 
    adult = fetch_ucirepo(id=2) 
      
    # data (as pandas dataframes) 
    X = adult.data.features
    y = adult.data.targets 
      
y.value_counts()

income
<=50K     24720
<=50K.    12435
>50K       7841
>50K.      3846
Name: count, dtype: int64

In [7]:
y.replace({"<=50K": 0, "<=50K.":0, ">50K": 1, ">50K.": 1}, inplace=True)
y.value_counts()

income
0    37155
1    11687
Name: count, dtype: int64

In [14]:
X_train, y_train, X_test, y_test = preprocess(X, y, DATASET_PATH)


Encoding categorical columns...
Scaling numerical columns...
X_train shape: (43957, 111)
X_test shape: (4885, 111)
y_train shape: (43957,)
y_test shape: (4885,)
X_sample shape: (8792, 111)
y_sample shape: (8792,)


In [15]:
train_and_save_models(X_train, y_train, X_test, y_test)


 97%|█████████▋| 28/29 [09:47<00:19, 19.24s/it] 

[LightGBM] [Info] Number of positive: 10518, number of negative: 33439
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006173 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 918
[LightGBM] [Info] Number of data points in the train set: 43957, number of used features: 103
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.239279 -> initscore=-1.156635
[LightGBM] [Info] Start training from score -1.156635


100%|██████████| 29/29 [09:48<00:00, 20.28s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LGBMClassifier,0.88,0.81,0.81,0.88,0.68
XGBClassifier,0.88,0.81,0.81,0.88,0.99
NearestCentroid,0.76,0.79,0.79,0.77,0.23
AdaBoostClassifier,0.86,0.78,0.78,0.86,4.26
BernoulliNB,0.78,0.78,0.78,0.79,0.17
RandomForestClassifier,0.85,0.78,0.78,0.85,9.35
SGDClassifier,0.85,0.78,0.78,0.85,1.95
BaggingClassifier,0.86,0.77,0.77,0.85,4.64
CalibratedClassifierCV,0.86,0.77,0.77,0.85,8.11
LinearSVC,0.85,0.77,0.77,0.85,27.38


# HELOC

In [18]:
DATASET_NAME = "heloc"

DATASET_PATH = DATASET_DIR / DATASET_NAME
DATASET_PATH.mkdir(parents=True, exist_ok=True)

In [20]:

path = DATASET_PATH / "heloc.csv"
dataset = pd.read_csv(path)
dataset.head()

Unnamed: 0,RiskPerformance,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,...,PercentInstallTrades,MSinceMostRecentInqexcl7days,NumInqLast6M,NumInqLast6Mexcl7days,NetFractionRevolvingBurden,NetFractionInstallBurden,NumRevolvingTradesWBalance,NumInstallTradesWBalance,NumBank2NatlTradesWHighUtilization,PercentTradesWBalance
0,Bad,55,144,4,84,20,3,0,83,2,...,43,0,0,0,33,-8,8,1,1,69
1,Bad,61,58,15,41,2,4,4,100,-7,...,67,0,0,0,0,-8,0,-8,-8,0
2,Bad,67,66,5,24,9,0,0,100,-7,...,44,0,4,4,53,66,4,2,1,86
3,Bad,66,169,1,73,28,1,1,93,76,...,57,0,5,4,72,83,6,4,3,91
4,Bad,81,333,27,132,12,0,0,100,-7,...,25,0,1,1,51,89,3,1,0,80


In [21]:
X, y = dataset.iloc[:, 1:], dataset.iloc[:, 0]
y.value_counts()

RiskPerformance
Bad     5459
Good    5000
Name: count, dtype: int64

In [22]:
y = y.replace({"Bad":0, "Good": 1}).astype(int)
y.value_counts()

RiskPerformance
0    5459
1    5000
Name: count, dtype: int64

In [23]:
X_train, y_train, X_test, y_test = preprocess(X, y, DATASET_PATH)


Scaling numerical columns...
X_train shape: (9413, 23)
X_test shape: (1046, 23)
y_train shape: (9413,)
y_test shape: (1046,)
X_sample shape: (1883, 23)
y_sample shape: (1883,)


In [24]:
train_and_save_models(X_train, y_train, X_test, y_test)


 97%|█████████▋| 28/29 [00:25<00:00,  1.11it/s]

[LightGBM] [Info] Number of positive: 4496, number of negative: 4917
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1529
[LightGBM] [Info] Number of data points in the train set: 9413, number of used features: 23
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.477637 -> initscore=-0.089510
[LightGBM] [Info] Start training from score -0.089510


100%|██████████| 29/29 [00:26<00:00,  1.11it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SVC,0.72,0.72,0.72,0.72,3.44
AdaBoostClassifier,0.72,0.72,0.72,0.72,0.4
LinearDiscriminantAnalysis,0.71,0.71,0.71,0.71,0.36
RidgeClassifierCV,0.71,0.71,0.71,0.71,0.46
RidgeClassifier,0.71,0.71,0.71,0.71,0.05
LGBMClassifier,0.71,0.71,0.71,0.71,0.24
LinearSVC,0.71,0.71,0.71,0.71,0.52
CalibratedClassifierCV,0.71,0.71,0.71,0.71,0.17
LogisticRegression,0.71,0.71,0.71,0.71,0.17
BernoulliNB,0.71,0.71,0.71,0.71,0.03


# Gesture Phase

In [37]:
DATASET_NAME = "gesture_phase"

DATASET_PATH = DATASET_DIR / DATASET_NAME
DATASET_PATH.mkdir(parents=True, exist_ok=True)

In [38]:
dataset = pd.read_csv(DATASET_PATH / f"{DATASET_NAME}.csv")
dataset = dataset.iloc[:, 20:]
dataset

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,24,25,26,27,28,29,30,31,32,Phase
0,-0.01,-0.00,0.00,0.01,0.01,0.00,-0.00,0.00,-0.00,0.01,...,0.00,0.01,0.01,0.00,0.01,0.00,0.00,0.00,0.00,D
1,0.00,0.00,-0.00,0.00,0.00,0.00,-0.00,-0.00,0.00,0.00,...,-0.00,0.01,0.01,0.00,0.00,0.00,0.00,0.00,0.00,D
2,-0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,0.00,0.00,0.00,...,-0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,D
3,-0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,-0.00,0.00,0.00,...,-0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,D
4,-0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,0.00,0.00,0.00,...,-0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9868,-0.00,-0.01,0.00,-0.00,0.00,-0.00,-0.00,-0.00,0.00,0.00,...,-0.00,0.01,0.00,0.01,0.00,0.00,0.00,0.00,0.00,D
9869,-0.00,0.00,-0.00,0.00,-0.00,0.00,-0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,D
9870,0.00,0.01,-0.00,0.00,-0.00,0.00,0.00,0.01,-0.00,0.00,...,0.00,0.01,0.00,0.01,0.00,0.00,0.00,0.00,0.00,D
9871,0.00,0.01,-0.00,0.00,-0.00,0.00,0.00,0.01,-0.00,0.00,...,0.00,0.01,0.00,0.01,0.00,0.00,0.00,0.00,0.00,D


In [39]:
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [40]:
y = y.replace({"S":0,"D":1,"P":2, "R":3, "H":4}).astype(int)
y.value_counts()

Phase
0    2950
1    2741
2    2097
3    1087
4     998
Name: count, dtype: int64

In [41]:
X_train, y_train, X_test, y_test = preprocess(X, y, DATASET_PATH)

Scaling numerical columns...
X_train shape: (8885, 32)
X_test shape: (988, 32)
y_train shape: (8885,)
y_test shape: (988,)
X_sample shape: (1777, 32)
y_sample shape: (1777,)


In [42]:

train_and_save_models(X_train, y_train, X_test, y_test)

 97%|█████████▋| 28/29 [00:48<00:01,  1.47s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001991 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8160
[LightGBM] [Info] Number of data points in the train set: 8885, number of used features: 32
[LightGBM] [Info] Start training from score -1.207920
[LightGBM] [Info] Start training from score -1.281362
[LightGBM] [Info] Start training from score -1.549376
[LightGBM] [Info] Start training from score -2.206610
[LightGBM] [Info] Start training from score -2.291950


100%|██████████| 29/29 [00:49<00:00,  1.71s/it]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.68,0.63,,0.67,1.92
ExtraTreesClassifier,0.69,0.63,,0.68,1.91
LGBMClassifier,0.68,0.63,,0.67,0.96
RandomForestClassifier,0.68,0.62,,0.67,10.2
LabelPropagation,0.61,0.56,,0.6,3.13
LabelSpreading,0.61,0.56,,0.6,4.68
BaggingClassifier,0.61,0.55,,0.61,5.97
KNeighborsClassifier,0.59,0.52,,0.58,0.16
ExtraTreeClassifier,0.51,0.48,,0.51,0.05
DecisionTreeClassifier,0.51,0.48,,0.51,0.84
