In [None]:
%reload_ext nb_black

In [None]:
import sys

sys.path.insert(
    0,
    r"C:\Users\Asus\Desktop\Repo\MasterThesis_RI\Python-Real-World-Machine-Learning\Module 2\Chapter 5",
)

import warnings

warnings.filterwarnings("ignore")

# Basic Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Modelling
# Classification
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import lightgbm as lgb
from sklearn import svm
from sklearn.svm import SVC

# Semi-Supervised Learning
from sklearn.semi_supervised import (
    LabelPropagation,
    LabelSpreading,
    SelfTrainingClassifier,
)

# Chapter 5
from SelfLearning import SelfLearningModel
from scikitWQDA import WQDA

# Model Selection
from sklearn.model_selection import train_test_split, GridSearchCV

# Metrics
from sklearn.metrics import (
    f1_score,
    roc_auc_score,
    roc_curve,
    accuracy_score,
    confusion_matrix,
    plot_confusion_matrix,
    ConfusionMatrixDisplay,
)

# Ensembling
from sklearn.ensemble import *

# Balancing
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Binning
import woeBinningPandas

# Create Unique ID
import uuid

## Data Preprocessing

In [None]:
def data_preprocessing(df, accepted_flag, target, train_ratio, random_state=None):
    """
    Load the original dataset, split it into accepts and rejects, attach a
    unique id to every row (usable for later merges) and split the rejects
    into an unlabelled development sample and an unlabelled test sample.

    Parameters
    ----------

    df : location of the original dataset in csv format (passed to pd.read_csv)
    accepted_flag : name of the accepted flag; Binary: 1 if accepted, 0 if rejected
    target : name of the target column
    train_ratio : share of the rejects used for the development sample; Continuous (0,1)
    random_state : optional seed forwarded to DataFrame.sample so that the
        shuffle of the rejects is reproducible; None keeps the shuffle random

    Return
    ------
    a : accepted data (flag dropped, target renamed to "target", "id" added)
    r : rejected data (flag dropped, "id" added, original target column kept)
    r_dev : rejected development data without the target column
    r_test : rejected testing data without the target column
    """
    # Load data
    data = pd.read_csv(df)

    # Accepted: drop the flag and expose the label under the fixed name "target"
    dfa = (
        data[data[accepted_flag] == 1]
        .drop(columns=[accepted_flag])
        .rename(columns={target: "target"})
    )
    # One fresh uuid per row, which can later be used for merging
    dfa["id"] = [uuid.uuid4() for _ in range(len(dfa))]

    # Rejected: drop the flag, keep the original target column in the full frame
    dfr = data[data[accepted_flag] == 0].drop(columns=[accepted_flag])
    dfr["id"] = [uuid.uuid4() for _ in range(len(dfr))]

    # Shuffle, then cut positionally into development / test
    shuffled = dfr.sample(frac=1, random_state=random_state)
    train_size = int(train_ratio * len(shuffled))

    # Unlabel the rejects (i.e. drop the target) in both samples
    dfr_dev = shuffled.iloc[:train_size].drop(columns=[target])
    dfr_test = shuffled.iloc[train_size:].drop(columns=[target])

    # The test sample is returned last (the development sample used to be returned twice)
    return dfa, dfr, dfr_dev, dfr_test

In [None]:
# Accepts, all rejects, and the unlabelled rejects samples; 0.7 is the share of rejects used for development
a, r, r_dev, r_test = data_preprocessing("model_ds.csv", "is_accepted", "y", 0.7)

In [None]:
# Shapes in order: accepted, rejected, rejected train, rejected test
for frame in (a, r, r_dev, r_test):
    print(frame.shape)

In [None]:
# Create rejects datasets with the modelling columns only (for a dataset with 8 features)
# NOTE(review): the positional cut-offs 9 / 10 depend on the column layout of the csv - confirm
r_dev_mod = r_dev.iloc[:, :9]
r_test_mod = r_test.iloc[:, :9]
# Create rejects datasets with the modelling columns + id
r_dev_mod_id = r_dev.iloc[:, :10]
# Separate name for the test slice so it no longer overwrites the development slice
r_test_mod_id = r_test.iloc[:, :10]

In [None]:
# Inline version of the preprocessing: load, split into accepts / rejects, add ids
model_ds = pd.read_csv("model_ds.csv")

# Accepts: flag removed, label exposed as "target", one uuid per row
df3 = (
    model_ds.loc[model_ds["is_accepted"] == 1]
    .drop(columns=["is_accepted"])
    .rename(columns={"y": "target"})
)
df3["id"] = [uuid.uuid4() for _ in range(len(df3))]

# Rejects: flag removed, one uuid per row
dfr = model_ds.loc[model_ds["is_accepted"] == 0].drop(columns=["is_accepted"])
dfr["id"] = [uuid.uuid4() for _ in range(len(dfr))]

# Shuffle the rejects and cut them 70 / 30 by position
shuffle_df = dfr.sample(frac=1)
train_size = int(0.7 * len(shuffle_df))

# Labelled copies of the two samples
dfr_dev2 = shuffle_df.iloc[:train_size]
dfr_test2 = shuffle_df.iloc[train_size:]

# Unlabelled samples (y dropped)
dfr_dev3 = dfr_dev2.drop(columns=["y"])
dfr_test3 = dfr_test2.drop(columns=["y"])

In [None]:
def split_accepts(accepted_data):
    """
    Split the accepts into a training and a testing sample and undersample
    the training sample only.

    Parameters
    ----------
    accepted_data : accepts, dataframe with a "target" column

    Return
    ------
    os_data_X_2 : undersampled training modelling fields
    X_test_2 : test modelling fields (not resampled)
    os_data_y : undersampled training labels, single column "target"
    y_test : testing labels (not resampled)

    """

    # Create X and y (y stays a one-column dataframe)
    X = accepted_data.loc[:, accepted_data.columns != "target"]
    y = accepted_data.loc[:, accepted_data.columns == "target"]

    # Hold out 30% for testing before any resampling
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=7
    )

    # Balance the training sample; fit_resample replaces the deprecated
    # fit_sample alias, which newer imbalanced-learn releases no longer provide
    sampler = RandomUnderSampler(sampling_strategy=0.5, random_state=7)
    resampled_X, resampled_y = sampler.fit_resample(X_train, y_train)
    os_data_X = pd.DataFrame(data=resampled_X, columns=X_train.columns)
    os_data_y = pd.DataFrame(data=resampled_y, columns=["target"])

    return os_data_X, X_test, os_data_y, y_test

In [None]:
# Undersampled training features / labels and the held-out test features / labels of the accepts
os_data_X_2, X_test_2, os_data_y, y_test = split_accepts(a)

In [None]:
# Shapes of the accepts training and test feature frames
for frame in (os_data_X_2, X_test_2):
    print(frame.shape)

The selection of columns below is subject to iteration based on the modelling outcomes from the logistic regression, i.e. significance (p-values):

In [None]:
# Modelling columns kept after reviewing the logistic regression p-values
significant_columns = ["known_col_0", "known_col_1", "known_col_3", "known_col_4"]

In [None]:
# Restrict every modelling frame to the selected columns
os_data_X_2 = os_data_X_2.loc[:, significant_columns]
X_test_3 = X_test_2.loc[:, significant_columns]
r_dev_mod = r_dev_mod.loc[:, significant_columns]
r_test_mod = r_test_mod.loc[:, significant_columns]

## Data Modelling

In [None]:
# Logistic regression with statsmodels (used for the coefficient summary)
# An explicit intercept column is added to the float-cast features
X_in = sm.add_constant(os_data_X_2.astype(float))
logit_model = sm.Logit(endog=os_data_y, exog=X_in)
result3 = logit_model.fit()
print(result3.summary2())

In [None]:
# Sklearn: unpenalised logistic regression fitted on the undersampled accepts
logreg = LogisticRegression(fit_intercept=True, penalty="none")
logreg.fit(os_data_X_2, os_data_y.values.ravel())
y_pred = logreg.predict(X_test_3)
print("Accuracy score Logistic Regression:", logreg.score(X_test_3, y_test))

# Use the predicted probabilities for both the AUC and the curve, so that the
# area shown in the legend is the area under the plotted curve (hard class
# predictions collapse the ROC curve to a single operating point)
y_score = logreg.predict_proba(X_test_3)[:, 1]
logit_roc_auc = roc_auc_score(y_test, y_score)
fpr, tpr, thresholds = roc_curve(y_test, y_score)

plt.figure()
plt.plot(fpr, tpr, label="Logistic Regression (area = %0.2f)" % logit_roc_auc)
plt.plot([0, 1], [0, 1], "r--")  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver operating characteristic")
plt.legend(loc="lower right")
plt.savefig("Log_ROC")
plt.show()

In [None]:
# Test set with labels: labels first, then all test columns, matched on the shared index
test_labels = y_test.merge(
    X_test_2,
    how="inner",
    left_index=True,
    right_index=True,
)


In [None]:
# Predicted probability of class 1 on the accepts test set
test_pred = logreg.predict_proba(X_test_3)[:, 1]
# One row per prediction plus how often that exact probability occurs
test_pred2 = pd.DataFrame({"prediction": test_pred}).assign(
    count=lambda frame: frame.groupby("prediction")["prediction"].transform("count")
)
test_pred2.groupby(["prediction"]).count()
test_pred2.describe()

In [None]:
# Attach the pre-RI probabilities to the labelled test set (both share y_test's index)
pred_test_kgb = pd.DataFrame(
    {"prediction_beforeRI": test_pred}, index=y_test.index.copy()
)
pred_test1 = test_labels.merge(
    pred_test_kgb[["prediction_beforeRI"]],
    how="inner",
    left_index=True,
    right_index=True,
)

pred_test1

In [None]:
# Sanity check: pred_test1 comes from inner joins on the test index, so the row count should match the test accepts
pred_test1.shape  # Expected nr. of observations

In [None]:
def all_metrics(pred_label, true_label, model):
    """
    Print the weighted F1 score of a fitted classifier and plot its confusion matrix.

    Parameters
    ----------
    pred_label : predicted label of the model
    true_label : true label
    model : fitted model; its ``classes_`` attribute fixes the label order of
        the confusion matrix, and the model itself is echoed in the printout

    Return
    ------
    None; the result is printed and the confusion matrix is plotted
    """

    # sklearn metrics take (y_true, y_pred) in that order; swapping them changes
    # the support weights of the weighted F1 and transposes the confusion matrix
    f1_stat = f1_score(true_label, pred_label, average="weighted")

    # Confusion matrix: rows are true labels, columns are predicted labels
    cm = confusion_matrix(true_label, pred_label, labels=model.classes_)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
    fig = disp.plot()

    print("F1_stat ", model, "is: ", f1_stat, fig)

### Random Forest

In [None]:
# Shallow random forest fitted on the undersampled accepts, scored on the accepts test set
rf = RandomForestClassifier(max_depth=2, random_state=0).fit(
    os_data_X_2, os_data_y.to_numpy().ravel()
)
y_pred = rf.predict(X_test_3)
all_metrics(y_pred, y_test, rf)

### Decision Tree

In [None]:
# Decision tree fitted on the undersampled accepts, scored on the accepts test set
dt = DecisionTreeClassifier(random_state=0).fit(
    os_data_X_2, os_data_y.to_numpy().ravel()
)
y_pred = dt.predict(X_test_3)
all_metrics(y_pred, y_test, dt)

In [None]:
# Frequency of each predicted class in the most recent y_pred (the decision tree when run top to bottom)
unique, counts = np.unique(y_pred, return_counts=True)
dict(zip(unique, counts))

### SVM

In [None]:
# Build the estimator from the directly imported SVC class. The previous
# `svm = svm.SVC()` rebound the imported `svm` module name to the estimator,
# so running this cell a second time failed with an AttributeError.
# The estimator keeps the name `svm` because later cells refer to it.
svm = SVC()
svm.fit(os_data_X_2, os_data_y.values.ravel())
y_pred = svm.predict(X_test_3)
all_metrics(y_pred, y_test, svm)

### LGBM Classifier

In [None]:
# LightGBM with default settings fitted on the undersampled accepts, scored on the accepts test set
lgbm = lgb.LGBMClassifier().fit(os_data_X_2, os_data_y.to_numpy().ravel())
y_pred = lgbm.predict(X_test_3)
all_metrics(y_pred, y_test, lgbm)

### Function for predictions

`predictions1` uses `predict_proba`; `predictions2` uses `predict`.

In [None]:
def predictions1(model, treshold):
    """
    Infer binary labels for the rejects development sample from the model's
    predicted probability of class 1.

    Reads the notebook-level frames ``r_dev_mod`` (modelling columns) and
    ``r_dev_mod_id`` (modelling columns + id).

    Parameters
    ----------
    model : fitted classifier exposing ``predict_proba``
    treshold : cut-off; probabilities below it become 0, everything else 1

    Return
    ------
    outcome : dataframe with the columns "id" and "prediction_beforeRI"
    """
    proba = model.predict_proba(r_dev_mod)[:, 1]

    # Index the labels by the frame that was actually scored, so every label
    # stays attached to the row it was computed from
    labels = pd.DataFrame(
        {"prediction_beforeRI": (~(proba < treshold)).astype("int64")},
        index=r_dev_mod.index,
    )

    # Pick up the ids through the shared index
    outcome = pd.merge(
        r_dev_mod_id,
        labels,
        how="inner",
        left_index=True,
        right_index=True,
    )
    return outcome[["id", "prediction_beforeRI"]]

In [None]:
def predictions2(model, treshold):
    """
    Infer binary labels for the rejects development sample from the model's
    ``predict`` output.

    Reads the notebook-level frames ``r_dev_mod`` (modelling columns) and
    ``r_dev_mod_id`` (modelling columns + id).

    Parameters
    ----------
    model : fitted model exposing ``predict``
    treshold : cut-off applied to the ``predict`` output; values below it
        become 0, everything else 1

    Return
    ------
    outcome : dataframe with the columns "id" and "prediction_beforeRI"
    """
    predicted = np.asarray(model.predict(r_dev_mod))

    # Index the labels by the frame that was actually scored, so every label
    # stays attached to the row it was computed from
    labels = pd.DataFrame(
        {"prediction_beforeRI": (~(predicted < treshold)).astype("int64")},
        index=r_dev_mod.index,
    )

    # Pick up the ids through the shared index
    outcome = pd.merge(
        r_dev_mod_id,
        labels,
        how="inner",
        left_index=True,
        right_index=True,
    )
    return outcome[["id", "prediction_beforeRI"]]

### Predictions

In [None]:
# Inferred labels for the rejects development sample, one frame per model (cut-off 0.1 throughout)
ri1_train = predictions1(logreg, 0.1)  # Logistic Regression
ri2_train = predictions1(dt, 0.1)  # Decision Tree
ri3_train = predictions1(rf, 0.1)  # Random Forest
ri4_train = predictions2(svm, 0.1)  # SVM
ri5_train = predictions2(lgbm, 0.1)  # Light GBM ranking 0.2

### Evaluation Measures

In [None]:
def print_results(data, state, prediction):
    """
    Print how many rows of ``data`` are classified correctly and incorrectly.

    data : dataframe with a binary "target" column and a binary prediction column
    state : text inserted into the printed message (e.g. "before RI")
    prediction : name of the binary prediction column
    """
    actual = data.target

    # Correct: prediction and target are both 1 or both 0
    hits = ((actual == 1) & (data[prediction] == 1)) | (
        (actual == 0) & (data[prediction] == 0)
    )
    print(
        "The number of accurately classified cases ", state, " is: ", len(data[hits])
    )

    # Wrong: prediction and target take opposite values
    misses = ((actual == 1) & (data[prediction] == 0)) | (
        (actual == 0) & (data[prediction] == 1)
    )
    print("The number of misclassified cases ", state, " is: ", len(data[misses]))

In [None]:
def evaluation(ri_data):  # ri1_train, ri2_train, etc..
    """
    Retrain the logistic regression on accepts plus rejects with inferred
    labels and print, for the accepts test set, the number of correctly and
    incorrectly classified cases before and after reject inference.

    Besides ``ri_data`` the function reads these notebook-level names:
    ``os_data_X_2``, ``os_data_y``, ``r_dev_mod``, ``significant_columns``,
    ``X_test_2``, ``y_test``, ``pred_test1`` and ``print_results``.

    Parameters
    ----------
    ri_data : dataframe with a "prediction_beforeRI" column holding the
        inferred labels of the rejects; it is joined to ``r_dev_mod`` on the index

    Return
    ------
    None; the results are printed via ``print_results``
    """

    # TRAIN NEW
    # Join labels to train set
    # Accepts
    train_accepts = pd.merge(
        os_data_X_2,
        os_data_y[["target"]],
        how="left",
        left_index=True,
        right_index=True,
    )

    train_accepts["Flag1"] = "Accept"

    # Rejects: the inferred label plays the role of the target
    train_rejects = pd.merge(
        r_dev_mod,
        ri_data[["prediction_beforeRI"]],
        how="left",
        left_index=True,
        right_index=True,
    )

    # Drop Null values and align naming
    # NOTE(review): unlike the test branch below, train rejects without an
    # inferred label are not dropped here - confirm this is intended
    train_accepts.dropna(subset=["target"], inplace=True)
    train_rejects.rename(columns={"prediction_beforeRI": "target"}, inplace=True)

    # Concatenate Train Accepts and Train Rejects
    train_new = pd.concat([train_accepts, train_rejects])

    # Flag: rows coming from the rejects have no Flag1 value and become "Reject"
    train_new["Flag"] = train_new["Flag1"].apply(
        lambda x: "Accept" if x == "Accept" else "Reject"
    )
    train_new = train_new.drop(columns=["Flag1"])

    # Retrain KGB Model

    # Split
    # NOTE(review): only the 70% training part is used for fitting; the held-out
    # X_test_new / y_test_new are overwritten further down without being used
    X_new = train_new.loc[:, train_new.columns != "target"]
    X_new = X_new.loc[:, X_new.columns != "Flag"]
    y_new = train_new.loc[:, train_new.columns == "target"]
    X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(
        X_new, y_new, test_size=0.3, random_state=7
    )

    # Keep only columns for modelling
    os_data_X_2_new = X_train_new[significant_columns]
    X_test_2_new = X_test_new[significant_columns]

    # Build Logistic regression (local model; shadows the notebook-level logreg)
    # NOTE(review): fitted without an intercept here, whereas the notebook-level
    # model before RI uses fit_intercept=True - confirm the difference is intended
    logreg = LogisticRegression(fit_intercept=False, penalty="none")
    logreg.fit(os_data_X_2_new, y_train_new.values.ravel())

    # TEST NEW
    # Join labels to test set
    # Accepts
    test_accepts = pd.merge(
        X_test_2, y_test[["target"]], how="left", left_index=True, right_index=True
    )

    test_accepts["Flag1"] = "Accept"

    # Rejects
    # NOTE(review): this reuses the development rejects (r_dev_mod), not a test
    # sample of the rejects - confirm
    test_rejects = pd.merge(
        r_dev_mod,
        ri_data[["prediction_beforeRI"]],
        how="left",
        left_index=True,
        right_index=True,
    )

    # Drop Null values and align naming
    test_accepts.dropna(subset=["target"], inplace=True)
    test_rejects.rename(columns={"prediction_beforeRI": "target"}, inplace=True)
    test_rejects.dropna(subset=["target"], inplace=True)

    # Concatenate Test Accepts and Test Rejects
    test_new = pd.concat([test_accepts, test_rejects])

    # Flag
    test_new["Flag"] = test_new["Flag1"].apply(
        lambda x: "Accept" if x == "Accept" else "Reject"
    )

    test_new = test_new.drop(columns=["Flag1"])

    test_new = test_new.reset_index(drop=True)

    # Strip target, Flag and id, then keep the modelling columns only
    X_test_new = test_new.loc[:, test_new.columns != "target"]
    X_test_new2 = X_test_new.loc[:, X_test_new.columns != "Flag"]
    X_test_new4 = X_test_new2.loc[:, X_test_new2.columns != "id"]

    X_test_new5 = X_test_new4[significant_columns]

    y_test_new = test_new.loc[:, test_new.columns == "target"]

    # PREDICTIONS
    # Join predictions with test new (probability of class 1 from the retrained model)
    test_new_pred = logreg.predict_proba(X_test_new5)[:, 1]
    pred_test_df = pd.DataFrame(
        data=test_new_pred, columns=["prediction_afterRI"], index=test_new.index.copy()
    )
    pred_test = pd.merge(
        test_new,
        pred_test_df[["prediction_afterRI"]],
        how="inner",
        left_index=True,
        right_index=True,
    )
    # Bare expression: has no effect inside a function
    pred_test

    # Before-RI predictions come from the notebook-level pred_test1
    a1 = pred_test1[["id", "target", "prediction_beforeRI"]]
    a2 = pred_test[["id", "Flag", "prediction_afterRI"]]

    # Join (outer to get full sample); the result is not used afterwards
    a1_a2_outer = pd.merge(a1, a2, how="outer", on="id")
    # Join (inner to get only accepts and be able to compare)
    a1_a2_inner = pd.merge(a1, a2, how="inner", on="id")
    # Bare expression: has no effect inside a function
    a1_a2_inner

    # Make binary predictions based on cutoff 0.3
    a1_a2_inner["prediction_beforeRI_binary"] = a1_a2_inner[
        "prediction_beforeRI"
    ].apply(lambda x: 0 if (x < 0.3) else 1)
    a1_a2_inner["prediction_afterRI_binary"] = a1_a2_inner["prediction_afterRI"].apply(
        lambda x: 0 if (x < 0.3) else 1
    )

    print_results(a1_a2_inner, "before RI", "prediction_beforeRI_binary")
    print_results(a1_a2_inner, "after RI", "prediction_afterRI_binary")

In [None]:
# Reject labels inferred by the logistic regression
evaluation(ri1_train)  # Simple Augmentation - Log Reg

In [None]:
# Reject labels inferred by the decision tree
evaluation(ri2_train)  # Decision Tree

In [None]:
# Reject labels inferred by the random forest
evaluation(ri3_train)  # Random Forest

In [None]:
# Reject labels inferred by the support vector classifier
evaluation(ri4_train)  # SVM

In [None]:
# Reject labels inferred by the LightGBM classifier
evaluation(ri5_train)  # LGBM

# Semi-Supervised Learning

Based on the existing literature, semi-supervised learning methods are chosen for reject inference (RI). They suit the problem because the labels of the accepted population (good/bad) are known, while the labels of the rejected population are unknown. Rather than ignoring the inherent bias between accepts and rejects, semi-supervised methods use both labelled and unlabelled data during fitting. <br> **1. Data preparation:** The goal is to create an initial dataframe that contains the known training data and training labels of the accepts together with the known training data of the rejects. The training labels of the rejects are unknown and are therefore set to the default value of -1. <br> **2. Train/Test Split:** The resulting dataset is again split into explanatory variables and target in order to fit the model. <br> **3. Fit model:** The semi-supervised model is fitted. <br> **4. Predictions:** Predictions are made using the known testing data and testing labels of the accepts. <br> **5. Evaluation:** The results of the model before and after reject inference are compared.

In [None]:
def ssl_prep(X_accept, y_accept, X_reject, feature_columns=None):
    """
    Stack labelled accepts and unlabelled rejects into one modelling frame
    for semi-supervised learning.

    Parameters
    ----------

    X_accept : data of accepted population
    y_accept : labels of accepted population; dataframe with a "target"
        column, joined to X_accept on the index
    X_reject : data of rejected population
    feature_columns : optional list of modelling columns to keep; defaults to
        ["known_col_0", "known_col_1", "known_col_3", "known_col_4"]

    Return
    ------
    df : data of accepted and rejected population, restricted to the
        modelling columns plus the column "unlabel" (1 / 0 for accepts with
        that target value, -1 for every other row)

    """
    if feature_columns is None:
        feature_columns = ["known_col_0", "known_col_1", "known_col_3", "known_col_4"]

    # Merge explanatory and target in accepts
    accepts = pd.merge(
        X_accept, y_accept, how="left", left_index=True, right_index=True
    )
    # Create accept flag; rows coming from X_reject get no flag after the concat
    accepts["Flag1"] = "Accept"

    # Merge accepts and rejects
    df = pd.concat([accepts, X_reject])

    # If accepted use accept label, otherwise -1 (default value for unlabelled entries)
    is_accept = df["Flag1"] == "Accept"
    conditions = [
        is_accept & (df["target"] == 1),
        is_accept & (df["target"] == 0),
    ]
    # New target is called unlabel
    df["unlabel"] = np.select(conditions, [1, 0], default=-1)

    # Select columns for modelling
    return df[list(feature_columns) + ["unlabel"]]

In [None]:
def ssl_split(df, target):
    """
    Separate the modelling frame into explanatory columns and the target column.

    Parameters
    ----------

    df : dataframe of accepted and rejected population, including data and labels
    target : string name of the target column (e.g. "unlabel")

    Return
    ------
    X_train : every column of df except the target
    y_train : one-column dataframe holding the target

    """
    is_target = df.columns == target
    features = df.loc[:, ~is_target]
    labels = df.loc[:, is_target]
    return features, labels

In [None]:
def ssl_model_selftraining(X_train, y_train, model):
    """
    Fit a self-training semi-supervised model around an SVC base estimator.

    Parameters
    ----------

    X_train : training data of accepted and rejected population
    y_train : training labels of accepted population (0,1) and rejected population (-1)
    model : semi-supervised model class that wraps a base estimator
        (e.g. sklearn's SelfTrainingClassifier); it is instantiated here

    Return
    ------

    ssl : trained semi-supervised learning model

    """

    # The base estimator is created with probability=True
    base = SVC(probability=True, gamma="auto")
    wrapper = model(base)

    # Work on array copies; the labels are flattened to 1-D because y_train
    # arrives as a one-column dataframe and estimators expect shape (n_samples,)
    data = np.copy(X_train)
    labels = np.array(y_train).ravel()
    return wrapper.fit(data, labels)

In [None]:
def ssl_model_label(X_train, y_train, model):
    """
    Fit a graph-based semi-supervised model with its default settings.

    Parameters
    ----------

    X_train : training data of accepted and rejected population
    y_train : training labels of accepted population (0,1) and rejected population (-1)
    model : semi-supervised model class (e.g. sklearn's LabelPropagation or
        LabelSpreading); it is instantiated here without arguments

    Return
    ------

    ssl : trained semi-supervised learning model

    """

    estimator = model()

    # Work on array copies; the labels are flattened to 1-D because y_train
    # arrives as a one-column dataframe and estimators expect shape (n_samples,)
    data = np.copy(X_train)
    labels = np.array(y_train).ravel()
    return estimator.fit(data, labels)

In [None]:
def ssl_predictions(ssl, X_test, cutoff=0.3):
    """
    Predict with a trained semi-supervised model and print the number of
    correctly / incorrectly classified cases before and after RI.

    Reads the notebook-level frame ``pred_test1`` (columns "id", "target",
    "prediction_beforeRI") and reports through ``print_results``.

    Parameters
    ----------

    ssl : trained semi-supervised learning model
    X_test : testing data used for the predictions; its index is matched
        against the index of ``pred_test1``
    cutoff : values below the cutoff become 0, everything else 1; applied to
        both the before-RI and the after-RI prediction (default 0.3)

    Return
    ------

    None; the comparison is printed

    """
    # Predictions after RI, indexed like the data they were computed from
    after_ri = pd.DataFrame(
        {"prediction_afterRI": ssl.predict(X_test)},
        index=X_test.index.copy(),
    )

    # Keep only rows present both before and after RI
    comparison = pd.merge(
        pred_test1[["id", "target", "prediction_beforeRI"]],
        after_ri,
        how="inner",
        left_index=True,
        right_index=True,
    )

    # Make final (binary) predictions with the same cutoff on both sides
    for column in ("prediction_beforeRI", "prediction_afterRI"):
        comparison[column + "_binary"] = (~(comparison[column] < cutoff)).astype(
            "int64"
        )

    print_results(comparison, "before RI", "prediction_beforeRI_binary")
    print_results(comparison, "after RI", "prediction_afterRI_binary")

### SSL Prep

In [None]:
# Train frame: undersampled accepts with labels + unlabelled development rejects
train_new_model = ssl_prep(
    X_accept=os_data_X_2,
    y_accept=os_data_y,
    X_reject=r_dev_mod,
)
# Test frame: test accepts with labels + unlabelled test rejects
test_new_model = ssl_prep(
    X_accept=X_test_3,
    y_accept=y_test,
    X_reject=r_test_mod,
)
# Features and the "unlabel" target used to fit the semi-supervised models
X_ssl, y_ssl = ssl_split(df=train_new_model, target="unlabel")

### SSL Models & Predictions

In [None]:
# Self-training around an SVC base estimator; compared with the before-RI predictions on the accepts test features
self_training = ssl_model_selftraining(X_ssl, y_ssl, SelfTrainingClassifier)
ssl_predictions(self_training, X_test_3)

In [None]:
# Label spreading with default settings; compared with the before-RI predictions on the accepts test features
label_spreading = ssl_model_label(X_ssl, y_ssl, LabelSpreading)
ssl_predictions(label_spreading, X_test_3)

In [None]:
# Label propagation with default settings; compared with the before-RI predictions on the accepts test features
label_propagation = ssl_model_label(X_ssl, y_ssl, LabelPropagation)
ssl_predictions(label_propagation, X_test_3)

## Ensembling

In [None]:
# (name, estimator) pairs handed to the ensemble classifiers below
# NOTE(review): these instances were already fitted above; the ensembles are fitted again on X_ssl / y_ssl - confirm that refitting is intended
estimators = [
    ("lp", label_propagation),
    ("lsp", label_spreading),
    ("st", self_training),
]

### Ensembling Models and Predictions

In [None]:
# Stacking Classifier: a small gradient-boosting model combines the base estimators
final_estimator = GradientBoostingClassifier(
    n_estimators=5, subsample=0.5, min_samples_leaf=25, max_features=1, random_state=42
)
sc = StackingClassifier(estimators=estimators, final_estimator=final_estimator)
# Hard Voting Classifier: majority vote over the predicted labels
hvc = VotingClassifier(estimators=estimators, voting="hard")
# Soft Voting Classifier: averages the predicted class probabilities
# The previous weights=[1, 0] had two entries for three estimators; VotingClassifier
# needs one weight per estimator, so the estimators are weighted equally here.
# NOTE(review): pass a three-element `weights` list if a specific weighting was intended
soft_vc = VotingClassifier(estimators=estimators, voting="soft")

In [None]:
# Fit the stacking ensemble and compare before / after RI on the accepts test features
# NOTE(review): y_ssl still contains -1 for the unlabelled rejects - confirm the ensemble treats it as intended
stacking_classifier = sc.fit(X_ssl, y_ssl)
ssl_predictions(stacking_classifier, X_test_3)

In [None]:
# Fit the hard-voting ensemble and compare before / after RI on the accepts test features
# NOTE(review): y_ssl still contains -1 for the unlabelled rejects - confirm the ensemble treats it as intended
hard_voting_classifier = hvc.fit(X_ssl, y_ssl)
ssl_predictions(hard_voting_classifier, X_test_3)

In [None]:
# Fit the soft-voting ensemble. This cell previously refitted `hvc`, so the
# results reported as soft voting were hard-voting results.
soft_voting_classifier = soft_vc.fit(X_ssl, y_ssl)
ssl_predictions(soft_voting_classifier, X_test_3)