In [29]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=5000, n_features=50, 
                           n_informative=10,
                           n_redundant=25, n_repeated=15, 
                           n_clusters_per_class=5,
                           flip_y=0.05, class_sep=0.5, 
                           random_state=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [30]:
"""
# As an alternative to the make_classification synthetic data,
# you may decide to use the Madelon dataset by using the code
# in this commented cell

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
data = fetch_openml(name='madelon')
X = data.data.astype(float)
X = X / X.mean()
y = (data.target=='2').astype(float)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
"""

"\n# As an alternative to the make_classification synthetic data,\n# you may decide to use the Madelon dataset by using the code\n# in this commented cell\n\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.model_selection import train_test_split\ndata = fetch_openml(name='madelon')\nX = data.data.astype(float)\nX = X / X.mean()\ny = (data.target=='2').astype(float)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)\n"

In [31]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import log_loss, roc_auc_score, accuracy_score

model_1 = SVC(probability=True, random_state=0)
model_2 = RandomForestClassifier(random_state=0)
model_3 = KNeighborsClassifier()

In [32]:
model_1.fit(X_train, y_train)
model_2.fit(X_train, y_train)
model_3.fit(X_train, y_train)

KNeighborsClassifier()

In [33]:
import numpy as np
from scipy.stats import mode

preds = np.stack([model_1.predict(X_test),
                  model_2.predict(X_test),
                  model_3.predict(X_test)]).T

max_voting = np.apply_along_axis(mode, 1, preds)[:,0]

In [34]:
discordant = np.sum(np.var(preds, axis=1) > 0) / len(y_test)
print(f"{discordant:0.2f}") 

0.24


In [35]:
for i, model in enumerate(['SVC', 'RF ', 'KNN']):
    acc = accuracy_score(y_true=y_test, y_pred=preds[:, i])
    print(f"Accuracy for model {model} is: {acc:0.3f}")

Accuracy for model SVC is: 0.804
Accuracy for model RF  is: 0.793
Accuracy for model KNN is: 0.805


In [36]:
max_voting_accuray = accuracy_score(y_true=y_test, y_pred=max_voting)
print(f"Accuracy for majority voting is: {max_voting_accuray:0.3f}")

Accuracy for majority voting is: 0.817


In [37]:
proba = np.stack([model_1.predict_proba(X_test)[:, 1],
                  model_2.predict_proba(X_test)[:, 1],
                  model_3.predict_proba(X_test)[:, 1]]).T

In [38]:
for i, model in enumerate(['SVC', 'RF ', 'KNN']):
    ras = roc_auc_score(y_true=y_test, y_score=proba[:, i])
    print(f"ROC-AUC for model {model} is: {ras:0.5f}")

ROC-AUC for model SVC is: 0.88126
ROC-AUC for model RF  is: 0.87685
ROC-AUC for model KNN is: 0.87511


In [39]:
arithmetic = proba.mean(axis=1)
ras = roc_auc_score(y_true=y_test, y_score=arithmetic)
print(f"Mean averaging ROC-AUC is: {ras:0.5f}")

Mean averaging ROC-AUC is: 0.90192


In [40]:
geometric = proba.prod(axis=1)**(1/3)
ras = roc_auc_score(y_true=y_test, y_score=geometric)
print(f"Geometric averaging ROC-AUC is: {ras:0.5f}")

Geometric averaging ROC-AUC is: 0.89857


In [41]:
harmonic = 1 / np.mean(1. / (proba + 0.00001), axis=1)
ras = roc_auc_score(y_true=y_test, y_score=harmonic)
print(f"Geometric averaging ROC-AUC is: {ras:0.5f}")

Geometric averaging ROC-AUC is: 0.89916


In [66]:
n = 3
mean_of_powers = np.mean(proba**n, axis=1)**(1/n)
ras = roc_auc_score(y_true=y_test, y_score=mean_of_powers)
print(f"Mean of powers averaging ROC-AUC is: {ras:0.5f}")

Mean of powers averaging ROC-AUC is: 0.89996


In [45]:
logarithmic = np.expm1(np.mean(np.log1p(proba), axis=1))
ras = roc_auc_score(y_true=y_test, y_score=logarithmic)
print(f"Logarithmic averaging ROC-AUC is: {ras:0.5f}")

Logarithmic averaging ROC-AUC is: 0.90179


In [20]:
cormat = np.corrcoef(proba.T)
np.fill_diagonal(cormat, 0.0)
W = 1 / np.mean(cormat, axis=1)
W = W / sum(W) # normalizing to sum==1.0
weighted = proba.dot(W)
ras = roc_auc_score(y_true=y_test, y_score=weighted)
print(f"Weighted averaging ROC-AUC is: {ras:0.5f}")

Weighted averaging ROC-AUC is: 0.90206


In [21]:
from sklearn.preprocessing import MinMaxScaler
arithmetic = MinMaxScaler().fit_transform(proba).mean(axis=1)
ras = roc_auc_score(y_true=y_test, y_score=arithmetic)
print(f"Mean averaging ROC-AUC is: {ras:0.5f}")

Mean averaging ROC-AUC is: 0.90180


In [22]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=0)
scores = list()

for k, (train_index, test_index) in enumerate(kf.split(X_train)):
    model_1.fit(X_train[train_index, :], y_train[train_index])
    model_2.fit(X_train[train_index, :], y_train[train_index])
    model_3.fit(X_train[train_index, :], y_train[train_index])
    
    proba = np.stack([model_1.predict_proba(X_train[test_index, :])[:, 1],
                      model_2.predict_proba(X_train[test_index, :])[:, 1],
                      model_3.predict_proba(X_train[test_index, :])[:, 1]]).T
    
    arithmetic = proba.mean(axis=1)
    ras = roc_auc_score(y_true=y_train[test_index], y_score=arithmetic)
    scores.append(ras)
    print(f"FOLD {k} Mean averaging ROC-AUC is: {ras:0.5f}")
    
print(f"CV Mean averaging ROC-AUC is: {np.mean(scores):0.5f}")

FOLD 0 Mean averaging ROC-AUC is: 0.88202
FOLD 1 Mean averaging ROC-AUC is: 0.87379
FOLD 2 Mean averaging ROC-AUC is: 0.91092
FOLD 3 Mean averaging ROC-AUC is: 0.87909
FOLD 4 Mean averaging ROC-AUC is: 0.89224
CV Mean averaging ROC-AUC is: 0.88761


In [24]:
X_blend, X_holdout, y_blend, y_holdout = train_test_split(X_train, y_train, test_size=0.25, random_state=0)

model_1.fit(X_blend, y_blend)
model_2.fit(X_blend, y_blend)
model_3.fit(X_blend, y_blend)

proba = np.stack([model_1.predict_proba(X_holdout)[:, 1],
                  model_2.predict_proba(X_holdout)[:, 1],
                  model_3.predict_proba(X_holdout)[:, 1]]).T

In [26]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
proba = scaler.fit_transform(proba)

In [27]:
from sklearn.linear_model import LogisticRegression
blender = LogisticRegression(solver='liblinear')
blender.fit(proba, y_holdout)

LogisticRegression(solver='liblinear')

In [28]:
print(blender.coef_)

[[0.78911314 0.47202077 0.75115854]]


In [20]:
test_proba = np.stack([model_1.predict_proba(X_test)[:, 1],
                       model_2.predict_proba(X_test)[:, 1],
                       model_3.predict_proba(X_test)[:, 1]]).T

blending = blender.predict_proba(test_proba)[:, 1]
ras = roc_auc_score(y_true=y_test, y_score=blending)
print(f"ROC-AUC for linear blending {model} is: {ras:0.5f}")

ROC-AUC for linear blending KNN is: 0.88621


In [21]:
blender = RandomForestClassifier()
blender.fit(proba, y_holdout)

test_proba = np.stack([model_1.predict_proba(X_test)[:, 1],
                       model_2.predict_proba(X_test)[:, 1],
                       model_3.predict_proba(X_test)[:, 1]]).T

blending = blender.predict_proba(test_proba)[:, 1]
ras = roc_auc_score(y_true=y_test, y_score=blending)
print(f"ROC-AUC for non-linear blending {model} is: {ras:0.5f}")

ROC-AUC for non-linear blending KNN is: 0.83862


In [22]:
X_blend, X_holdout, y_blend, y_holdout = train_test_split(X_train, y_train, test_size=0.5, random_state=0)

model_1.fit(X_blend, y_blend)
model_2.fit(X_blend, y_blend)
model_3.fit(X_blend, y_blend)

proba = np.stack([model_1.predict_proba(X_holdout)[:, 1],
                  model_2.predict_proba(X_holdout)[:, 1],
                  model_3.predict_proba(X_holdout)[:, 1]]).T

In [23]:
iterations = 100

proba = np.stack([model_1.predict_proba(X_holdout)[:, 1],
                  model_2.predict_proba(X_holdout)[:, 1],
                  model_3.predict_proba(X_holdout)[:, 1]]).T

baseline = 0.5
print(f"starting baseline is {baseline:0.5f}")

models = []

for i in range(iterations):
    challengers = list()
    for j in range(proba.shape[1]):
        new_proba = np.stack(proba[:, models + [j]])
        score = roc_auc_score(y_true=y_holdout, 
                              y_score=np.mean(new_proba, axis=1))
        challengers.append([score, j])
    
    challengers = sorted(challengers, key=lambda x: x[0], reverse=True)
    best_score, best_model = challengers[0]
    if best_score > baseline:
        print(f"Adding model_{best_model+1} to the ensemble", end=': ') 
        print(f"ROC-AUC increases score to {best_score:0.5f}")
        models.append(best_model)
        baseline = best_score
    else:
        print("Cannot improve further - Stopping")
        break

starting baseline is 0.50000
Adding model_3 to the ensemble: ROC-AUC increases score to 0.84298
Adding model_2 to the ensemble: ROC-AUC increases score to 0.86533
Adding model_1 to the ensemble: ROC-AUC increases score to 0.86644
Adding model_3 to the ensemble: ROC-AUC increases score to 0.86691
Adding model_2 to the ensemble: ROC-AUC increases score to 0.86779
Cannot improve furthermore - Stopping


In [24]:
from collections import Counter

freqs = Counter(models)
weights = {key: freq/len(models) for key, freq in freqs.items()}
print(weights)

{2: 0.4, 1: 0.4, 0: 0.2}


In [25]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=0)
scores = list()

first_lvl_oof = np.zeros((len(X_train), 3))
fist_lvl_preds = np.zeros((len(X_test), 3))

for k, (train_index, val_index) in enumerate(kf.split(X_train)):
    model_1.fit(X_train[train_index, :], y_train[train_index])
    first_lvl_oof[val_index, 0] = model_1.predict_proba(X_train[val_index, :])[:, 1]
    
    model_2.fit(X_train[train_index, :], y_train[train_index])
    first_lvl_oof[val_index, 1] = model_2.predict_proba(X_train[val_index, :])[:, 1]
    
    model_3.fit(X_train[train_index, :], y_train[train_index])
    first_lvl_oof[val_index, 2] = model_3.predict_proba(X_train[val_index, :])[:, 1]

In [26]:
model_1.fit(X_train, y_train)
fist_lvl_preds[:, 0] = model_1.predict_proba(X_test)[:, 1]

model_2.fit(X_train, y_train)
fist_lvl_preds[:, 1] = model_2.predict_proba(X_test)[:, 1]

model_3.fit(X_train, y_train)
fist_lvl_preds[:, 2] = model_3.predict_proba(X_test)[:, 1]

In [27]:
second_lvl_oof = np.zeros((len(X_train), 3))
second_lvl_preds = np.zeros((len(X_test), 3))

for k, (train_index, val_index) in enumerate(kf.split(X_train)):
    skip_X_train = np.hstack([X_train, first_lvl_oof])
    model_1.fit(skip_X_train[train_index, :], y_train[train_index])
    second_lvl_oof[val_index, 0] = model_1.predict_proba(skip_X_train[val_index, :])[:, 1]
    
    model_2.fit(skip_X_train[train_index, :], y_train[train_index])
    second_lvl_oof[val_index, 1] = model_2.predict_proba(skip_X_train[val_index, :])[:, 1]
    
    model_3.fit(skip_X_train[train_index, :], y_train[train_index])
    second_lvl_oof[val_index, 2] = model_3.predict_proba(skip_X_train[val_index, :])[:, 1]

In [28]:
skip_X_test = np.hstack([X_test, fist_lvl_preds])

model_1.fit(skip_X_train, y_train)
second_lvl_preds[:, 0] = model_1.predict_proba(skip_X_test)[:, 1]

model_2.fit(skip_X_train, y_train)
second_lvl_preds[:, 1] = model_2.predict_proba(skip_X_test)[:, 1]

model_3.fit(skip_X_train, y_train)
second_lvl_preds[:, 2] = model_3.predict_proba(skip_X_test)[:, 1]

In [29]:
arithmetic = second_lvl_preds.mean(axis=1)
ras = roc_auc_score(y_true=y_test, y_score=arithmetic)
scores.append(ras)
print(f"Stacking ROC-AUC is: {ras:0.5f}")

Stacking ROC-AUC is: 0.90424
