In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_classification
from sklearn.utils import resample

In [2]:
X, y = make_classification(n_samples=1000, n_classes=2, n_features=5, weights=[0.3, 0.7])

In [3]:
X

array([[-0.14718518,  0.52968096, -1.07245825,  0.23639544, -1.93184179],
       [-0.02811752,  0.99271945,  1.07405279,  0.0770931 , -0.94323267],
       [ 0.05524298, -0.00748501,  0.67697996, -0.69402818,  0.60185974],
       ...,
       [-0.01647253,  1.12847593,  1.41376771,  1.41885876, -0.90481201],
       [-0.01277286,  1.12113477,  1.44929355, -1.31871272, -0.86009984],
       [-0.00527592,  1.19434322,  1.64764246,  1.70825895, -0.82622579]])

In [4]:
y

array([0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1,

In [9]:
df = pd.DataFrame(X, columns=[f"col_{i+1}" for i in range(X.shape[1])])

In [10]:
df['target'] = y

In [11]:
df.head()

Unnamed: 0,col_1,col_2,col_3,col_4,col_5,target
0,-0.147185,0.529681,-1.072458,0.236395,-1.931842,0
1,-0.028118,0.992719,1.074053,0.077093,-0.943233,1
2,0.055243,-0.007485,0.67698,-0.694028,0.60186,1
3,0.145354,-0.031691,1.764047,0.234169,1.591328,1
4,0.089844,-0.103833,0.969517,0.139911,1.037867,0


In [12]:
df["target"].value_counts()

target
1    695
0    305
Name: count, dtype: int64

In [13]:
zero_class = df[df["target"] == 0]
one_class = df[df["target"] == 1]

In [16]:
zero_class.shape

(305, 6)

In [17]:
one_class.shape

(695, 6)

In [18]:
upsample_zero = resample(zero_class, n_samples=one_class.shape[0], replace=True)

In [19]:
upsample_zero.shape

(695, 6)

In [21]:
upsampled_df = pd.concat([one_class, upsample_zero])

In [23]:
upsampled_df.shape

(1390, 6)

In [24]:
downsample_one = resample(one_class, n_samples=zero_class.shape[0], replace=False)

In [25]:
downsample_one.shape

(305, 6)

In [26]:
downsampled_df = pd.concat([zero_class, downsample_one])

In [27]:
downsampled_df.shape

(610, 6)

In [28]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [29]:
base_models = [
    ("rf", RandomForestClassifier(n_estimators=30, max_depth=4)),
    ("svm", SVC()),
    ("knn", KNeighborsClassifier(n_neighbors=11))
]

In [30]:
meta_model = LogisticRegression()

In [31]:
stack_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model, n_jobs=-1, cv=10)

In [32]:
stack_clf.fit(X, y)

In [33]:
stack_clf.score(X, y)

0.943