In [14]:
from imblearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from imblearn.under_sampling import RandomUnderSampler

from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from scipy.stats import entropy

import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
     

In [12]:
class RandomStump:

    def __init__(self, feature_index=None, threshold=None, prediction_left=None,
                 prediction_right=None, random_state=None):

        self.feature_index = feature_index
        self.threshold = threshold
        self.prediction_left = prediction_left
        self.prediction_right = prediction_right
        self.random_state = random_state

    def fit(self, X, y):

        # Set the random seed for reproducibility
        if self.random_state is not None:
            np.random.seed(self.random_state)

        # Select a random feature
        n_features = X.shape[1]
        self.feature_index = np.random.randint(0, n_features)

        # Select a random threshold within the range of the selected feature's values
        feature_values = X[:, self.feature_index]
        self.threshold = round(np.random.uniform(feature_values.min(), feature_values.max()), 2)

        # Generate random predictions for each side of the threshold
        unique_classes = np.unique(y)
        np.random.shuffle(unique_classes)
        self.prediction_left = unique_classes[0]
        self.prediction_right = unique_classes[1]

    def predict(self, X):

        feature_values = X[:, self.feature_index]
        predictions = np.where(feature_values < self.threshold, self.prediction_left, self.prediction_right)
        return predictions

    def __repr__(self):
        return (f"RandomStump(feature_index={self.feature_index}, threshold={self.threshold}, "
                f"prediction_left={self.prediction_left}, prediction_right={self.prediction_right}, "
                f"random_state={self.random_state})")

In [13]:
# Generate an example dataset
X, y = make_classification(n_samples=100, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Train the model
random_stump = RandomStump()
random_stump.fit(X_train, y_train)
print(random_stump)

# Make predictions
y_pred = random_stump.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f' - Accuracy: {accuracy:.2f}')
print(f' - Precision: {precision:.2f}')
print(f' - Recall: {recall:.2f}')
print(f' - F1-score: {f1:.2f}')

RandomStump(feature_index=5, threshold=-0.26, prediction_left=1, prediction_right=0, random_state=None)
 - Accuracy: 0.47
 - Precision: 0.67
 - Recall: 0.40
 - F1-score: 0.50
