In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

class BernoulliNB:
    def fit(self, X, y):
        X = np.asarray(X)
        y = np.asarray(y)


        X = (X > 0).astype(int)
        self.classes_ = np.unique(y)
        self.class_prior_ = np.array([np.mean(y == c) for c in self.classes_])


        self.feature_prob_ = np.clip(
            np.array([X[y == c].mean(axis=0) for c in self.classes_]), 1e-9, 1 - 1e-9
        )

    def predict(self, X):

        X = (X > 0).astype(int)

        log_prob = [
            (np.log(self.class_prior_[i]) +
             np.sum(X * np.log(self.feature_prob_[i]) +
                    (1 - X) * np.log(1 - self.feature_prob_[i]), axis=1))
            for i in range(len(self.classes_))
        ]
        return self.classes_[np.argmax(np.vstack(log_prob), axis=0)]


class ComplementNB:
    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.class_prior_ = np.array([np.mean(y == c) for c in self.classes_])
        total_counts = np.array([(X[y == c].sum(axis=0) + 1) for c in self.classes_])
        self.feature_prob_ = (total_counts.T / total_counts.sum(axis=1)).T

    def predict(self, X):
        log_prob = [np.log(self.class_prior_[i]) + np.sum(X * np.log(self.feature_prob_[i]), axis=1)
                    for i in range(len(self.classes_))]
        return self.classes_[np.argmax(np.vstack(log_prob), axis=0)]

class GaussianNB:
    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.class_prior_ = np.array([np.mean(y == c) for c in self.classes_])
        self.mean_ = np.array([X[y == c].mean(axis=0) for c in self.classes_])
        self.var_ = np.array([X[y == c].var(axis=0) for c in self.classes_])

        self.var_ = np.clip(self.var_, 1e-9, None)

    def predict(self, X):
        log_prob = [
            -0.5 * np.sum(np.log(2 * np.pi * self.var_[i])) -
            0.5 * np.sum(((X - self.mean_[i]) ** 2) / self.var_[i], axis=1) +
            np.log(self.class_prior_[i])
            for i in range(len(self.classes_))
        ]
        return self.classes_[np.argmax(np.vstack(log_prob), axis=0)]


class NearestCentroid:
    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.centroids_ = np.array([X[y == c].mean(axis=0) for c in self.classes_])

    def predict(self, X):
        distances = [np.linalg.norm(X - centroid, axis=1) for centroid in self.centroids_]
        return self.classes_[np.argmin(np.vstack(distances), axis=0)]

class MultinomialNB:
    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.class_prior_ = np.array([np.mean(y == c) for c in self.classes_])
        total_counts = np.array([(X[y == c].sum(axis=0) + 1) for c in self.classes_])
        self.feature_prob_ = (total_counts.T / total_counts.sum(axis=1)).T

    def predict(self, X):
        log_prob = [(np.log(self.class_prior_[i]) + np.sum(X * np.log(self.feature_prob_[i]), axis=1))
                    for i in range(len(self.classes_))]
        return self.classes_[np.argmax(np.vstack(log_prob), axis=0)]


models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

def evaluate_models(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    results = {}
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        results[model_name] = accuracy
    return results


results = evaluate_models(X, y)

results_df = pd.DataFrame([results], index=["Iris Dataset"])
print(results_df)


              BernoulliNB  ComplementNB  GaussianNB  NearestCentroid  \
Iris Dataset     0.288889      0.955556    0.977778         0.955556   

              MultinomialNB  
Iris Dataset       0.955556  


In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
# Load MNIST data
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target.astype(int)

X = X / 255.0  
X_binary = (X > 0.5).astype(int)  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_binary, X_test_binary = X_binary[:len(X_train)], X_binary[len(X_train):]
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Instantiate your models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)  
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.1021
Training ComplementNB...
ComplementNB Accuracy: 0.8281
Training GaussianNB...
GaussianNB Accuracy: 0.5619
Training NearestCentroid...
NearestCentroid Accuracy: 0.8134
Training MultinomialNB...
MultinomialNB Accuracy: 0.8281


In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target  # Features and labels

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features for GaussianNB
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Binarize features for BernoulliNB
X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

# Define custom models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)  # Scaled features
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)  # Binary features
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)  # Original features
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.9649
Training ComplementNB...
ComplementNB Accuracy: 0.9386
Training GaussianNB...
GaussianNB Accuracy: 0.9649
Training NearestCentroid...
NearestCentroid Accuracy: 0.9386
Training MultinomialNB...
MultinomialNB Accuracy: 0.9386


In [16]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the Digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features for GaussianNB
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Binarize features for BernoulliNB
X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

# Define custom models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)  # Scaled features
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)  # Binary features
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)  # Original features
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.8861
Training ComplementNB...
ComplementNB Accuracy: 0.9111
Training GaussianNB...
GaussianNB Accuracy: 0.7667
Training NearestCentroid...
NearestCentroid Accuracy: 0.9000
Training MultinomialNB...
MultinomialNB Accuracy: 0.9111


In [19]:
import openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load Spambase dataset from OpenML
spambase = openml.datasets.get_dataset(44)  # ID 44 corresponds to Spambase
X, y, _, _ = spambase.get_data(target=spambase.default_target_attribute)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features for GaussianNB
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Binarize features for BernoulliNB
X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

# Define custom models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)  # Scaled features
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)  # Binary features
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)  # Original features
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")




Training BernoulliNB...
BernoulliNB Accuracy: 0.8979
Training ComplementNB...
ComplementNB Accuracy: 0.7861
Training GaussianNB...
GaussianNB Accuracy: 0.8219
Training NearestCentroid...
NearestCentroid Accuracy: 0.6395
Training MultinomialNB...
MultinomialNB Accuracy: 0.7861


In [20]:
import openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load Diabetes dataset from OpenML
diabetes = openml.datasets.get_dataset(37)  # ID 37 corresponds to the Diabetes dataset
X, y, _, _ = diabetes.get_data(target=diabetes.default_target_attribute)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features for GaussianNB
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Binarize features for BernoulliNB
X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

# Define custom models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)  # Scaled features
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)  # Binary features
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)  # Original features
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.6753
Training ComplementNB...
ComplementNB Accuracy: 0.6623
Training GaussianNB...
GaussianNB Accuracy: 0.7662
Training NearestCentroid...
NearestCentroid Accuracy: 0.6753
Training MultinomialNB...
MultinomialNB Accuracy: 0.6623


In [18]:
!pip install openml

Collecting openml
  Downloading openml-0.15.0-py3-none-any.whl.metadata (9.9 kB)
Collecting liac-arff>=2.4.0 (from openml)
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xmltodict (from openml)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting minio (from openml)
  Downloading minio-7.2.10-py3-none-any.whl.metadata (6.5 kB)
Collecting pycryptodome (from minio->openml)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading openml-0.15.0-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.0/158.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading minio-7.2.10-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.9/93.9 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Downloading pyc

In [None]:

fashion_mnist = openml.datasets.get_dataset(40996)  
X, y, _, _ = fashion_mnist.get_data(target=fashion_mnist.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

# Define custom models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)  
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train) 
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train) 
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7031
Training ComplementNB...
ComplementNB Accuracy: 0.6661
Training GaussianNB...
GaussianNB Accuracy: 0.5753
Training NearestCentroid...
NearestCentroid Accuracy: 0.6874
Training MultinomialNB...
MultinomialNB Accuracy: 0.6661


In [None]:

banknote_auth = openml.datasets.get_dataset(1464) 
X, y, _, _ = banknote_auth.get_data(target=banknote_auth.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train) 
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333


In [None]:
titanic = openml.datasets.get_dataset(40945)
X, y, _, _ = titanic.get_data(target=titanic.default_target_attribute)

X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

# Define custom models
models = {
    'BernoulliNB': BernoulliNB(),
    'ComplementNB': ComplementNB(),
    'GaussianNB': GaussianNB(),
    'NearestCentroid': NearestCentroid(),
    'MultinomialNB': MultinomialNB()
}

# Train and evaluate models
for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train) 
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)  
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.8550
Training ComplementNB...
ComplementNB Accuracy: 0.5458
Training GaussianNB...
GaussianNB Accuracy: 0.5496
Training NearestCentroid...
NearestCentroid Accuracy: 0.4695
Training MultinomialNB...
MultinomialNB Accuracy: 0.5458


In [24]:
blood_transfusion = openml.datasets.get_dataset(1464)
X, y, _, _ = blood_transfusion.get_data(target=blood_transfusion.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333


In [25]:
blood_transfusion = openml.datasets.get_dataset(1464)
X, y, _, _ = blood_transfusion.get_data(target=blood_transfusion.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333


In [26]:
liver_disorders = openml.datasets.get_dataset(1464)
X, y, _, _ = liver_disorders.get_data(target=liver_disorders.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333


In [27]:
credit_approval = openml.datasets.get_dataset(1464)
X, y, _, _ = credit_approval.get_data(target=credit_approval.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333


In [28]:
mushroom = openml.datasets.get_dataset(1464)
X, y, _, _ = mushroom.get_data(target=mushroom.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333


In [29]:
phishing = openml.datasets.get_dataset(1464)
X, y, _, _ = phishing.get_data(target=phishing.default_target_attribute)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_binary = (X_train > X_train.mean(axis=0)).astype(int)
X_test_binary = (X_test > X_test.mean(axis=0)).astype(int)

for name, model in models.items():
    print(f"Training {name}...")

    if name == 'GaussianNB':
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
    elif name == 'BernoulliNB':
        model.fit(X_train_binary, y_train)
        predictions = model.predict(X_test_binary)
    else:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy:.4f}")


Training BernoulliNB...
BernoulliNB Accuracy: 0.7400
Training ComplementNB...
ComplementNB Accuracy: 0.7333
Training GaussianNB...
GaussianNB Accuracy: 0.7533
Training NearestCentroid...
NearestCentroid Accuracy: 0.6800
Training MultinomialNB...
MultinomialNB Accuracy: 0.7333
