In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

df = load_iris()
X = df.data
y=df.target


X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Naive Bayes classifier
nb = GaussianNB()
nb.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = nb.predict(X_test_scaled)

# Compute the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Use different training/test set splits
splits = [(0.75, 0.25), (2/3, 1/3)]
for split in splits:
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=1-split[0], random_state=42)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    nb.fit(X_train_scaled, y_train)
    y_pred = nb.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Split {split}: Accuracy={accuracy}")

# Use different methods for training set selection
methods = ['hold-out', 'random_subsampling', 'cross-validation']
for method in methods:
    if method == 'hold-out':
        X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=42)
    elif method == 'random_subsampling':
        # Perform random subsampling (e.g., K iterations)
        pass  # Implement your random subsampling logic here
    elif method == 'cross-validation':
        scores = cross_val_score(nb, X,y, cv=5)
        print(f"Cross-validation scores: {scores}")

    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    nb.fit(X_train_scaled, y_train)
    y_pred = nb.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Method: {method}, Accuracy={accuracy}")


Accuracy: 1.0
Split (0.75, 0.25): Accuracy=1.0
Split (0.6666666666666666, 0.3333333333333333): Accuracy=0.9607843137254902
Method: hold-out, Accuracy=1.0
Method: random_subsampling, Accuracy=1.0
Cross-validation scores: [0.93333333 0.96666667 0.93333333 0.93333333 1.        ]
Method: cross-validation, Accuracy=1.0


75/25