In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset and pull out the feature matrix and the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Estimate the per-class parameters from the training split only:
#   priors[c]    - fraction of training samples labelled c
#   means[c]     - per-feature mean of the samples labelled c
#   variances[c] - per-feature variance (NumPy default, ddof=0) of those samples
classes = np.unique(y_train)
priors, means, variances = {}, {}, {}
for c in classes:
    in_class = y_train == c
    X_c = X_train[in_class]
    priors[c] = np.mean(in_class)
    means[c] = X_c.mean(axis=0)
    variances[c] = X_c.var(axis=0)

def gaussian_prob(x, mean, var, min_var=1e-9):
    """Evaluate the univariate Gaussian density element-wise.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which the density is evaluated.
    mean : float or array-like
        Mean of the distribution, broadcastable against ``x``.
    var : float or array-like
        Variance of the distribution, broadcastable against ``x``.
    min_var : float, optional
        Lower bound applied to ``var`` before it is used. A variance of
        exactly zero (a feature that is constant within a class) would
        otherwise give ``1 / sqrt(0)`` and ``0 / 0`` and produce NaN.
        Variances at or above this floor are used unchanged.

    Returns
    -------
    float or numpy.ndarray
        ``1 / sqrt(2*pi*var) * exp(-(x - mean)**2 / (2*var))`` for each element.
    """
    # Clamp degenerate variances so the density stays finite.
    var = np.maximum(var, min_var)
    return (1 / np.sqrt(2 * np.pi * var)) * np.exp(-((x - mean) ** 2) / (2 * var))

# Classify every test sample with the maximum a posteriori rule.
# Scores are accumulated in log space (log prior + sum of per-feature log
# densities) instead of multiplying raw densities: a product of many small
# densities can underflow to 0.0 and turn every class score into a tie,
# whereas the sum of logs keeps the ranking between classes intact.
predictions = []
for x in X_test:
    log_posteriors = {}
    for c in classes:
        # A density that is exactly 0 maps to -inf, which is the intended
        # "impossible under this class" score, so silence the log(0) warning.
        with np.errstate(divide="ignore"):
            log_likelihood = np.sum(np.log(gaussian_prob(x, means[c], variances[c])))
        log_posteriors[c] = np.log(priors[c]) + log_likelihood
    # Pick the class whose (unnormalised) log posterior is largest.
    predictions.append(max(log_posteriors, key=log_posteriors.get))

accuracy = accuracy_score(y_test, predictions)
print(f"Step-by-Step Gaussian Naive Bayes Accuracy: {accuracy*100:.2f}%")


Step-by-Step Gaussian Naive Bayes Accuracy: 100.00%


In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load Iris and build an 80/20 train/test split with a fixed seed.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit scikit-learn's Gaussian Naive Bayes on the training split and
# predict labels for the held-out samples.
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Report how many held-out samples were misclassified, then the accuracy.
n_test = X_test.shape[0]
n_mislabeled = (y_test != y_pred).sum()
print("Number of mislabeled points out of a total %d points : %d"
      % (n_test, n_mislabeled))

gnb_accuracy = accuracy_score(y_test, y_pred)
print(f"In-built GaussianNB Accuracy: {gnb_accuracy*100:.2f}%")


Number of mislabeled points out of a total 30 points : 0
In-built GaussianNB Accuracy: 100.00%


In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load Iris and build an 80/20 train/test split with a fixed seed.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate neighbourhood sizes: the odd values 1, 3, ..., 15.
param_grid = {'n_neighbors': list(range(1, 16, 2))}

# 5-fold cross-validated grid search over k, fitted on the training split only
# so the held-out samples play no part in choosing k.
knn = KNeighborsClassifier()
grid = GridSearchCV(knn, param_grid, cv=5)
grid.fit(X_train, y_train)

best_k = grid.best_params_['n_neighbors']
print("Best value of K:", best_k)

# Score the best estimator found by the search on the held-out samples.
best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred)
print(f"Best KNN Model Accuracy: {knn_accuracy*100:.2f}%")


Best value of K: 3
Best KNN Model Accuracy: 100.00%
