In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [2]:
# Load the iris dataset, then split it into the feature matrix (X) and the
# class-label vector (y) used by every later cell.
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Hold out 30% of the samples for evaluation; the fixed random_state keeps the
# split (and therefore the reported accuracy) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [4]:
def calculate_mean_variance(X):
    """Compute column-wise statistics of a 2-D sample matrix.

    Parameters
    ----------
    X : array-like
        Samples laid out one per row; the statistics are reduced along axis 0.

    Returns
    -------
    tuple
        ``(mean, variance)`` where each entry holds one value per column.
        The variance uses NumPy's default ``ddof=0`` (population variance).
    """
    return np.mean(X, axis=0), np.var(X, axis=0)

In [5]:
# Estimate the per-class Gaussian parameters from the training split.
# Each class subset is summarised exactly once (the earlier version sliced the
# data and called calculate_mean_variance twice per class). Row k of `means`
# and `variances` corresponds to the k-th value of np.unique(y_train).
class_stats = [calculate_mean_variance(X_train[y_train == i]) for i in np.unique(y_train)]
means = np.array([class_mean for class_mean, _ in class_stats])
variances = np.array([class_variance for _, class_variance in class_stats])



In [6]:
# Class priors: the fraction of training samples carrying each label, listed
# in the order returned by np.unique(y_train).
priors = []
for label in np.unique(y_train):
    priors.append(np.mean(y_train == label))

In [7]:
def calculate_likelihood(X, mean, variance):
    """Evaluate the univariate Gaussian density element-wise.

    Parameters
    ----------
    X : scalar or numpy.ndarray
        Point(s) at which the density is evaluated.
    mean : scalar or numpy.ndarray
        Gaussian mean(s); must broadcast against ``X``.
    variance : scalar or numpy.ndarray
        Gaussian variance(s); must broadcast against ``X``. A zero variance
        leads to a division by zero.

    Returns
    -------
    Same shape as the broadcast of the inputs: the density
    ``exp(-(X - mean)^2 / (2 * variance)) / sqrt(2 * pi * variance)``.
    """
    squared_deviation = (X - mean) ** 2
    gaussian_kernel = np.exp(-(squared_deviation / (2 * variance)))
    normaliser = 1 / (np.sqrt(2 * np.pi * variance))
    return normaliser * gaussian_kernel

In [8]:
def calculate_posterior(X):
    """Predict a class label for every row of ``X`` with Gaussian naive Bayes.

    For each class the unnormalised log-posterior
    ``log(prior) + sum_j log N(x_j | mean_j, variance_j)`` is computed from the
    module-level ``priors``, ``means`` and ``variances``, and the class with
    the largest value wins.

    The Gaussian log-density is evaluated directly in log space instead of
    taking ``np.log`` of the density: for samples far from a class mean the
    density underflows to 0.0, which would turn the score into ``-inf`` (with
    a RuntimeWarning) and make classes indistinguishable.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        Samples to classify; the feature axis must match ``means``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Predicted labels drawn from ``np.unique(y_train)``. When the training
        labels are 0..k-1 this equals the plain argmax index.
    """
    # Parameters were estimated in np.unique(y_train) order, so position i in
    # priors/means/variances belongs to classes[i].
    classes = np.unique(y_train)
    log_posteriors = []
    for i in range(len(classes)):
        # log N(x | mean, var) = -0.5 * log(2*pi*var) - (x - mean)^2 / (2*var)
        log_likelihood = (
            -0.5 * np.log(2 * np.pi * variances[i])
            - (X - means[i]) ** 2 / (2 * variances[i])
        )
        # Naive Bayes assumption: features are independent, so per-feature
        # log-densities add up along the feature axis.
        log_posteriors.append(np.sum(log_likelihood, axis=1) + np.log(priors[i]))
    # Map the winning position back to the actual label value.
    return classes[np.argmax(log_posteriors, axis=0)]

In [9]:
# Classify the held-out samples with the hand-written naive Bayes model and
# report the fraction of correct predictions.
predictions = calculate_posterior(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f'Accuracy: {accuracy :.4f}')

Accuracy: 0.9778


In [10]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [11]:
# Reload iris and recreate the same 70/30 split (same random_state) so the
# scikit-learn model below is evaluated on identical data.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)



In [12]:
# Fit scikit-learn's Gaussian naive Bayes on the training split and score it
# on the held-out samples.
model = GaussianNB().fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f'Accuracy: {accuracy :.4f}')


Accuracy: 0.9778


In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Fresh copy of iris for the k-NN experiment; note this section holds out 35%
# of the samples rather than 30%.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    random_state=42,
)


In [14]:
# Search k = 1..30 for k-nearest-neighbours using 5-fold cross-validation on
# the training split only, selecting the k with the best mean accuracy.
param_grid = {'n_neighbors': range(1, 31)}
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

Best parameters: {'n_neighbors': 8}
Best score: 0.9494736842105264


  _data = np.array(data, dtype=dtype, copy=copy,


In [15]:
# Take the estimator refit with the best k and measure its accuracy on the
# held-out test split.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)
test_accuracy = best_knn.score(X_test, y_test)
print("Test accuracy:", test_accuracy)


Test accuracy: 0.9811320754716981
