In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier


In [3]:
# Fetch the iris data bunch and pull out the feature matrix and the label vector
iris = load_iris()
X, y = iris.data, iris.target

# Show which features and which classes the dataset contains
print("Feature names:", iris.feature_names)
print("Target names:", iris.target_names)


Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']


In [5]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
def calculate_priors(y_train):
    """Estimate class prior probabilities as relative label frequencies.

    Parameters
    ----------
    y_train : array-like
        Class labels of the training samples.

    Returns
    -------
    dict
        Maps every distinct label in ``y_train`` to its count divided by
        ``len(y_train)``.
    """
    labels, occurrences = np.unique(y_train, return_counts=True)
    shares = occurrences / len(y_train)
    return {label: share for label, share in zip(labels, shares)}

# Estimate the class priors from the training labels and print them
priors = calculate_priors(y_train)
print("Prior probabilities:", priors)


Prior probabilities: {0: 0.3333333333333333, 1: 0.3416666666666667, 2: 0.325}


In [9]:
def calculate_gaussian_parameters(X_train, y_train):
    """Fit one Gaussian per feature for every class.

    Parameters
    ----------
    X_train : ndarray
        Training samples; rows are selected with a boolean mask built from
        ``y_train``.
    y_train : ndarray
        Class label of each training row.

    Returns
    -------
    dict
        Maps each distinct label to ``{'mean': ..., 'var': ...}``, the
        column-wise mean and (population) variance of that class's rows.
    """
    def summarise(rows):
        # Column-wise statistics of the rows belonging to a single class
        return {'mean': np.mean(rows, axis=0), 'var': np.var(rows, axis=0)}

    return {
        label: summarise(X_train[y_train == label])
        for label in np.unique(y_train)
    }

# Compute the per-class feature means and variances on the training split and print them
gaussian_params = calculate_gaussian_parameters(X_train, y_train)
print("Gaussian parameters (mean and variance):", gaussian_params)


Gaussian parameters (mean and variance): {0: {'mean': array([4.99  , 3.4525, 1.45  , 0.245 ]), 'var': array([0.1239    , 0.15249375, 0.033     , 0.010975  ])}, 1: {'mean': array([5.9195122 , 2.77073171, 4.24146341, 1.32195122]), 'var': array([0.28693635, 0.10011898, 0.22584176, 0.04122546])}, 2: {'mean': array([6.53333333, 2.96666667, 5.52051282, 2.        ]), 'var': array([0.4165812 , 0.0991453 , 0.28573307, 0.08205128])}}


In [11]:
def gaussian_likelihood(x, mean, var):
    """Evaluate the univariate normal density N(x | mean, var) element-wise.

    ``x``, ``mean`` and ``var`` may be scalars or NumPy arrays that broadcast
    against each other; the result has the broadcast shape.
    """
    squared_deviation = (x - mean) ** 2
    normaliser = 1 / np.sqrt(2 * np.pi * var)
    return normaliser * np.exp(-squared_deviation / (2 * var))


In [13]:
def calculate_posterior(X, priors, params):
    """Return the most probable class label for one sample (Gaussian Naive Bayes).

    For every class ``c`` in ``params`` the unnormalised log-posterior

        log P(c) + sum_i log N(x_i | mean_ci, var_ci)

    is evaluated, and the label with the highest score is returned.

    Parameters
    ----------
    X : array-like
        Feature values of a single sample.
    priors : dict
        Maps each class label to its prior probability.
    params : dict
        Maps each class label to ``{'mean': ..., 'var': ...}`` holding the
        per-feature Gaussian parameters. Variances are expected to be
        strictly positive.

    Returns
    -------
    The key of ``params`` with the highest log-posterior. On ties the class
    that comes first in ``params`` wins.

    Raises
    ------
    ValueError
        If ``params`` is empty (raised by ``np.argmax``).
    """
    x = np.asarray(X, dtype=float)
    labels = []
    log_posteriors = []

    for cls, cls_params in params.items():
        mean = np.asarray(cls_params['mean'], dtype=float)
        var = np.asarray(cls_params['var'], dtype=float)
        # Evaluate the Gaussian log-density directly. Taking log() of an
        # exponentiated density underflows to log(0) = -inf for samples far
        # from the class mean, which makes every class tie.
        log_likelihood = np.sum(
            -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)
        )
        labels.append(cls)
        log_posteriors.append(np.log(priors[cls]) + log_likelihood)

    # argmax yields a position in iteration order; translate it back to the
    # actual class label so labels need not be 0..k-1.
    return labels[int(np.argmax(log_posteriors))]

def predict(X_test, priors, params):
    """Classify every row of ``X_test`` and return the results as a NumPy array.

    Each row is passed, together with ``priors`` and ``params``, to
    ``calculate_posterior``; the per-row results are collected in order.
    """
    results = []
    for sample in X_test:
        results.append(calculate_posterior(sample, priors, params))
    return np.array(results)


In [15]:
# Classify every test sample with the hand-written Gaussian Naive Bayes model
y_pred = predict(X_test, priors, gaussian_params)

# Compare the predictions with the true test labels and print the accuracy as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 100.00%


In [17]:
# K-nearest-neighbours baseline tuned with a cross-validated grid search.
# This cell reuses X_train / X_test / y_train / y_test from the split made
# earlier in the notebook (iris, test_size=0.2, random_state=42), so the
# score is measured on exactly the same held-out samples as the Naive Bayes
# model. KNeighborsClassifier and GridSearchCV are imported in the first cell.

# Step 1: Define the model
knn = KNeighborsClassifier()

# Step 2: Define the parameter grid
# We'll search for the best 'n_neighbors' value in the range from 1 to 10
param_grid = {'n_neighbors': range(1, 11)}

# Step 3: Set up GridSearchCV
# We'll use 5-fold cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5)

# Step 4: Fit the model (cross-validation only ever sees the training split)
grid_search.fit(X_train, y_train)

# Step 5: Obtain the best hyperparameter and best model
print("Best parameters found:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

# Step 6: Make predictions with the best model on the test set
# NOTE: y_pred is reassigned here and no longer holds the Naive Bayes predictions.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Calculate the test accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy with the best K: {test_accuracy * 100:.2f}%")


Best parameters found: {'n_neighbors': 3}
Best cross-validation accuracy: 0.9583333333333334
Test accuracy with the best K: 100.00%
