In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV

## Loading the dataset (this time from scikit-learn's built-in `load_iris` loader)

In [2]:
# Load the Iris dataset object, then pull out the feature matrix and the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Preprocessing and Visualization

## 1. Visualize the dataset using scatterplots to explore the relationships between the features.

In [None]:
# Wrap the feature matrix in a DataFrame so every pair-plot axis is labelled
# with its feature name, then draw all pairwise scatterplots.
iris_features = pd.DataFrame(data=X, columns=iris.feature_names)
sns.pairplot(iris_features)
plt.show()

## 2. Preprocessing 
### - Scaling features (standardization)
### - Splitting the data into training and testing sets

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## 3. Visualize the standardized features to identify any outliers.

In [None]:
# One box per standardized training feature, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=X_train, ax=ax)

# Replace the default positional tick labels with the feature names.
ax.set_xticks(np.arange(4))
ax.set_xticklabels(iris.feature_names, rotation=45)
plt.show()

## 4. Remove outliers (training rows where any standardized feature lies beyond ±3)

In [None]:
# A training row is flagged when any of its standardized features has an
# absolute value greater than 3.
outlier_indexes = (np.abs(X_train) > 3).any(axis=1)

# Keep only the rows (and their labels) that were not flagged.
keep_rows = ~outlier_indexes
X_train = X_train[keep_rows]
y_train = y_train[keep_rows]

## 5. Visualize the class distributions using a bar plot

In [None]:
# Bar plot of how many training samples remain in each class.
ax = sns.countplot(x=y_train)

# Label the three bars with the class names instead of the integer codes.
ax.set_xticks(np.arange(3))
ax.set_xticklabels(iris.target_names)
plt.show()

# Model Training and Testing

## Train the Naive Bayes classifier and perform a grid search for hyperparameter tuning.

In [None]:
# Candidate values for `var_smoothing`: 100 log-spaced points from 10**0 down to 10**-9.
param_grid = dict(var_smoothing=np.logspace(start=0, stop=-9, num=100))

In [None]:
# Instantiate the Gaussian Naive Bayes estimator that the grid search tunes.
# `nb` was previously never defined, so this cell raised NameError on a fresh kernel.
nb = GaussianNB()

# 5-fold cross-validated search over `param_grid`; n_jobs=-1 uses all available CPU cores.
grid = GridSearchCV(estimator=nb, param_grid=param_grid, cv=5, verbose=0, n_jobs=-1)

In [None]:
# Run the cross-validated search on the training data.
grid.fit(X=X_train, y=y_train)

In [None]:
# Display the parameter setting selected by the grid search.
grid.best_params_

In [None]:
# Take the best estimator found by the search and predict on the held-out test split.
best_nb = grid.best_estimator_
y_pred = best_nb.predict(X_test)

# Report the fraction of test samples that were classified correctly.
test_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy with best hyperparameters:', test_accuracy)

In [None]:
# Counts of test samples for each (true class, predicted class) pair.
test_confusion = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:\n', test_confusion)

In [None]:
# Text report of the per-class classification metrics on the test split.
test_report = classification_report(y_test, y_pred)
print(test_report)