In [1]:
# Importing necessary libraries
import numpy as np  # For numerical operations
import pandas as pd  # For data manipulation and analysis
import matplotlib.pyplot as plt  # For plotting and visualization
import seaborn as sns  # For enhanced data visualization

# Importing machine learning functions from scikit-learn
from sklearn import datasets  # To load standard datasets
from sklearn.model_selection import train_test_split  # For splitting data into training and test sets
from sklearn.preprocessing import StandardScaler  # For feature scaling
from sklearn.neighbors import KNeighborsClassifier  # For using the k-Nearest Neighbors classifier
from sklearn.metrics import classification_report, confusion_matrix  # For model evaluation

# Load the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()
# Build a DataFrame with one column per feature, then attach a categorical
# 'species' column that maps each numeric target code to its species name
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    species=pd.Categorical.from_codes(codes=iris.target, categories=iris.target_names)
)



In [None]:
# Exploratory analysis

# Show the first few rows of the iris DataFrame for a quick overview.
# Jupyter only renders the LAST expression of a cell automatically, so a
# result produced mid-cell must be passed to display() to appear at all
# (display is available as a built-in inside IPython/Jupyter kernels).
display(iris_df.head())

# Show descriptive statistics (count, mean, std, min, quartiles, max) for
# the numeric feature columns; NaN values are excluded from the computation
display(iris_df.describe())

# Create a pairplot of the iris DataFrame to visualize the pairwise
# relationships between its features, color-coded by species
sns.pairplot(iris_df, hue='species')

# Render the pairplot
plt.show()


In [None]:
# Prepare the data

# x holds the feature matrix (independent variables);
# y holds the class labels (dependent variable)
x, y = iris.data, iris.target

# Hold out 30% of the samples for testing and train on the remaining 70%.
# Fixing random_state makes the shuffle, and therefore the split, reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Feature Scaling

# Learn the per-feature mean and standard deviation from the TRAINING data only,
# so that no information from the test set leaks into the preprocessing step
scaler = StandardScaler().fit(x_train)

# Standardize the training features (zero mean, unit variance).
# Note the naming: the capitalized X_train / X_test hold the SCALED arrays,
# while the lowercase x_train / x_test remain the raw, unscaled splits.
X_train = scaler.transform(x_train)

# Apply the same training-derived parameters to the test features
X_test = scaler.transform(x_test)


In [None]:
# Choose and train the model

# Initialize the k-Nearest Neighbors classifier with 3 neighbors
knn = KNeighborsClassifier(n_neighbors=3)

# Train the classifier using the training data
knn.fit(x_train, y_train)


In [None]:
# Model evaluation

# Use the trained model to make predictions on the test set
y_pred = knn.predict(x_test)

# Print the confusion matrix to evaluate the accuracy of the classification
print(confusion_matrix(y_test, y_pred))

# Print a classification report to show various metrics like precision, recall, and F1-score
print(classification_report(y_test, y_pred))
