In [54]:
# Step 1: Import required libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [55]:
# Step 2: Load the Iris dataset and pull out the pieces used for modelling
iris = load_iris()
X, y = iris.data, iris.target  # X: feature matrix, y: integer class label per sample

In [59]:
# The object returned by load_iris() is a sklearn.utils.Bunch — a dictionary-like
# object, not a pandas.DataFrame — hence iris.describe() and iris.info() do not work on it.
# Convert the feature matrix to a DataFrame. Passing columns=iris.feature_names labels
# the columns; without it they would be displayed as integer indices.
import pandas as pd
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.describe()  # Display summary stats

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [None]:
# Print the index range, each column's dtype and non-null count, and the frame's memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [57]:
# Try filtering some data: keep rows whose sepal length exceeds 7.0 cm
# AND whose petal length exceeds 6.5 cm.
filtered_data = df.loc[
    df['sepal length (cm)'].gt(7.0) & df['petal length (cm)'].gt(6.5)
]
print(filtered_data)

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
105                7.6               3.0                6.6               2.1
117                7.7               3.8                6.7               2.2
118                7.7               2.6                6.9               2.3
122                7.7               2.8                6.7               2.0


In [None]:
# Check whether any feature has null/NaN values:
# isna() flags each cell, any() then reduces every column to a single bool.
df.isna().any()

sepal length (cm)    False
sepal width (cm)     False
petal length (cm)    False
petal width (cm)     False
dtype: bool

In [None]:
# Mean of a single feature, selected explicitly by column label
df.loc[:, 'sepal length (cm)'].mean()

np.float64(5.843333333333334)

In [35]:
# iris.data is 2-D: rows are samples (total data), columns are the
# features/attributes that describe each sample.
n_samples, n_features = iris.data.shape
print(n_samples, n_features)
# First sample's feature values, then the dataset metadata — one item per line.
print(X[0], iris.feature_names, iris.target_names, iris.filename, sep='\n')

150 4
[5.1 3.5 1.4 0.2]
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']
iris.csv


In [25]:
# Inspect which fields the Bunch returned by load_iris() exposes
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [61]:
# Class label of every sample, encoded as an integer index into iris.target_names
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [11]:
# Step 3: Split dataset into training and test sets
# test_size=0.2 holds out 20% of the samples; random_state seeds the split so reruns match.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Step 4: Create KNN classifier with 3 neighbors
# Only n_neighbors is set here; every other hyperparameter keeps its library default.
classifier_knn = KNeighborsClassifier(n_neighbors=3)

In [13]:
# Step 5: Train the classifier
# fit() receives only the training features and labels; the test split is kept for evaluation.
classifier_knn.fit(X_train, y_train)

In [14]:
# Step 6: Get and display model parameters (heading and parameter dict on separate lines)
print("🔧 Model parameters:", classifier_knn.get_params(), sep="\n")

🔧 Model parameters:
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 3, 'p': 2, 'weights': 'uniform'}


In [15]:
# Step 7: Predict on test data
y_pred = classifier_knn.predict(X_test)  # one predicted class index per test sample
print("\n🔮 Predictions on test set:", y_pred, sep="\n")


🔮 Predictions on test set:
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [16]:
# Step 8: Evaluate the model
# Every metric compares the true test labels (y_test) with the predictions (y_pred).
print("\n✅ Accuracy:", metrics.accuracy_score(y_test, y_pred))
# Confusion matrix and per-class precision / recall / F1 report.
print("\n📊 Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))
print("\n📄 Classification Report:\n", metrics.classification_report(y_test, y_pred))
# Single-number summaries; average='weighted' is needed because there are three classes.
print("🎯 Precision (weighted):", metrics.precision_score(y_test, y_pred, average='weighted'))
print("🔁 Recall (weighted):", metrics.recall_score(y_test, y_pred, average='weighted'))
print("📌 F1 Score (weighted):", metrics.f1_score(y_test, y_pred, average='weighted'))


✅ Accuracy: 1.0

📊 Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

📄 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

🎯 Precision (weighted): 1.0
🔁 Recall (weighted): 1.0
📌 F1 Score (weighted): 1.0


In [17]:
# Step 9: Predict a new sample
# One row, in the same feature order as iris.feature_names.
sample = [[3, 2, 1, 0.5]]
sample_pred = classifier_knn.predict(sample)
print("\n🔍 Predicted class index for sample:", sample_pred)
# Translate the predicted indices to species names, then take the single entry.
print("🌸 Predicted flower type:", iris.target_names[sample_pred][0])


🔍 Predicted class index for sample: [0]
🌸 Predicted flower type: setosa


In [18]:
# Step 9 again, broken into explicit stages.
sample = [[3, 2, 1, 0.5]]
preds = classifier_knn.predict(sample)     # Array with one class index per input row, e.g. array([0])
pred = preds[0]                            # Get scalar value from array
# `iris` is already loaded earlier in the notebook, so its target_names can be
# used directly — calling load_iris() again only re-reads the same data.
pred_species = iris.target_names[pred]     # Map class index to species name (0 -> 'setosa')
print("🌸 Predicted flower type:", pred_species)  # Output: setosa


🌸 Predicted flower type: setosa


### Model Persistence
Once a model is trained, it is desirable to persist it for future use so that we do not need to retrain it again and again. This can be done with the `dump` and `load` functions of the joblib package or the pickle module.

In [19]:
import joblib
# Write the fitted classifier to disk so it can be reused without retraining.
# joblib.dump returns the list of file names it wrote (shown as the cell output).
joblib.dump(classifier_knn, 'iris_classifier_knn_model.joblib')

['iris_classifier_knn_model.joblib']

In [20]:
import joblib
# Read the persisted classifier back from the file written by joblib.dump.
loaded_model = joblib.load('iris_classifier_knn_model.joblib')

In [21]:
# Check that the classifier restored with joblib predicts the same species for `sample`.
preds = loaded_model.predict(sample)       # `sample` is the single-row input defined in Step 9
pred = preds[0]                            # Get scalar value from array
# `iris` is already in scope, so there is no need to reload the dataset to get the names.
pred_species = iris.target_names[pred]
print("🌸 Predicted flower type:", pred_species)

🌸 Predicted flower type: setosa


In [22]:
import pickle
# Alternative to joblib: serialize the fitted classifier with the standard-library pickle module.
# NOTE(review): the file name is spelled 'urus_…' (presumably meant 'iris_…'). It works because
# the cell that loads the model uses the same spelling — rename both together if you fix it.
with open('urus_classifier_knn_model.pkl', 'wb') as f:
    pickle.dump(classifier_knn, f)

In [23]:
# Restore the pickled classifier and run it on the test set; the output should match the
# predictions the in-memory model produced for X_test.
# NOTE: unpickling can execute arbitrary code — only call pickle.load on files you trust.
with open('urus_classifier_knn_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

loaded_model.predict(X_test)


array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])