Example 1: Loading the Iris Dataset

This code loads the iris dataset and displays its feature names, its target names, and the first 10 rows of the feature matrix.

In [1]:
# Loader for the iris dataset
from sklearn.datasets import load_iris

# Fetch the dataset once; everything below is read from this object
iris = load_iris()

# Feature matrix (X) and target vector (y), unpacked in one step
X, y = iris.data, iris.target

# Labels for the feature columns and for the target classes
feature_names, target_names = iris.feature_names, iris.target_names

# Show the column names, then the class names
print("Feature names:", feature_names)
print("Target names:", target_names)

# Preview only the first 10 samples instead of dumping the whole matrix
print("\nFirst 10 rows of X:\n", X[:10])

Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']

First 10 rows of X:
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]


Example 2: Splitting the Dataset

This code splits the dataset into training and testing sets.

In [2]:
# Imports: dataset loader and the train/test splitting helper
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris and pull out the features (X) and the target (y)
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing (70% train);
# random_state=1 pins the shuffle so the split is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Report shapes in the order: X_train, X_test, y_train, y_test
for part in (X_train, X_test, y_train, y_test):
    print(part.shape)

(105, 4)
(45, 4)
(105,)
(45,)


Example 3: Training KNN Model

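This code trains a KNN classifier (K=3) on a 60/40 train/test split of the iris dataset, reports its accuracy on the test set, and predicts the species of two sample measurements.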

In [3]:
# Import dataset
from sklearn.datasets import load_iris
iris = load_iris()

# Features and target
X = iris.data
y = iris.target

# Split dataset (60% train, 40% test); random_state=1 keeps the split reproducible
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=1
)

# Import KNN classifier and the metrics module used for scoring
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Create KNN model (K=3)
classifier_knn = KNeighborsClassifier(n_neighbors=3)

# Train model on the training split only
classifier_knn.fit(X_train, y_train)

# Predict the held-out test data
y_pred = classifier_knn.predict(X_test)

# Print accuracy (fraction of test samples predicted correctly)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Two hand-written samples, each with four values in the same column order as X
sample = [[5, 5, 3, 2], [2, 4, 3, 5]]

# Predict the class index of each sample
preds = classifier_knn.predict(sample)

# Convert numeric output to species names.
# Indexing target_names yields NumPy string scalars, which show up as
# np.str_('...') when a list of them is printed under NumPy 2. Indexing with
# the whole prediction array and calling .tolist() gives plain Python str
# values, so the printed list is clean on every NumPy version.
pred_species = iris.target_names[preds].tolist()

print("Predictions:", pred_species)

Accuracy: 0.9833333333333333
Predictions: [np.str_('versicolor'), np.str_('virginica')]


Example 4: Model Persistence (Save & Load Model)

This code saves the trained model to a file with joblib and then loads it back from that file.

In [4]:
# Import required libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import joblib

# Load dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split dataset (70% train, 30% test); random_state=1 keeps it reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Train model
classifier_knn = KNeighborsClassifier(n_neighbors=3)
classifier_knn.fit(X_train, y_train)

# Save model into file (written relative to the current working directory)
joblib.dump(classifier_knn, 'iris_classifier_knn.joblib')

# Load model from file.
# NOTE: joblib files are pickle-based; only load files you created or trust.
loaded_model = joblib.load('iris_classifier_knn.joblib')

# Verify the round trip before reporting success: the reloaded model must
# give exactly the same predictions on the test split as the in-memory model.
assert (loaded_model.predict(X_test) == classifier_knn.predict(X_test)).all(), \
    "Reloaded model does not reproduce the original model's predictions"

print("Model Saved and Loaded Successfully")

Model Saved and Loaded Successfully


Example 5: Binarisation

Converts values strictly greater than the threshold to 1 and all other values (including values equal to the threshold) to 0.

In [5]:
import numpy as np
from sklearn import preprocessing

# Sample matrix: 4 rows x 3 columns with mixed-sign values
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# Build the binarizer (threshold=0.5) first, then apply it to the data.
# Values greater than the threshold become 1; everything else becomes 0.
binarizer = preprocessing.Binarizer(threshold=0.5)
data_binarized = binarizer.transform(input_data)

# Show the 0/1 matrix
print("\nBinarized data:\n", data_binarized)


Binarized data:
 [[1. 0. 1.]
 [0. 1. 1.]
 [0. 0. 1.]
 [1. 1. 0.]]


Example 6: Mean Removal (Standardization)

Removes the mean of each feature (column) and scales each feature to unit variance.

In [6]:
import numpy as np
from sklearn import preprocessing

# Sample matrix: 4 rows x 3 columns
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# Column-wise statistics before scaling (axis=0 gives one value per column)
print("Mean =", np.mean(input_data, axis=0))
print("Stddeviation =", np.std(input_data, axis=0))

# Standardize the data
data_scaled = preprocessing.scale(input_data)

# Same statistics after scaling: means should be ~0 and deviations 1
print("Mean_removed =", np.mean(data_scaled, axis=0))
print("Stddeviation_removed =", np.std(data_scaled, axis=0))

Mean = [ 1.75  -1.275  2.2  ]
Stddeviation = [2.71431391 4.20022321 4.69414529]
Mean_removed = [1.11022302e-16 0.00000000e+00 0.00000000e+00]
Stddeviation_removed = [1. 1. 1.]


Example 7: Min-Max Scaling

Scales each feature (column) so that its values lie between 0 and 1.

In [7]:
import numpy as np
from sklearn import preprocessing

# Sample matrix: 4 rows x 3 columns
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# Scaler that maps each column onto the range 0..1
data_scaler_minmax = preprocessing.MinMaxScaler(feature_range=(0, 1))

# Learn the per-column range from the data, then rescale that same data
data_scaler_minmax.fit(input_data)
data_scaled_minmax = data_scaler_minmax.transform(input_data)

# Show the rescaled matrix
print("\nMin max scaled data:\n", data_scaled_minmax)


Min max scaled data:
 [[0.48648649 0.58252427 0.99122807]
 [0.         1.         0.81578947]
 [0.27027027 0.         1.        ]
 [1.         0.99029126 0.        ]]


Example 8: L1 Normalisation

Scales each row so that the sum of the absolute values in the row equals 1.

In [8]:
import numpy as np
from sklearn import preprocessing

# Sample matrix: 4 rows x 3 columns
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# L1-normalize row by row (axis=1 is the default, spelled out for clarity):
# each row is divided by the sum of its absolute values
data_normalized_l1 = preprocessing.normalize(input_data, norm='l1', axis=1)

# Show the normalized matrix
print("\nL1 normalized data:\n", data_normalized_l1)


L1 normalized data:
 [[ 0.22105263 -0.2         0.57894737]
 [-0.2027027   0.32432432  0.47297297]
 [ 0.03571429 -0.56428571  0.4       ]
 [ 0.42142857  0.16428571 -0.41428571]]


Example 9: L2 Normalisation

Scales each row so that the sum of the squared values in the row equals 1 (i.e. each row has unit Euclidean length).

In [9]:
import numpy as np
from sklearn import preprocessing

# Sample matrix: 4 rows x 3 columns
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# L2-normalize row by row (axis=1 is the default, spelled out for clarity):
# each row is divided by its Euclidean length
data_normalized_l2 = preprocessing.normalize(input_data, norm='l2', axis=1)

# Show the normalized matrix
print("\nL2 normalized data:\n", data_normalized_l2)


L2 normalized data:
 [[ 0.33946114 -0.30713151  0.88906489]
 [-0.33325106  0.53320169  0.7775858 ]
 [ 0.05156558 -0.81473612  0.57753446]
 [ 0.68706914  0.26784051 -0.6754239 ]]


EXERCISE (Digits Dataset)

This code loads the digits dataset, trains a KNN classifier on it, and reports the accuracy, the confusion matrix, and the classification report.

In [10]:
# Libraries: dataset loader, splitting helper, model, and evaluation metrics
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# NOTE(review): matplotlib is imported here but not used anywhere in this cell
import matplotlib.pyplot as plt

# Load the digits dataset and unpack features (X) and labels (y)
digits = load_digits()
X, y = digits.data, digits.target

# 70/30 train/test split; shuffling with a fixed seed keeps it reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=42
)

# Build a 3-nearest-neighbours classifier and fit it on the training split
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = knn.predict(X_test)

# Evaluation: overall accuracy, then per-class breakdowns
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9888888888888889

Confusion Matrix:
 [[53  0  0  0  0  0  0  0  0  0]
 [ 0 50  0  0  0  0  0  0  0  0]
 [ 0  0 47  0  0  0  0  0  0  0]
 [ 0  0  0 54  0  0  0  0  0  0]
 [ 0  0  0  0 60  0  0  0  0  0]
 [ 0  0  0  0  0 66  0  0  0  0]
 [ 0  0  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  0  0 54  0  1]
 [ 0  1  0  0  0  0  0  0 42  0]
 [ 0  0  0  1  1  1  0  0  1 55]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.98      1.00      0.99        50
           2       1.00      1.00      1.00        47
           3       0.98      1.00      0.99        54
           4       0.98      1.00      0.99        60
           5       0.99      1.00      0.99        66
           6       1.00      1.00      1.00        53
           7       1.00      0.98      0.99        55
           8       0.98      0.98      0.98        43
           9       0.98      0.93      0.96        59


TASK 2 (Fruits Dataset)

This code creates a small custom fruit dataset, splits it with a stratified train/test split, and applies KNN to it.

In [11]:
# Libraries: NumPy for the data, scikit-learn for splitting, model and metrics
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# --------------------------------------------
# Step 1: Create Fruit Dataset
# Each sample is [Weight, Diameter, Colour]
# Labels: 0 = Cherry, 1 = Apple, 2 = Watermelon
# --------------------------------------------

cherries = [[10, 2, 0], [12, 2.5, 0], [9, 2.1, 0], [11, 2.3, 0]]
# NOTE(review): the first apple has colour 0 while the other apples have
# colour 1 -- confirm this is intentional and not a typo.
apples = [[150, 7, 0], [170, 8, 1], [160, 7.5, 1], [180, 8.2, 1]]
watermelons = [[3000, 25, 2], [3500, 30, 2], [2800, 24, 2], [3200, 27, 2]]

# Stack the three groups in label order; four samples per class
X = np.array(cherries + apples + watermelons)
y = np.array([0] * 4 + [1] * 4 + [2] * 4)

# --------------------------------------------
# Step 2: Split Dataset (Stratified Split)
# With only 12 samples, stratify=y is used so that
# every class appears in both the train and test sets
# --------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    shuffle=True,
    stratify=y,
    random_state=42,
)

# --------------------------------------------
# Step 3: Train KNN Model (K=3)
# --------------------------------------------

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# --------------------------------------------
# Step 4: Make Predictions on the test split
# --------------------------------------------

y_pred = knn.predict(X_test)

# --------------------------------------------
# Step 5: Evaluate Model
# --------------------------------------------

print("Accuracy:", accuracy_score(y_test, y_pred))

print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))

# zero_division=0 reports 0 for any metric whose denominator is zero
# (e.g. a class that is never predicted)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred, zero_division=0))

Accuracy: 1.0

Confusion Matrix:

[[1 0 0]
 [0 1 0]
 [0 0 2]]

Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

