In [1]:
# Load modules
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

import joblib


In [12]:
# Load the dataset

iris_df = pd.read_csv("IRIS.csv")

In [13]:
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [15]:
iris_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [16]:
# Split the dataset into features and target

X = iris_df.iloc[:, :-1].values
y = iris_df.iloc[:, -1].values

In [17]:
# Split the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [18]:
# Standardize the features using StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_sclaed = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Encode the target using LabelEncoder

encoder = LabelEncoder()
encoder.fit(y_train)
y_train_encoded = encoder.transform(y_train)
y_test_encoded = encoder.transform(y_test)


In [19]:
# Train the KNN model and make predictions

knn_model = KNeighborsClassifier(n_neighbors=5, metric="minkowski", weights="uniform")
knn_model.fit(X_train_sclaed, y_train_encoded)
y_pred = knn_model.predict(X_test_scaled)


In [20]:
# Evaluate the model performance using testing and training sets

test_score = knn_model.score(X_test_scaled, y_test_encoded)
train_score = knn_model.score(X_train_sclaed, y_train_encoded)
print(f"Testing score: {test_score:.2f}")
print(f"Training score: {train_score:.2f}")


Testing score: 0.98
Training score: 0.95


In [21]:
# Generate the classification report evaluating the model performance

print(classification_report(y_test_encoded, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.94      1.00      0.97        15
           2       1.00      0.94      0.97        16

    accuracy                           0.98        50
   macro avg       0.98      0.98      0.98        50
weighted avg       0.98      0.98      0.98        50



In [22]:
joblib.dump(knn_model, "Persistence/knn_model.sav")
joblib.dump(scaler, "Persistence/scaler_features.sav")
joblib.dump(encoder, "Persistence/label_encoder.sav")


['Persistence/label_encoder.sav']