In [1]:
from sklearn.datasets import load_iris

In [2]:
# Load the Iris dataset; the returned object carries the arrays and their labels.
iris = load_iris()

# Feature matrix and integer class labels.
X, y = iris.data, iris.target

# Names for the feature columns and for the class indices.
feature_names, target_names = iris.feature_names, iris.target_names

# Sanity check: show the names and peek at the first five samples.
for label, value in (
    ("Feature names:", feature_names),
    ("Target names:", target_names),
    ("First 5 rows of X:\n", X[0:5]),
    ("First 5 rows of y:\n", y[0:5]),
):
    print(label, value)


Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']
First 5 rows of X:
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
First 5 rows of y:
 [0 0 0 0 0]


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing (70/30 split); the fixed
# random_state makes the shuffle reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Print the shape of each split: the two feature matrices first (samples x 4
# features), then the two label vectors (one entry per sample).
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(105, 4)
(45, 4)
(105,)
(45,)


In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Fit a k-nearest-neighbours classifier (k = 3) on the training split.
classifier_knn = KNeighborsClassifier(n_neighbors=3)
classifier_knn.fit(X_train, y_train)

# Score the classifier on the held-out test split.
y_pred_knn = classifier_knn.predict(X_test)
accuracy_knn = metrics.accuracy_score(y_test, y_pred_knn)
print("KNN Accuracy:", accuracy_knn)

# Classify two hand-made measurements; each row has four values, in the same
# column order as the training features.
sample = [[5, 5, 3, 2], [2, 4, 3, 5]]
preds = classifier_knn.predict(sample)

# iris.target_names is a NumPy string array, so indexing it yields NumPy
# string scalars. Convert them to built-in str so the list prints as
# ['versicolor', ...] rather than [np.str_('versicolor'), ...] on NumPy >= 2.
pred_species = [str(iris.target_names[p]) for p in preds]
print("Predicted species for sample:", pred_species)

KNN Accuracy: 0.9777777777777777
Predicted species for sample: [np.str_('versicolor'), np.str_('virginica')]


# Save Model to File

In [14]:
import joblib
# Persist the fitted classifier to disk so it can be reloaded without
# retraining. The call's return value (the list of written file names) is
# left as the cell's displayed output.
joblib.dump(value=classifier_knn, filename='iris_classifier_knn.joblib')

['iris_classifier_knn.joblib']

In [15]:
# Reload the persisted classifier from disk.
# NOTE: joblib.load is pickle-based and can execute arbitrary code, so only
# load files you trust.
model = joblib.load('iris_classifier_knn.joblib')

# Re-score on the same test split with the reloaded model.
y_pred_knn = model.predict(X_test)
accuracy_knn = metrics.accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print("KNN Accuracy:", accuracy_knn)

KNN Accuracy: 0.9777777777777777


# Preprocessing Data

In [16]:
import numpy as np 
from sklearn import preprocessing

# Small 4x3 demo matrix used as input for the preprocessing examples.
Input_data = np.array(
    [
        [2.1, -1.9, 5.5],
        [-1.5, 2.4, 3.5],
        [0.5, -7.9, 5.6],
        [5.9, 2.3, -5.8],
    ]
)

# Binarize: entries strictly greater than the 0.5 threshold become 1.0 and
# everything else (including 0.5 itself) becomes 0.0.
data_binarized = preprocessing.binarize(Input_data, threshold=0.5)
print("Binarized data:\n", data_binarized)

Binarized data:
 [[1. 0. 1.]
 [0. 1. 1.]
 [0. 0. 1.]
 [1. 1. 0.]]


In [17]:
# Column-wise statistics of the raw data (axis=0 reduces over the rows).
print("Mean =", np.mean(Input_data, axis=0))
print("Standard Deviation =", np.std(Input_data, axis=0))

# Standardize every column to zero mean and unit standard deviation.
data_scaled = preprocessing.scale(Input_data)
print("Scaled data:\n", data_scaled)

# Verify the result: column means should be ~0 (up to floating-point error)
# and column standard deviations should be 1.
print("Mean_removed =", np.mean(data_scaled, axis=0))
print("Std_removed =", np.std(data_scaled, axis=0))

Mean = [ 1.75  -1.275  2.2  ]
Standard Deviation = [2.71431391 4.20022321 4.69414529]
Scaled data:
 [[ 0.12894603 -0.14880162  0.70300338]
 [-1.19735598  0.8749535   0.27694073]
 [-0.46052153 -1.57729713  0.72430651]
 [ 1.52893149  0.85114524 -1.70425062]]
Mean_removed = [1.11022302e-16 0.00000000e+00 0.00000000e+00]
Std_removed = [1. 1. 1.]


In [18]:
# Min-max scaling: linearly rescale each column so that its minimum maps to 0
# and its maximum maps to 1.
data_scaler_minmax = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaler_minmax.fit(Input_data)  # learn the per-column min and max
data_scaled_minmax = data_scaler_minmax.transform(Input_data)
print("Min-Max Scaled data:\n", data_scaled_minmax)

Min-Max Scaled data:
 [[0.48648649 0.58252427 0.99122807]
 [0.         1.         0.81578947]
 [0.27027027 0.         1.        ]
 [1.         0.99029126 0.        ]]


In [19]:
# L1 normalization: rescale each row (axis=1) so that the absolute values of
# its entries sum to 1.
data_normalised_l1 = preprocessing.normalize(Input_data, norm='l1', axis=1)
print("L1 Normalized data:\n", data_normalised_l1)

L1 Normalized data:
 [[ 0.22105263 -0.2         0.57894737]
 [-0.2027027   0.32432432  0.47297297]
 [ 0.03571429 -0.56428571  0.4       ]
 [ 0.42142857  0.16428571 -0.41428571]]


In [20]:
# L2 normalization: rescale each row (axis=1) so that it has unit Euclidean
# length (the squares of its entries sum to 1).
data_normalised_l2 = preprocessing.normalize(Input_data, norm='l2', axis=1)
print("L2 Normalized data:\n", data_normalised_l2)

L2 Normalized data:
 [[ 0.33946114 -0.30713151  0.88906489]
 [-0.33325106  0.53320169  0.7775858 ]
 [ 0.05156558 -0.81473612  0.57753446]
 [ 0.68706914  0.26784051 -0.6754239 ]]
