# Machine learning workflow with scikit-learn

Load a sample dataset: the Iris dataset

In [7]:
from sklearn.datasets import load_iris

# Fetch the bundled Iris dataset; the returned object exposes the feature
# matrix as `.data` and the class labels as `.target`.
iris = load_iris()

# Feature matrix and label vector used by the rest of the notebook.
X, y = iris.data, iris.target

Splitting the dataset into training and testing sets

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the samples for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=1
)

# Training the model

In scikit-learn, a machine learning model is created from a class called an Estimator.

Each Estimator implements two methods: fit() and predict().

The fit() method is used to train the model.

The predict() method is used to make estimates/predictions with the trained model.

In [9]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 3-nearest-neighbours classifier and train it on the training split.
# fit() returns the estimator itself, so the trailing expression still shows
# the fitted model's repr as the cell output.
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
model

KNeighborsClassifier(n_neighbors=3)

Evaluating the model

In [20]:
from sklearn.metrics import accuracy_score

# Predict labels for the held-out samples, then measure the fraction that
# match the true labels.
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {acc}')

Accuracy: 0.9833333333333333


Using the trained model to make predictions

In [12]:
# Two unseen samples with four feature values each.
# NOTE(review): the values are assumed to follow the column order of
# iris.data -- confirm against iris.feature_names.
new_data = [
    [5, 5, 3, 2],
    [2, 4, 3, 5],
]

# Predicted class indices, one per row of new_data.
preds = model.predict(new_data)
preds

array([1, 2])

In [14]:
# Translate each predicted class index into its species name.
# iris.target_names is a NumPy string array, so indexing it yields NumPy
# string scalars; under NumPy >= 2.0 those print as "np.str_('...')" inside
# a list. Converting to built-in str keeps the printed list clean on every
# NumPy version.
pred_species = [str(iris.target_names[p]) for p in preds]
print(f'Prediction Result: {pred_species}')

Prediction Result: ['versicolor', 'virginica']


# Dump & load the trained model

Dumping the machine learning model to a joblib file

In [17]:
import joblib

# Serialise the fitted classifier to disk; joblib.dump returns the list of
# written file names, which the notebook shows as the cell output.
joblib.dump(value=model, filename='iris_classifier_kkn.joblib')

['iris_classifier_kkn.joblib']

Loading the machine learning model from a joblib file

In [19]:
# Restore the classifier from the joblib file written by the dump cell.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
production_model = joblib.load('iris_classifier_kkn.joblib')

# Part 2: Preprocessing data with scikit-learn

Sample Data

In [21]:
import numpy as np
from sklearn import preprocessing

# Small 4x3 demo matrix (one sample per row, one feature per column) used as
# the input of the preprocessing examples.
sample_data = np.array(
    [
        [2.1, -1.9, 5.5],
        [-1.5, 2.4, 3.5],
        [0.5, -7.9, 5.6],
        [5.9, 2.3, -5.8],
    ]
)

sample_data

array([[ 2.1, -1.9,  5.5],
       [-1.5,  2.4,  3.5],
       [ 0.5, -7.9,  5.6],
       [ 5.9,  2.3, -5.8]])

In [22]:
# Check the array's dimensions: (rows, columns).
sample_data.shape

(4, 3)

Binarisation

In [23]:
# Show the unmodified input again so it can be compared with the binarised result.
sample_data

array([[ 2.1, -1.9,  5.5],
       [-1.5,  2.4,  3.5],
       [ 0.5, -7.9,  5.6],
       [ 5.9,  2.3, -5.8]])

In [25]:
# Map every value to 0 or 1 by comparing it with the threshold: values
# strictly greater than 0.5 become 1, everything else (including exactly 0.5)
# becomes 0. transform() is called directly here, without a prior fit().
preprocessor = preprocessing.Binarizer(threshold=0.5)
binarised_data = preprocessor.transform(sample_data)
binarised_data

array([[1., 0., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 0.]])

Scaling

In [26]:
# Show the unmodified input again so it can be compared with the scaled result.
sample_data

array([[ 2.1, -1.9,  5.5],
       [-1.5,  2.4,  3.5],
       [ 0.5, -7.9,  5.6],
       [ 5.9,  2.3, -5.8]])

In [28]:
# Rescale every column to the [0, 1] range: fit() learns each column's
# minimum and maximum, then transform() applies the mapping.
preprocessor = preprocessing.MinMaxScaler(feature_range=(0, 1)).fit(sample_data)
scaled_data = preprocessor.transform(sample_data)
scaled_data

array([[0.48648649, 0.58252427, 0.99122807],
       [0.        , 1.        , 0.81578947],
       [0.27027027, 0.        , 1.        ],
       [1.        , 0.99029126, 0.        ]])

In [29]:
# fit_transform() performs the fit and the transform in a single call; the
# result matches the separate fit() + transform() steps shown earlier.
scaled_data = preprocessor.fit_transform(sample_data)
scaled_data

array([[0.48648649, 0.58252427, 0.99122807],
       [0.        , 1.        , 0.81578947],
       [0.27027027, 0.        , 1.        ],
       [1.        , 0.99029126, 0.        ]])

L1 Normalisation (L1 norm, as used in Least Absolute Deviations): each row is rescaled so that the sum of its absolute values equals 1

reference:https://en.wikipedia.org/wiki/Least_absolute_deviations#:~:text=Least%20absolute%20deviations%20(LAD)%2C,or%20sum%20of%20absolute%20errors)

In [30]:
# Show the unmodified input again so it can be compared with the L1-normalised result.
sample_data

array([[ 2.1, -1.9,  5.5],
       [-1.5,  2.4,  3.5],
       [ 0.5, -7.9,  5.6],
       [ 5.9,  2.3, -5.8]])

In [31]:
# Row-wise L1 normalisation: each sample (row) is divided by the sum of the
# absolute values of its entries, so every row's absolute values add up to 1.
# axis=1 is normalize()'s default and is spelled out here for clarity.
l1_normalised_data = preprocessing.normalize(sample_data, norm='l1', axis=1)
l1_normalised_data

array([[ 0.22105263, -0.2       ,  0.57894737],
       [-0.2027027 ,  0.32432432,  0.47297297],
       [ 0.03571429, -0.56428571,  0.4       ],
       [ 0.42142857,  0.16428571, -0.41428571]])

L2 Normalisation (L2 norm, as used in Least Squares): each row is rescaled so that its Euclidean length equals 1
    
reference:https://en.wikipedia.org/wiki/Least_squares

In [32]:
# Show the unmodified input again so it can be compared with the L2-normalised result.
sample_data

array([[ 2.1, -1.9,  5.5],
       [-1.5,  2.4,  3.5],
       [ 0.5, -7.9,  5.6],
       [ 5.9,  2.3, -5.8]])

In [33]:
# Row-wise L2 normalisation: each sample (row) is divided by its Euclidean
# length, so the squares of every row's entries add up to 1.
# axis=1 is normalize()'s default and is spelled out here for clarity.
l2_normalised_data = preprocessing.normalize(sample_data, norm='l2', axis=1)
l2_normalised_data

array([[ 0.33946114, -0.30713151,  0.88906489],
       [-0.33325106,  0.53320169,  0.7775858 ],
       [ 0.05156558, -0.81473612,  0.57753446],
       [ 0.68706914,  0.26784051, -0.6754239 ]])