## Hyperparameter Tuning in Practice Using the Iris Dataset

In [1]:
import pandas as pd
import seaborn as sns

In [4]:
# Load the iris dataset through seaborn and preview its first five rows.
df = sns.load_dataset(name='iris')
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [6]:
# Separate the target column from the predictor columns.
y = df['species']
x = df.drop(columns='species')

In [24]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified -- consider stratify=y, since the
# test set is very small.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
)

In [25]:
from sklearn.preprocessing import StandardScaler

In [26]:
# Pipeline: standardise the features, then fit a logistic regression classifier.
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', LogisticRegression()),
    ]
)

In [27]:
# Fit the scaler and the classifier on the training split.
pipeline.fit(x_train, y_train)

0,1,2
,steps,"[('scaler', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [40]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict on the held-out split with the fitted pipeline and report accuracy.
y_pred = pipeline.predict(x_test)
print('Accuracy', accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy 1.0


In [37]:
from sklearn.model_selection import GridSearchCV, cross_val_score

In [41]:
# Show the confusion matrix of y_pred against the true test labels.
print(
    'Confusion Matrix',
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)

Confusion Matrix [[6 0 0]
 [0 6 0]
 [0 0 3]]


In [42]:
# Per-class precision, recall and F1 for the test predictions in y_pred.
# The report is a multi-line table, so its label is printed on a separate line
# and names the report itself rather than the confusion matrix.
print('Classification Report')
print(classification_report(y_test, y_pred))

Confusion Matrix               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00         6
  versicolor       1.00      1.00      1.00         6
   virginica       1.00      1.00      1.00         3

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15



## Hyperparameter Tuning with GridSearchCV

In [44]:
from sklearn.ensemble import RandomForestClassifier

In [67]:
# Baseline random forest with default hyperparameters and a fixed seed;
# fit() returns the estimator itself, so construction and fitting are chained.
rf = RandomForestClassifier(random_state=42).fit(x_train, y_train)
rf.predict(x_test)

array(['versicolor', 'setosa', 'virginica', 'versicolor', 'versicolor',
       'setosa', 'versicolor', 'virginica', 'versicolor', 'versicolor',
       'virginica', 'setosa', 'setosa', 'setosa', 'setosa'], dtype=object)

In [68]:
# Score the random forest itself: y_pred holds the logistic-regression
# pipeline's predictions, so the forest's predictions are computed here.
print('Accuracy', accuracy_score(y_test, rf.predict(x_test)))

Accuracy 1.0


In [69]:
# Search space for the random forest grid search.
# "entropy" and "log_loss" select the same split criterion in scikit-learn
# (both are Shannon information gain), so only one of them is listed to avoid
# running a third of the fits for identical models.
param_grid = {
    'max_depth': [5, 10, None],              # None lets trees grow until leaves are pure
    'max_features': ["sqrt", "log2", None],  # None considers every feature at each split
    'n_estimators': [50, 60, 40],
    'criterion': ["gini", "entropy"],
}

In [70]:
# 5-fold cross-validated grid search over param_grid, ranked by accuracy.
grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)

In [71]:
# Run the search and predict on the test split with the fitted search object
# (fit() returns the search object, so the calls are chained), then show the
# winning parameter combination.
y_pred_best = grid.fit(x_train, y_train).predict(x_test)
print(grid.best_params_)

{'criterion': 'gini', 'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 50}


In [72]:
# Test accuracy of the predictions made by the fitted grid search.
print(
    'Accuracy',
    accuracy_score(y_true=y_test, y_pred=y_pred_best),
)

Accuracy 1.0


In [74]:
# Take the winning configuration straight from the fitted search instead of
# hand-copying it, so it cannot go stale if the grid or the data changes.
# dict() makes a copy so later edits do not touch the search object's attribute.
best_params = dict(grid.best_params_)

In [79]:
# Refit a standalone forest with the selected hyperparameters.
# random_state matches the seed of the estimator used in the grid search (42),
# so the model that gets saved is reproducible across runs.
final_model = RandomForestClassifier(**best_params, random_state=42)
final_model.fit(x_train, y_train)
pred = final_model.predict(x_test)

In [80]:
# Test accuracy of the refitted final model.
print(
    'Accuracy',
    accuracy_score(y_true=y_test, y_pred=pred),
)

Accuracy 1.0


In [82]:
import joblib

In [83]:
# Persist the fitted model to disk; the value returned by joblib.dump is kept
# so the written file name(s) can be inspected.
save_model = joblib.dump(value=final_model, filename='Iris_rf')

In [84]:
# Display the value returned by joblib.dump (the list of written file names).
save_model

['Iris_rf']