In [1]:
import pandas as pd
import numpy as np

In [2]:

# Load the Iris dataset. A forward slash is used as the separator because it
# resolves on Windows, macOS and Linux, whereas "data\\Iris.csv" only works on Windows.
iris = pd.read_csv("data/Iris.csv")

In [3]:
# Display the loaded frame; for a long frame the rich display elides the middle rows.
iris

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [4]:

# Feature matrix: every column of the dataset except the target column.
X = iris.drop(columns=["Species"])

In [5]:
# The feature matrix holds 4 numeric independent variables; collect the names
# of all non-object columns so they can be routed through the numeric pipeline.
numerical_cols=X.select_dtypes(exclude='object').columns
numerical_cols

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'], dtype='object')

In [6]:
# Target labels as a 1-D Series. Selecting with double brackets would give an
# (n, 1) DataFrame, which makes scikit-learn estimators emit a column-vector
# DataConversionWarning on every fit. A Series still supports the later
# y_test.reset_index() call and yields the same 'index' / 'Species' columns.
y = iris["Species"]

In [7]:
from sklearn.impute import SimpleImputer ## Handling Missing Values
from sklearn.preprocessing import StandardScaler # Handling Feature Scaling
from sklearn.preprocessing import OrdinalEncoder # Ordinal Encoding
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [8]:

# Preprocessing pipeline for the numeric input variables:
# impute first (SimpleImputer with its default settings), then scale.
numeric_steps = [
    ("imputer", SimpleImputer()),
    ("scaler", StandardScaler()),
]
num_pipeline = Pipeline(steps=numeric_steps)

In [9]:
# Column transformer for the input features. Only numeric columns exist here
# (they are the independent variables), so a single entry routes them through
# num_pipeline.
preprocessor = ColumnTransformer(
    transformers=[("num_pipeline", num_pipeline, numerical_cols)]
)

In [10]:
## Train test split

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as the test set; random_state is fixed so the same
# split is produced on every run.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=30)

In [11]:
# Fit the preprocessor on the training split only, then apply the fitted
# transformation to the test split so no test-set statistics leak into it.
# NOTE: this cell overwrites X_train / X_test with their transformed versions,
# so it must be run exactly once after the split cell.
train_matrix = preprocessor.fit_transform(X_train)
test_matrix = preprocessor.transform(X_test)
feature_names = preprocessor.get_feature_names_out()

X_train = pd.DataFrame(train_matrix, columns=feature_names)
X_test = pd.DataFrame(test_matrix, columns=feature_names)

In [12]:
## Model Training

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, 
    f1_score, 
    precision_score, 
    recall_score,
    roc_auc_score
)

# Alternatively, confusion_matrix and classification_report could be imported and used on their own to summarise these metrics.

In [13]:

import warnings
# Silence every warning raised from this point on.
# NOTE(review): a blanket "ignore" also hides actionable warnings from pandas
# and scikit-learn; consider restricting the filter to a specific category.
warnings.filterwarnings("ignore")

In [14]:
def evaluate_model(models_clf, param_grids, data=None, cv=10, scoring='f1_weighted'):
    """Tune each classifier with a grid search and compare them on the test split.

    For every entry of ``models_clf`` a ``GridSearchCV`` is fitted on the
    training data, the refit best estimator predicts the test data, and
    accuracy plus weighted precision / recall / F1 are printed and recorded.
    The model with the highest test-set F1 score is reported as the winner.

    Parameters
    ----------
    models_clf : dict
        Maps a display name to an unfitted scikit-learn classifier. The
        estimators are not modified; GridSearchCV works on clones.
    param_grids : dict
        Maps the same names to the parameter grid searched for that model.
    data : tuple, optional
        ``(X_train, X_test, y_train, y_test)``. When omitted, the notebook-level
        variables with those names are used (the original behaviour).
    cv : int, optional
        Number of cross-validation folds used by the grid search.
    scoring : str, optional
        Scoring name handed to the grid search.

    Returns
    -------
    dict
        ``model_list_clf`` (names in evaluation order), ``results_clf`` (list of
        per-model result dicts), ``results_df`` (the same as a DataFrame),
        ``best_clf_model_idx`` / ``best_clf_model`` (index label and row of the
        winner), ``best_model_predictions`` (the winner's test predictions) and
        ``best_model`` (the winner's fitted estimator).

    Raises
    ------
    ValueError
        If ``models_clf`` is empty.
    KeyError
        If a model name has no entry in ``param_grids``.
    """
    if not models_clf:
        raise ValueError("models_clf must contain at least one model")

    if data is None:
        # Fall back to the notebook globals so existing two-argument calls keep working.
        data = (X_train, X_test, y_train, y_test)
    features_train, features_test, target_train, target_test = data

    # Estimators expect 1-D targets; flattening a single-column DataFrame avoids
    # the column-vector DataConversionWarning on every fit.
    target_train = np.ravel(target_train)
    target_test = np.ravel(target_test)

    model_list_clf = []
    results_clf = []
    fitted_models = {}
    predictions = {}

    for name, model in models_clf.items():
        print("--"*20 + f"Evaluating {name}"+"--"*20)

        # Grid search over this model's hyper-parameters (multi-class scoring)
        grid_search = GridSearchCV(model, param_grid=param_grids[name], scoring=scoring, cv=cv)
        grid_search.fit(features_train, target_train)

        # best_estimator_ has already been refit on the full training split
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(features_test)

        # Evaluation for multiclass classification
        accuracy = accuracy_score(target_test, y_pred)
        precision = precision_score(target_test, y_pred, average='weighted')
        recall = recall_score(target_test, y_pred, average='weighted')
        f1 = f1_score(target_test, y_pred, average='weighted')

        print("Best Parameters:", grid_search.best_params_)
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1 Score:", f1)

        # Store results for comparison
        model_list_clf.append(name)
        results_clf.append({'Model': name, 'Best Parameters': grid_search.best_params_,
                            'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1})
        fitted_models[name] = best_model
        predictions[name] = y_pred

    # Create a DataFrame for easy comparison
    results_df = pd.DataFrame(results_clf)

    # Find the best classification model. idxmax returns an index *label*, so
    # the row is fetched with .loc rather than the positional .iloc.
    # NOTE(review): the winner is chosen by test-set F1, which makes the reported
    # score optimistic; selecting by the cross-validated score would avoid that.
    best_clf_model_idx = results_df['F1 Score'].idxmax()
    best_clf_model = results_df.loc[best_clf_model_idx]

    # Reuse the estimator and predictions produced above instead of mutating the
    # caller's estimator and fitting it again; this guarantees the returned
    # predictions are the ones the reported metrics were computed from.
    best_model_name = best_clf_model['Model']
    best_model = fitted_models[best_model_name]
    best_model_predictions = predictions[best_model_name]

    return {"model_list_clf": model_list_clf,
            "results_clf": results_clf,
            "results_df": results_df,
            "best_clf_model_idx": best_clf_model_idx,
            "best_clf_model": best_clf_model,
            "best_model_predictions": best_model_predictions,
            "best_model": best_model}

In [15]:
# Candidate classifiers, keyed by the name used in the printed report.
models_clf = dict(
    SVC=SVC(),
    RandomForestClassifier=RandomForestClassifier(),
)

# Hyper-parameter grid for each candidate; keys must match models_clf.
param_grids = {
    'SVC': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto'],
    },
    'RandomForestClassifier': {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 5, 10],
        'random_state': [30],
    },
}

In [16]:
# Run the comparison and let the notebook echo the returned dict.
# NOTE(review): the following cell repeats this exact call and keeps the result,
# so this run repeats the full grid search without storing anything.
evaluate_model(models_clf, param_grids)

----------------------------------------Evaluating SVC----------------------------------------
Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 0.9111111111111111
Precision: 0.9134199134199135
Recall: 0.9111111111111111
F1 Score: 0.9096296296296297
----------------------------------------Evaluating RandomForestClassifier----------------------------------------
Best Parameters: {'max_depth': None, 'n_estimators': 100, 'random_state': 30}
Accuracy: 0.9333333333333333
Precision: 0.9352733686067018
Recall: 0.9333333333333333
F1 Score: 0.933667000333667


{'model_list_clf': ['SVC', 'RandomForestClassifier'],
 'results_clf': [{'Model': 'SVC',
   'Best Parameters': {'C': 1, 'gamma': 'scale', 'kernel': 'linear'},
   'Accuracy': 0.9111111111111111,
   'Precision': 0.9134199134199135,
   'Recall': 0.9111111111111111,
   'F1 Score': 0.9096296296296297},
  {'Model': 'RandomForestClassifier',
   'Best Parameters': {'max_depth': None,
    'n_estimators': 100,
    'random_state': 30},
   'Accuracy': 0.9333333333333333,
   'Precision': 0.9352733686067018,
   'Recall': 0.9333333333333333,
   'F1 Score': 0.933667000333667}],
 'results_df':                     Model                                    Best Parameters  \
 0                     SVC     {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}   
 1  RandomForestClassifier  {'max_depth': None, 'n_estimators': 100, 'rand...   
 
    Accuracy  Precision    Recall  F1 Score  
 0  0.911111   0.913420  0.911111  0.909630  
 1  0.933333   0.935273  0.933333  0.933667  ,
 'best_clf_model_idx': 1,
 'best_c

In [17]:

# Keep the returned summary dict so the following cells can inspect its entries.
model_from_grid_srch = evaluate_model(models_clf, param_grids)

----------------------------------------Evaluating SVC----------------------------------------
Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 0.9111111111111111
Precision: 0.9134199134199135
Recall: 0.9111111111111111
F1 Score: 0.9096296296296297
----------------------------------------Evaluating RandomForestClassifier----------------------------------------
Best Parameters: {'max_depth': None, 'n_estimators': 100, 'random_state': 30}
Accuracy: 0.9333333333333333
Precision: 0.9352733686067018
Recall: 0.9333333333333333
F1 Score: 0.933667000333667


In [18]:

# Row of the results table belonging to the model with the highest test-set F1 score.
model_from_grid_srch["best_clf_model"]

Model                                         RandomForestClassifier
Best Parameters    {'max_depth': None, 'n_estimators': 100, 'rand...
Accuracy                                                    0.933333
Precision                                                   0.935273
Recall                                                      0.933333
F1 Score                                                    0.933667
Name: 1, dtype: object

In [19]:

# Metrics of every evaluated model, one row per model.
model_from_grid_srch["results_df"]

Unnamed: 0,Model,Best Parameters,Accuracy,Precision,Recall,F1 Score
0,SVC,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",0.911111,0.91342,0.911111,0.90963
1,RandomForestClassifier,"{'max_depth': None, 'n_estimators': 100, 'rand...",0.933333,0.935273,0.933333,0.933667


In [20]:
# Hyper-parameters the grid search selected for the winning model.
model_from_grid_srch["best_clf_model"]["Best Parameters"]

{'max_depth': None, 'n_estimators': 100, 'random_state': 30}

In [21]:
# Wrap the winning model's test-set predictions in a DataFrame; no column name
# is given, so the single column is labelled 0.
y_pred = pd.DataFrame(model_from_grid_srch["best_model_predictions"])

In [22]:

# Place predictions beside the true labels. reset_index() moves the original
# row labels of y_test into an 'index' column, so both frames line up by position.
result = pd.concat([y_pred,y_test.reset_index()],axis=1)

In [23]:
# Show predicted species (column 0) next to the original row label and true species.
result

Unnamed: 0,0,index,Species
0,setosa,20,setosa
1,setosa,5,setosa
2,setosa,3,setosa
3,virginica,101,virginica
4,versicolor,82,versicolor
5,versicolor,80,versicolor
6,virginica,123,virginica
7,virginica,145,virginica
8,versicolor,74,versicolor
9,virginica,107,virginica
