# Iris classification problem

## Importing libraries

In [None]:
import os
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score, precision_score

from xgboost import XGBClassifier

import mlflow

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the Iris dataset bundled with scikit-learn and pull out the feature
# matrix and the class labels in one step.
df = load_iris()
features, targets = df.data, df.target

Converting the scikit-learn dataset object into a regular pandas DataFrame

In [3]:
# Build the feature + target DataFrame straight from scikit-learn.
# The previous version read `df.feature_names` while rebinding `df` to a
# DataFrame, so running the cell a second time raised AttributeError.
# `as_frame=True` yields the same table (four feature columns followed by a
# 'target' column) and does not depend on what `df` currently holds.
df = load_iris(as_frame=True).frame

In [4]:
# Preview the first five rows to confirm the column layout.
df.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


Using only sepal length and width

In [5]:
# Sepal-only view of the data: discard both petal columns, keep the target.
df2 = df.drop(['petal length (cm)', 'petal width (cm)'], axis='columns')

In [6]:
def split_and_scale(df):
    """Split a frame with a 'target' column into scaled train/test sets.

    The rows are divided 80/20 with stratification on the target and a fixed
    seed, then the features are min-max scaled. The scaler is fitted on the
    training features only and reused for the test features.

    Returns:
        (X_train, X_test, y_train, y_test), where the X values are the scaled
        feature arrays and the y values are the corresponding target slices.
    """
    labels = df['target']
    inputs = df.drop(columns='target')

    inputs_train, inputs_test, y_train, y_test = train_test_split(
        inputs,
        labels,
        test_size=0.2,
        stratify=labels,
        random_state=42,
    )

    # Learn the min/max from the training rows only, then apply to both sets.
    min_max = MinMaxScaler().fit(inputs_train)
    return (
        min_max.transform(inputs_train),
        min_max.transform(inputs_test),
        y_train,
        y_test,
    )

In [37]:
def log_test_metrics(model, X_test, y_test, average='macro'):
    """Score a fitted model on the test set and log the results to MLflow.

    Logs `test_precision` and `test_f1` to the currently active MLflow run.

    Args:
        model: Fitted estimator (or search object) exposing `predict`.
        X_test: Test features, prepared the same way as the training features.
        y_test: True labels for `X_test`.
        average: Averaging mode forwarded to `precision_score` and `f1_score`.
            Defaults to 'macro'. The earlier hard-coded 'micro' mode makes
            precision and F1 both collapse to plain accuracy on a single-label
            multiclass problem, so the two logged metrics were always equal.
    """
    predictions = model.predict(X_test)
    precision = precision_score(y_test, predictions, average=average)
    f1 = f1_score(y_test, predictions, average=average)
    mlflow.log_metric('test_precision', precision)
    mlflow.log_metric('test_f1', f1)

In [None]:
# Point MLflow at the tracking server: take the address from the
# MLFLOW_TRACKING_URI environment variable, falling back to a local default.
mlflow_tracking_uri = os.getenv('MLFLOW_TRACKING_URI', 'http://127.0.0.1:5000')
print(f"Setting MLflow tracking URI to: {mlflow_tracking_uri}")
mlflow.set_tracking_uri(mlflow_tracking_uri)
mlflow.set_experiment('Using different features 2')
# Enable scikit-learn autologging, with model, dataset and input-example
# logging switched off.
mlflow.sklearn.autolog(
    log_models = False,
    log_datasets = False,
    log_input_examples = False
)

# Run 1: plain logistic regression on the two sepal features only (df2).
with mlflow.start_run(run_name="basic_logistic_regression_with_2_features"):
    X_train, X_test, y_train, y_test = split_and_scale(df2)
    lr = LogisticRegression(random_state=42, n_jobs=-1)
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    print(classification_report(y_test, y_pred))
    log_test_metrics(lr, X_test, y_test)

# Run 2: logistic regression on all features (df), tuned with a grid search.
with mlflow.start_run(run_name="grid_search_logistic_regression_with_all_features"):
    # This rebinds X_train/X_test/y_train/y_test, replacing the two-feature
    # split created in the run above.
    X_train, X_test, y_train, y_test = split_and_scale(df)
    # Same estimator as run 1, but C and the solver are now searched over.
    lr = LogisticRegression(random_state=42, n_jobs=-1)
    params = {
        'C': [0.1, 1, 10],
        'solver': ['liblinear', 'lbfgs'],
    }

    # 5-fold cross-validated search; refit=True retrains the best candidate
    # so that clf.predict below uses it.
    clf = GridSearchCV(
        cv=5,
        param_grid=params,
        estimator=lr,
        n_jobs=-1,
        return_train_score=True,
        verbose=1,
        refit=True
    )
    
    clf.fit(X_train, y_train)
    # Show the winning estimator and its hyperparameters in the cell output.
    display(clf.best_estimator_, clf.best_params_)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    log_test_metrics(clf, X_test, y_test)

              precision    recall  f1-score   support

           0       0.91      1.00      0.95        10
           1       0.67      0.40      0.50        10
           2       0.62      0.80      0.70        10

    accuracy                           0.73        30
   macro avg       0.73      0.73      0.72        30
weighted avg       0.73      0.73      0.72        30

🏃 View run basic_logistic_regression_with_2_features at: http://127.0.0.1:8080/#/experiments/345786646128698872/runs/71c38681445648c29c1139787522fcd5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/345786646128698872
Fitting 5 folds for each of 6 candidates, totalling 30 fits


2025/04/16 12:36:00 INFO mlflow.sklearn.utils: Logging the 5 best runs, one run will be omitted.


{'C': 10, 'solver': 'lbfgs'}

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      0.90      0.90        10
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30

🏃 View run grid_search_logistic_regression_with_all_features at: http://127.0.0.1:8080/#/experiments/345786646128698872/runs/1aa524be792140a9b9820b3a6a2c1d9e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/345786646128698872


Decent performance with only 2 features (0.73 test accuracy).

With all four features, however, performance is much better (0.93 test accuracy).

In [39]:
# Start a new experiment for comparing algorithms; for this run autologging
# also records the model, the datasets and an input example.
mlflow.set_experiment('Using different algorithms')
mlflow.sklearn.autolog(
    log_models = True,
    log_datasets = True,
    log_input_examples = True
)
with mlflow.start_run(run_name="random_forests"):
    # Build the all-feature split explicitly instead of relying on whatever
    # X_train/X_test an earlier cell left in the kernel. split_and_scale uses
    # a fixed seed, so this yields the same partition on every run.
    X_train, X_test, y_train, y_test = split_and_scale(df)

    rf = RandomForestClassifier(random_state=42, n_jobs=-1)
    # Shallow trees with a minimum leaf size, searched over a 3x3 grid.
    rf_params = {
        'max_depth': [2, 3, 4],
        'min_samples_leaf': [3, 4, 5]
    }

    # 5-fold cross-validated search; the best candidate is refitted on the
    # full training set so rf_clf.predict uses it.
    rf_clf = GridSearchCV(
        estimator=rf,
        param_grid=rf_params,
        n_jobs=-1,
        cv=5,
        refit=True,
        verbose=True
        )

    rf_clf.fit(X_train, y_train)
    y_pred_rf = rf_clf.predict(X_test)
    print(classification_report(y_test, y_pred_rf))
    log_test_metrics(rf_clf, X_test, y_test)



Fitting 5 folds for each of 9 candidates, totalling 45 fits


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 1954.09it/s] 
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 2561.07it/s] 
2025/04/16 12:36:10 INFO mlflow.sklearn.utils: Logging the 5 best runs, 4 runs will be omitted.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

🏃 View run random_forests at: http://127.0.0.1:8080/#/experiments/310904201466897775/runs/ae3b93c336d04a7f896aa94dd40b5f42
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/310904201466897775


Almost perfect performance (0.97 test accuracy).

In [40]:
# 'multi:softprob' is the documented multiclass objective; the previous value
# 'multi:logistic' is not one of XGBoost's objectives.
xgb = XGBClassifier(random_state=42, verbosity=1, objective='multi:softprob', n_jobs=-1)

# Enable autologging for all supported libraries, with model, dataset and
# input-example logging switched off.
mlflow.autolog(
    log_models = False,
    log_datasets = False,
    log_input_examples = False
)

with mlflow.start_run(run_name="xgbclassifier"):
    # Build the all-feature split explicitly instead of relying on whatever
    # X_train/X_test an earlier cell left in the kernel. split_and_scale uses
    # a fixed seed, so this yields the same partition on every run.
    X_train, X_test, y_train, y_test = split_and_scale(df)

    params = {
        'n_estimators': [10, 25, 50],
        'max_depth': [3, 4, 5],
        'learning_rate': [0.001, 0.01, 0.1],
        'booster': ['gbtree', 'gblinear', 'dart'],
        'gamma': [0, 0.1, 0.5]
    }
    # 5-fold cross-validated search; the best candidate is refitted on the
    # full training set so xgb_clf.predict uses it.
    xgb_clf = GridSearchCV(
        estimator=xgb,
        param_grid=params,
        n_jobs=-1,
        cv=5,
        refit=True,
        verbose=True
        )

    # The held-out test set is deliberately NOT passed as eval_set. No early
    # stopping is configured, so it would only be evaluated on every boosting
    # round of every fit, and it would pull the test data into the tuning loop.
    xgb_clf.fit(X_train, y_train)

    y_pred_xgb = xgb_clf.predict(X_test)
    print(classification_report(y_test, y_pred_xgb))
    log_test_metrics(xgb_clf, X_test, y_test)

2025/04/16 12:36:11 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


Fitting 5 folds for each of 243 candidates, totalling 1215 fits


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 2026.37it/s] 
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 1684.07it/s] 
2025/04/16 12:36:22 INFO mlflow.sklearn.utils: Logging the 5 best runs, 238 runs will be omitted.


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      0.90      0.90        10
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30

🏃 View run xgbclassifier at: http://127.0.0.1:8080/#/experiments/310904201466897775/runs/1f527c264688400496bf788513a19b2b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/310904201466897775


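Random forests outperformed XGBoost on the test set (0.97 vs. 0.93 accuracy), so random forests are the winner and I'll use them for deployment. Note that with only 30 test samples this gap corresponds to a single prediction (29 vs. 28 correct), so the margin is narrow.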