In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd

# Pull in the iris bunch and wrap its features / labels in pandas containers
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the training features, the training labels and the class names,
# one after the other
print(X_train, y_train, iris.target_names, sep="\n")

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
22                 4.6               3.6                1.0               0.2
15                 5.7               4.4                1.5               0.4
65                 6.7               3.1                4.4               1.4
11                 4.8               3.4                1.6               0.2
42                 4.4               3.2                1.3               0.2
..                 ...               ...                ...               ...
71                 6.1               2.8                4.0               1.3
106                4.9               2.5                4.5               1.7
14                 5.8               4.0                1.2               0.2
92                 5.8               2.6                4.0               1.2
102                7.1               3.0                5.9               2.1

[120 rows x 4 columns]
22     0
15     0
65     1
11     0
42  

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib

import os

# Single source of truth for the iteration budget: it drives both the solver
# and the saved file name, so the two cannot drift apart.
MAX_ITER = 10
MODEL_DIR = "./pretrainedmodels"

# Train the model
# NOTE(review): with this budget the solver stops at the iteration limit and
# scikit-learn emits a convergence warning; raise MAX_ITER (or scale the
# features) if a converged model is wanted.
model = LogisticRegression(max_iter=MAX_ITER)
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)

print(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")

# Save the model
# Create the target directory first: joblib.dump does not create missing
# parent directories, so a fresh checkout would otherwise fail here.
os.makedirs(MODEL_DIR, exist_ok=True)
model_path = f"{MODEL_DIR}/logistic_regression_model{MAX_ITER}.pkl"
joblib.dump(model, model_path)
print("Model saved successfully!")


     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
73                 6.1               2.8                4.7               1.2
18                 5.7               3.8                1.7               0.3
118                7.7               2.6                6.9               2.3
78                 6.0               2.9                4.5               1.5
76                 6.8               2.8                4.8               1.4
31                 5.4               3.4                1.5               0.4
64                 5.6               2.9                3.6               1.3
141                6.9               3.1                5.1               2.3
68                 6.2               2.2                4.5               1.5
82                 5.8               2.7                3.9               1.2
110                6.5               3.2                5.1               2.0
12                 4.8               3.0                1.4     

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [6]:
import mlflow
import mlflow.sklearn

# Start an MLflow run
with mlflow.start_run(run_name="logistic_regression") as run:
    # Log parameters and metrics
    # Read max_iter from the estimator itself instead of hard-coding it, so the
    # logged value always matches the model that is actually being logged.
    mlflow.log_param("max_iter", model.get_params()["max_iter"])
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred))

    # Log the model
    mlflow.sklearn.log_model(model, "iris_model")

