<a href="https://colab.research.google.com/github/Gibsdevops/Machine-learning-crash-course-/blob/main/logistic_reg_iris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd

In [2]:
# Load the iris dataset from a CSV file located relative to the working directory
iris = pd.read_csv(filepath_or_buffer="IRIS.csv")

In [3]:
# Preview the first five rows of the raw data
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# List the distinct class labels present in the target column
species_column = iris["species"]
species_column.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
# Summarize the frame: column dtypes, non-null counts and memory usage
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [9]:
# Encode the target column (species) as integer class labels
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
# Fit on the species names and keep the resulting integer codes in the same column
species_codes = label_encoder.fit_transform(iris["species"])
iris["species"] = species_codes

In [11]:
# Check that the species column now holds the encoded integers
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [12]:
# Separate the feature matrix (every column except the target) from the target vector
X = iris.drop("species", axis="columns")
y = iris["species"]

In [13]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)



In [14]:
# Standardize the features (zero mean, unit variance per column)
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Learn the per-feature mean/std from the training split only, then scale it
X_train = scaler.fit_transform(X_train)
# Apply the *training* statistics to the test split. Calling fit_transform here
# would re-fit the scaler on the test data, so the test features would be scaled
# differently from what the model was trained on and test information would
# leak into preprocessing.
X_test = scaler.transform(X_test)

In [16]:
# Train a multinomial logistic regression model on the scaled training data

from sklearn.linear_model import LogisticRegression

# `multi_class` is intentionally not passed: the parameter is deprecated since
# scikit-learn 1.5 and scheduled for removal, and with the lbfgs solver a
# multinomial model is already what gets fitted for 3+ classes by default.
model = LogisticRegression(solver="lbfgs", max_iter=200)
model.fit(X_train, y_train)



In [25]:
# Model evaluation: score the baseline model on the held-out test split
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the accuracy rounded to two decimal places
print(f"Model Accurancy: {accuracy:.2f}")

Model Accurancy: 0.97


In [27]:
# Classification report: per-class precision, recall, F1-score and support
from sklearn.metrics import classification_report

report = classification_report(y_true=y_test, y_pred=y_pred)
print(f"Classification report: \n{report}")

Classification report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



In [30]:
# Hyperparameter tuning with an exhaustive, cross-validated grid search

from sklearn.model_selection import GridSearchCV

# Candidate values to try:
#   C        - inverse of the regularization strength (smaller = stronger regularization)
#   max_iter - cap on the number of solver iterations
param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    max_iter=[100, 200, 300, 500],
)

# 5-fold cross-validation, scored by accuracy; n_jobs=-1 runs candidates in parallel
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)



In [31]:
# Get the best hyperparameter combination found by the grid search
best_params = grid_search.best_params_
print(f"Best hyperparameters: {best_params}")

Best hyperparameters: {'C': 10, 'max_iter': 100}


In [32]:
# Retrieve the model trained with the best hyperparameters.
# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# re-trained on the whole training set with the winning parameters; fitting it
# again would only repeat that work.
best_model = grid_search.best_estimator_
# Display the fitted estimator as the cell output
best_model



In [33]:
# Predict the test-split labels with the tuned model
y_pred_best = best_model.predict(X=X_test)

In [35]:
# Accuracy of the tuned model on the held-out test split, shown to two decimals
best_model_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_best)
print(f"Best model accuracy: {best_model_accuracy:.2f}")

Best model accuracy: 0.97


In [36]:
# Persist the tuned model to disk so it can be reloaded later with joblib.load
import joblib

model_path = "Logistic_regression_model.pkl"
joblib.dump(value=best_model, filename=model_path)

print("model saved succesfully")

model saved succesfully
