In [69]:
import seaborn as sns
import pandas as pd
import numpy as np

pd.set_option("display.max_columns",None)

In [70]:
data = sns.load_dataset("iris")

In [71]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [72]:
data.species.unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [73]:
df = data[data["species"] != 'setosa']

In [74]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,versicolor
51,6.4,3.2,4.5,1.5,versicolor
52,6.9,3.1,4.9,1.5,versicolor
53,5.5,2.3,4.0,1.3,versicolor
54,6.5,2.8,4.6,1.5,versicolor


In [75]:
df["species"] = df["species"].map({'versicolor':0,'virginica':1}).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["species"] = df["species"].map({'versicolor':0,'virginica':1}).astype(int)


In [76]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0


# spliting the dataset into independent and dependent features

In [77]:
x = df.drop("species",axis=1)
y = df["species"]

# train test split

In [78]:
from sklearn.model_selection import train_test_split

In [79]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=42)

# Base model

In [80]:
from sklearn.linear_model import LogisticRegression

In [81]:
base_classifier = LogisticRegression()

In [82]:
base_classifier.fit(x_train,y_train)

In [83]:
y_pred = base_classifier.predict(x_test)

# evaluation of base classifier

In [84]:
from sklearn.metrics import accuracy_score,classification_report

In [85]:
accuracy = accuracy_score(y_test,y_pred)
accuracy

0.92

In [86]:
report = classification_report(y_test,y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.93      0.93      0.93        14
           1       0.91      0.91      0.91        11

    accuracy                           0.92        25
   macro avg       0.92      0.92      0.92        25
weighted avg       0.92      0.92      0.92        25



# Hyperparameter tuning

In [87]:
from sklearn.model_selection import GridSearchCV

In [88]:
params = {"penalty":["l2","l2","elasticnet"],"C":[1,2,3,4,5,6,7,8,9,10]}

In [89]:
tunned_classifier = GridSearchCV(LogisticRegression(),params,scoring="accuracy",cv=5)

In [90]:
tunned_classifier.fit(x_train,y_train)

50 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1169, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 56, in _check_solver
   

In [91]:
tunned_classifier.best_params_

{'C': 1, 'penalty': 'l2'}

In [92]:
tunned_classifier.best_score_

0.9733333333333334

In [93]:
pred = tunned_classifier.predict(x_test)

In [94]:
accuracy = accuracy_score(y_test,pred)
accuracy

0.92

In [95]:
report = classification_report(y_test,pred)
print(report)

              precision    recall  f1-score   support

           0       0.93      0.93      0.93        14
           1       0.91      0.91      0.91        11

    accuracy                           0.92        25
   macro avg       0.92      0.92      0.92        25
weighted avg       0.92      0.92      0.92        25

