# iris dataset
The Iris dataset was used in R.A. Fisher's classic 1936 paper, The Use of Multiple Measurements in Taxonomic Problems, and can also be found on the UCI Machine Learning Repository.

It includes three iris species with 50 samples each as well as some properties about each flower. One flower species is linearly separable from the other two, but the other two are not linearly separable from each other.

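The columns in this dataset are listed below. Note that the code in this notebook refers to snake_case column names such as `species`, `petal_length` and `petal_width`, so the loaded `iris.csv` presumably uses that naming rather than the exact names listed here: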

1. Id
2. SepalLengthCm
3. SepalWidthCm
4. PetalLengthCm
5. PetalWidthCm
6. Species

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df=pd.read_csv("iris.csv")

In [3]:
df.head()

In [4]:
len(df)

In [5]:
df.info()  # species are in string

In [6]:
df.describe()

In [7]:
df['species'].value_counts()

# visualizing features & relationship among them

In [8]:
sns.countplot(x='species',data=df)

In [9]:
sns.scatterplot(x='petal_width',y='petal_length',data=df,hue='species') 
#  setosa is clearly separated from the other two species, but versicolor and virginica overlap to some extent

In [10]:
sns.pairplot(data=df,hue='species')

In [11]:
#  visualizing correlation
sns.heatmap(df.corr(numeric_only=True),annot=True)

In [12]:
# developing the model
X=df.drop('species',axis=1)

In [13]:
y=df['species']

In [14]:
y   # the string target labels do not need to be encoded as integers; scikit-learn accepts them as-is

In [15]:
#  train - test split
from sklearn.model_selection import train_test_split

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

In [17]:
# scaling the data
from sklearn.preprocessing import StandardScaler

In [18]:
scaler=StandardScaler()

In [19]:
# Fit the scaler on the training split only, so no statistics from the test split leak into preprocessing.
scaler.fit(X_train)

In [20]:
scaled_X_train=scaler.transform(X_train)

In [21]:
scaled_X_test=scaler.transform(X_test)

# using grid search to figure out the best hyper parameters & then evaluating its performance

In [22]:
#  we can choose to perform cross-validation within the model itself in one simple call, or
#  we can keep things modular by separating the model from the grid-search process

# Multiclass classification -> How does it work, and how is it different from binary classification?
The model does not simply check and output 0 or 1. Instead, it handles the multiclass problem with a one-vs-rest (OvR) scheme: it first builds a model that separates one class from all the others, then switches to the next class and separates that class from all the others, and so on.     Parameter -> multi_class='ovr'

In [23]:
from sklearn.linear_model import LogisticRegression

In [24]:
log_model=LogisticRegression(solver='saga',multi_class='ovr',max_iter=5000)
#  max_iter is raised to 5000 here so that the solver has enough iterations to reach the minimum;
#  in previous models we did not need to increase it

In [25]:
from sklearn.model_selection import GridSearchCV

In [26]:
# Hyper-parameter candidates for the logistic-regression grid search.
penalty = ['l1', 'l2', 'elasticnet']   # every penalty type that is searched
l1_ratio = np.linspace(0, 1, 20)       # L1/L2 mixing weight, only meaningful for 'elasticnet'
C = np.logspace(0, 1, 20)              # inverse regularization strength, 1 .. 10 on a log scale

# `l1_ratio` is only used when penalty='elasticnet'. Crossing it with 'l1' and
# 'l2' would refit the same model 20 times per C value (and emit a warning each
# time), so the search space is split into two grids: 2*20 + 20*20 = 440
# candidates instead of 3*20*20 = 1200.
param_grid = [
    {'penalty': ['l1', 'l2'], 'C': C},
    {'penalty': ['elasticnet'], 'l1_ratio': l1_ratio, 'C': C},
]

In [27]:
grid_model=GridSearchCV(log_model,param_grid)

In [28]:
import warnings

In [29]:
# Blanket filter: every warning raised from this point on is hidden
# (presumably to quiet the warnings emitted during the grid-search fit below -- TODO confirm which ones are expected).
warnings.filterwarnings('ignore')

In [30]:
grid_model.fit(scaled_X_train,y_train)

In [31]:
grid_model.best_params_

In [32]:
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report,ConfusionMatrixDisplay

In [33]:
y_pred=grid_model.predict(scaled_X_test)

In [34]:
y_pred

In [35]:
accuracy_score(y_test,y_pred)

In [36]:
cm=confusion_matrix(y_test,y_pred)

In [37]:
cm

In [38]:
disp = ConfusionMatrixDisplay(confusion_matrix=cm,display_labels=grid_model.classes_)
disp.plot()

# We can't draw the ROC curve automatically for multi-class classification

In [39]:
print(classification_report(y_test,y_pred))

In [40]:
#  copy the function for multiclass ROC curve and call it

In [41]:
from sklearn.metrics import roc_curve,auc

In [42]:
def plot_multiclass_roc(clf, X_test, y_test, n_classes, figsize=(5,5)):
    """Plot one ROC curve per class (class-vs-rest) for a fitted classifier.

    Args:
        clf: Fitted estimator exposing ``decision_function``.
        X_test: Feature matrix passed to ``clf.decision_function``.
        y_test: True labels; one-hot encoded with ``pd.get_dummies``.
        n_classes: Number of leading class columns to draw a curve for.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        Column ``k`` of the scores is paired with column ``k`` of the one-hot
        labels, so both are assumed to list the classes in the same order
        -- TODO confirm for the estimator in use.
    """
    scores = clf.decision_function(X_test)
    onehot = pd.get_dummies(y_test, drop_first=False).values

    # Compute every curve up front, before any figure is created.
    curves = []
    for k in range(n_classes):
        false_pos, true_pos, _ = roc_curve(onehot[:, k], scores[:, k])
        curves.append((false_pos, true_pos, auc(false_pos, true_pos)))

    fig, ax = plt.subplots(figsize=figsize)
    ax.plot([0, 1], [0, 1], 'k--')  # dashed diagonal as a visual reference
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic example')

    # Curves are labelled by positional class index, not by class name.
    for k, (false_pos, true_pos, area) in enumerate(curves):
        legend_text = 'ROC curve (area = %0.2f) for label %i' % (area, k)
        ax.plot(false_pos, true_pos, label=legend_text)

    ax.legend(loc="best")
    ax.grid(alpha=.4)
    sns.despine()
    plt.show()

In [43]:
plot_multiclass_roc(grid_model,scaled_X_test,y_test,n_classes=3)