# Multi-class logistic regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Exploratory Data Analysis and Visualization

In [None]:
# Load the dataset from iris.csv (path is relative to the working directory)
# and preview its first rows.
df = pd.read_csv('iris.csv')
df.head()

In [None]:
# Number of rows per value of the 'species' column (a quick class-balance check).
df['species'].value_counts()

In [None]:
# Sepal length vs. sepal width, one colour per species.
sns.scatterplot(data=df, x='sepal_length', y='sepal_width', hue='species')

In [None]:
# Petal length vs. petal width, one colour per species.
sns.scatterplot(data=df, x='petal_length', y='petal_width', hue='species')

In [None]:
# Pairwise plots of the columns of df, coloured by species.
sns.pairplot(df, hue='species')

In [None]:
# Annotated heatmap of the pairwise correlations; numeric_only=True restricts
# the correlation matrix to the numeric columns of df.
sns.heatmap(df.corr(numeric_only=True), annot=True)

## Train and Test Splits

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Target: the 'species' column.
y = df['species']
# Features: every other column of df.
X = df.drop(columns='species')

In [None]:
# Hold out 25% of the rows as a test set; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=99)

## Scaling the data

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler instance; it is fitted on the training features in the next cell.
scaler = StandardScaler()

In [None]:
# Learn the scaling statistics from the training features only, so nothing
# about the test set influences the transform.
scaler.fit(X_train)

# Apply the same fitted transform to both splits.
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

## Creating the model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [None]:
# Base estimator for the grid search: one-vs-rest logistic regression with the
# 'saga' solver and an iteration cap of 5000.
# Presumably 'saga' was chosen because the grid below tries both 'l1' and 'l2'
# penalties - confirm against the LogisticRegression solver/penalty table.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument - confirm against the installed version before upgrading.
log_model = LogisticRegression(multi_class="ovr", solver='saga', max_iter=5000)

In [None]:
# Hyper-parameter grid: both penalty types crossed with 10 values of C spaced
# logarithmically from 10**0 to 10**4.
penalty = ['l1', 'l2']
C = np.logspace(0, 4, 10)

# Cross-validation and scoring are left at the GridSearchCV defaults.
grid_model = GridSearchCV(log_model, param_grid={'C': C, 'penalty': penalty })

In [None]:
# Run the grid search on the scaled training data.
grid_model.fit(scaled_X_train, y_train)

In [None]:
# Parameter combination selected by the grid search.
grid_model.best_params_

## Model Performance

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay

In [None]:
# Show the selected parameters again as context for the metrics below.
grid_model.best_params_

### Predictions on the test data

In [None]:
# Predicted labels for the scaled test features, from the fitted grid search.
y_pred = grid_model.predict(scaled_X_test)

### Accuracy

In [None]:
# Accuracy of the predictions against the true test labels.
accuracy_score(y_test, y_pred)

### Confusion matrix

In [None]:
# Confusion matrix of true vs. predicted test labels; the trailing bare name
# displays it as the cell output.
cnf_matrix = confusion_matrix(y_test, y_pred)
cnf_matrix

In [None]:
# Plot the confusion matrix with the class names on both axes instead of the
# default 0..n-1 tick labels.
# confusion_matrix() orders rows/columns by sorted label and classes_ holds the
# fitted labels in sorted order, so the two line up.
# NOTE(review): this assumes every fitted class occurs in y_test or y_pred;
# otherwise the matrix has fewer rows than there are labels - confirm.
disp = ConfusionMatrixDisplay(confusion_matrix=cnf_matrix, display_labels=grid_model.classes_)
disp.plot()

### Classification report

In [None]:
# Text report of the per-class metrics for the test predictions.
print(classification_report(y_test, y_pred))

### ROC curve and AUC

In [None]:
from sklearn.metrics import roc_curve, auc

In [None]:
def plot_multiclass_roc(clf, X_test, y_test, n_classes, figsize=(5, 5)):
    """Plot one-vs-rest ROC curves, one per class, for a fitted classifier.

    Each class is treated in turn as the positive class: the true labels are
    binarised against that class and scored with the matching column of
    ``clf.decision_function(X_test)``. The area under each curve is shown in
    the legend.

    Parameters
    ----------
    clf
        Fitted estimator exposing ``decision_function``. When it also exposes
        ``classes_``, that sequence decides which class each score column
        belongs to; otherwise the column order of ``pd.get_dummies(y_test)``
        is used.
    X_test
        Features passed to ``clf.decision_function``.
    y_test
        True labels for ``X_test``.
    n_classes : int
        Number of leading classes to draw a curve for.
    figsize : tuple, default (5, 5)
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Raises
    ------
    IndexError
        If ``n_classes`` exceeds the number of known classes or score columns.
    """
    scores = np.asarray(clf.decision_function(X_test))
    if scores.ndim == 1:
        # NOTE(review): scikit-learn binary classifiers return one score per
        # sample for classes_[1]; mirror it so each class has its own column
        # and the column indexing below works - confirm for non-sklearn clfs.
        scores = np.column_stack([-scores, scores])

    # Pair score columns with labels through the estimator's own class order.
    # Relying on the dummy columns of y_test alone misaligns (or raises) when
    # a class is absent from the test labels.
    class_labels = getattr(clf, "classes_", None)
    if class_labels is None:
        class_labels = pd.get_dummies(y_test, drop_first=False).columns
    class_labels = list(class_labels)

    y_true = np.asarray(y_test)

    # One-vs-rest ROC data: (label, fpr, tpr, auc) per class.
    curves = []
    for i in range(n_classes):
        label = class_labels[i]
        is_positive = (y_true == label).astype(int)
        fpr, tpr, _ = roc_curve(is_positive, scores[:, i])
        curves.append((label, fpr, tpr, auc(fpr, tpr)))

    fig, ax = plt.subplots(figsize=figsize)
    ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic example')
    for label, fpr, tpr, roc_auc in curves:
        # Show the actual class label rather than its positional index.
        ax.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:0.2f}) for label {label}')
    ax.legend(loc="best")
    ax.grid(alpha=.4)
    sns.despine(ax=ax)
    plt.show()

In [None]:
# Draw the per-class ROC curves for the fitted grid search on the scaled test
# set (three classes, enlarged figure).
plot_multiclass_roc(grid_model, scaled_X_test, y_test, n_classes=3, figsize=(16, 10))