# Multinomial Logistic Regression Live Demo

This notebook introduces multinomial logistic regression using scikit-learn. We start by importing the core libraries and downloading a classic multi-class dataset to explore.

In [None]:

# Scientific computing stack
import numpy as np
import pandas as pd
import scipy

# Scikit-learn tools
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Fetch the classic Iris dataset from OpenML (downloaded once, then served from the local cache)
iris = fetch_openml("iris", version=1, as_frame=True)
iris_df = iris.frame  # combined frame: feature columns plus the target column

# feature matrix and target; the explicit categorical dtype keeps class labels tidy
X, y = iris.data, iris.target.astype("category")

# quick sanity check on what was loaded, then preview the first rows
print(f"Features shape: {X.shape}")
print(f"Class breakdown: {y.value_counts().to_dict()}")
iris_df.head()


In [None]:
# list the distinct labels in the "class" column, i.e. the categories the model must tell apart
iris_df["class"].unique()

In [None]:
# fixed seed for the shuffle so that Restart & Run All reproduces the same split,
# and therefore the same coefficients and accuracies, every time
RANDOM_STATE = 42

# instantiate our model
model = LogisticRegression()

# split into training and testing: 30% of the rows are held out for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# verify sizes of training and test sets
X_train.shape, X_test.shape

In [None]:
# fit the model on the training split only; the held-out test split is reserved for the evaluation cells below
model.fit(X_train, y_train)

In [None]:
# inspect the fitted parameters: coef_ holds the weights and intercept_ the biases
# that the following cells use to reproduce the model's predictions by hand
model.coef_, model.intercept_

In [None]:
# pull the learned weights (W) and biases (b) out of the fitted model
W, b = model.coef_, model.intercept_

# report every shape involved so the matrix product in the next cell can be checked by eye
print(
    f"Shape of matrix W is {W.shape}",
    f"Shape of intercept b is {b.shape}",
    f"Shape of test data is {X_test.shape}",
    sep="\n",
)

In [None]:
# linear scores: multiply each test row by the transposed weight matrix, then add the biases
intermediate_output = X_test.dot(W.T) + b
intermediate_output.head(10)

In [None]:
# turn each row of linear scores into a probability distribution;
# axis=1 applies the softmax across the columns of every row
estimated_prob = scipy.special.softmax(intermediate_output, axis=1)

# estimated model probabilities for the first 10 examples in the test set
estimated_prob[:10]

In [None]:
# pick the most probable column for every row; the +1 turns the 0-based column
# position into the 1-based class index used for the true labels further down
estimated_class = estimated_prob.argmax(axis=1) + 1
estimated_class[:10]

In [None]:
# class labels known to the fitted model, in the order stored on model.classes_
classes = [*model.classes_]
print(f"The class labels are {classes}")

In [None]:
# for each example in the test set, identify the index of the true class
# Labels are encoded as their 1-based position in `classes`, the same encoding
# used for `estimated_class`. The lookup covers every row of y_test and every
# class the model knows, rather than assuming 45 rows and three classes.
class_position = {label: position for position, label in enumerate(classes, start=1)}
true_class = [class_position[label] for label in y_test.values]

print(f"The first 10 true labels are:")
print(true_class[:10])


In [None]:
# reference accuracy from sklearn's built-in score method on the held-out test set;
# the manual calculation in the next cell is expected to reproduce this number
model.score(X_test, y_test)

In [None]:
# manually measure the test set performance
# Convert both sides to arrays and check their shapes first: comparing an array
# with a list of a different length does not compare element by element, which
# would otherwise surface as a silently wrong accuracy.
predicted_idx = np.asarray(estimated_class)
actual_idx = np.asarray(true_class)
assert predicted_idx.shape == actual_idx.shape, (
    f"prediction/label count mismatch: {predicted_idx.shape} vs {actual_idx.shape}"
)

# fraction of test examples whose predicted class index equals the true one
accuracy = np.mean(predicted_idx == actual_idx)
print(f"The accuracy of the model on the test set is {accuracy:.3f}")