Implementing Multi-class Classification

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

In [11]:
# Fetch the Iris bunch and pull out the feature matrix and the target vector
data = load_iris()
X = data.data
y = data.target

# Keep 80% of the samples for training; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=10,
)

# Learn the scaling statistics from the training split only, then apply
# the same fitted scaler to both splits so no test information leaks in
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("Training shape:", X_train.shape)
print("Test shape:", X_test.shape)

Training shape: (120, 4)
Test shape: (30, 4)


In [12]:
class MultinomialLogisticRegression:
    """Softmax (multinomial logistic) regression trained with batch gradient descent.

    Labels may be any values accepted by ``np.unique`` (integers that do not
    start at 0, negative integers, strings, ...). They are encoded internally
    as column indices ``0..n_classes-1`` and decoded again by ``predict``.

    Parameters
    ----------
    learning_rate : float, default=0.001
        Step size applied to every gradient update.
    max_iter : int, default=1000
        Maximum number of gradient-descent epochs.
    tol : float, default=1e-4
        Training stops early once the absolute change in the loss between
        two consecutive epochs drops below this value.

    Attributes
    ----------
    weights : ndarray of shape (n_features, n_classes), or None before ``fit``
    bias : ndarray of shape (n_classes,), or None before ``fit``
    classes_ : ndarray of shape (n_classes,), or None before ``fit``
        Sorted unique labels seen during ``fit``; column ``k`` of the model
        corresponds to ``classes_[k]``.
    """

    def __init__(self, learning_rate=0.001, max_iter=1000, tol=1e-4):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol          # Tolerance for early stopping
        self.weights = None     # Weight matrix (n_features x n_classes)
        self.bias = None        # Bias vector (n_classes,)
        self.classes_ = None    # Sorted unique labels seen by fit (n_classes,)

    def softmax(self, z):
        """Return the row-wise softmax of a 2-D score array ``z``."""
        # Numerically stable softmax: subtracting the row maximum leaves the
        # result unchanged but keeps np.exp from overflowing.
        z_exp = np.exp(z - np.max(z, axis=1, keepdims=True))
        return z_exp / np.sum(z_exp, axis=1, keepdims=True)

    def fit(self, X, y):
        """Fit the weights and bias to ``X`` / ``y`` by gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Class labels; they do not need to be the integers 0..n_classes-1.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``len(y)``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        # Encode arbitrary labels as 0..n_classes-1. Indexing np.eye with the
        # raw labels would fail (or silently wrap around for negative values)
        # whenever the labels are not exactly 0..n_classes-1.
        self.classes_, y_index = np.unique(y, return_inverse=True)
        n_classes = self.classes_.shape[0]

        # Initialize weights and bias
        self.weights = np.zeros((n_features, n_classes))
        self.bias = np.zeros(n_classes)

        # Convert encoded labels to one-hot rows
        y_onehot = np.eye(n_classes)[y_index]

        # Gradient Descent
        prev_loss = float('inf')
        for epoch in range(self.max_iter):
            # Forward pass
            linear_model = np.dot(X, self.weights) + self.bias
            probabilities = self.softmax(linear_model)

            # Mean cross-entropy loss; the small constant guards against log(0)
            loss = -np.mean(np.sum(y_onehot * np.log(probabilities + 1e-15), axis=1))

            # Check for early stopping (before this epoch's update is applied)
            if abs(prev_loss - loss) < self.tol:
                print(f"Early stopping at epoch {epoch}")
                break
            prev_loss = loss

            # Backward pass: gradient of the mean loss w.r.t. the scores
            gradient = (probabilities - y_onehot) / n_samples
            grad_weights = np.dot(X.T, gradient)
            grad_bias = np.sum(gradient, axis=0)

            # Update parameters
            self.weights -= self.learning_rate * grad_weights
            self.bias -= self.learning_rate * grad_bias

    def predict_proba(self, X):
        """Return class probabilities, shape (n_samples, n_classes).

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("This model is not fitted yet; call fit() before predicting")
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return self.softmax(linear_model)

    def predict(self, X):
        """Return the most probable label for every row of ``X``."""
        best = np.argmax(self.predict_proba(X), axis=1)
        # Fall back to raw column indices when weights were set by hand
        # without going through fit (no label mapping available).
        classes = getattr(self, "classes_", None)
        return best if classes is None else classes[best]

In [13]:
# Build the classifier with a larger step size than its default, then fit it on the training split
model = MultinomialLogisticRegression(
    learning_rate=0.1,
    max_iter=1000,
)
model.fit(X_train, y_train)

Early stopping at epoch 713


In [14]:
# Predicted class labels for the training split and the held-out split, in that order
y_pred_train, y_pred_test = (model.predict(split) for split in (X_train, X_test))

In [15]:
# Report accuracy on both splits, then the per-class breakdown on the test split
print("\nTraining Accuracy:", accuracy_score(y_true=y_train, y_pred=y_pred_train))
print("Test Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_test))
print("Confusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred_test))


Training Accuracy: 0.9666666666666667
Test Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0 13  0]
 [ 0  0  7]]
