In [154]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, roc_curve, auc, classification_report
from sklearn.preprocessing import StandardScaler

# Binary Clissification

In [155]:

class LogisticRegression():

    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def sigmoid(self ,x):
        return 1/(1+np.exp(-x))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = self.sigmoid(linear_pred)

            dw = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions-y)

            self.weights = self.weights - self.lr*dw
            self.bias = self.bias - self.lr*db

    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_pred)
        class_pred = [0 if y <= 0.5 else 1 for y in y_pred]
        return class_pred

In [156]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

# For binary classification, let's consider only two classes: setosa (class 0) and non-setosa (class 1)
y_binary = np.where(y == 0, 1, 0)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = LogisticRegression(lr=0.01, n_iters=300)
model.fit(X_train, y_train)

predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7111111111111111


In [157]:
# Confusion Matrix
cm = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0 13  0]]


In [158]:
# Classification Report
print("Classification Report:")
print(classification_report(y_test, predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.50      1.00      0.67        13
           2       0.00      0.00      0.00        13

    accuracy                           0.71        45
   macro avg       0.50      0.67      0.56        45
weighted avg       0.57      0.71      0.61        45



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Multi-Classes Classification 

In [159]:
class MultiClassLogisticRegression:
    def __init__(self, learning_rate=0.05, num_iters=1000):
        self.learning_rate = learning_rate
        self.num_iters = num_iters
        
    def softmax(self,x):
        exp_x = np.exp(x) # should subtract max(X) to avoid exponential overshoot with large values
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def one_hot_encode(self,y, num_classes):
        num_samples = len(y)
        encoded = np.zeros((num_samples, num_classes))
        encoded[np.arange(num_samples), y] = 1
        return encoded

    def cross_entropy_loss(self,y_true, y_pred):
        epsilon = 1e-15 # to avoid numerical instability if probs are too close to zero or 1
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.sum(y_true * np.log(y_pred)) / len(y_true)

    def fit(self, X, y):
        num_samples, num_features = X.shape
        num_classes = len(np.unique(y))

        self.weights = np.random.randn(num_features, num_classes)*0.01
        self.bias = np.zeros(num_classes)

        y_one_hot = self.one_hot_encode(y, num_classes)

        for iter in range(self.num_iters):
            scores = np.dot(X, self.weights) + self.bias
            probabilities = self.softmax(scores)
            loss = self.cross_entropy_loss(y_one_hot, probabilities)

            # Print loss during training (optional)
            if iter % 100 == 0:
                print(f"iter {iter}, Loss: {loss}")

            error = y_one_hot - probabilities

            gradient_weights = -np.dot(X.T, error) / num_samples
            gradient_bias = -np.sum(error, axis=0) / num_samples

            self.weights -= self.learning_rate * gradient_weights
            self.bias -= self.learning_rate * gradient_bias

    def predict(self, X):
        scores = np.dot(X, self.weights) + self.bias
        probabilities = self.softmax(scores)
        return np.argmax(probabilities, axis=1)

In [160]:
# Example usage with Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

model = MultiClassLogisticRegression(learning_rate=0.01, num_iters=300)
model.fit(X_train, y_train)

predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy}")

iter 0, Loss: 1.0923788771966247
iter 100, Loss: 0.7316204480049995
iter 200, Loss: 0.6067463048946952
Accuracy: 0.8666666666666667


In [161]:
# Confusion Matrix
cm = confusion_matrix(y_test,predictions)
print("Confusion Matrix:")
print(cm)


Confusion Matrix:
[[19  0  0]
 [ 0  7  6]
 [ 0  0 13]]


In [162]:
# Classification Report
print("Classification Report:")
print(classification_report(y_test, predictions))


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.54      0.70        13
           2       0.68      1.00      0.81        13

    accuracy                           0.87        45
   macro avg       0.89      0.85      0.84        45
weighted avg       0.91      0.87      0.86        45

