<a href="https://colab.research.google.com/github/LexanderThakur/LogisticRegression/blob/main/LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), applied element-wise.

  Evaluated as exp(-log(1 + exp(-z))) via ``np.logaddexp`` so that large
  negative inputs do not overflow ``exp`` (the naive form emits a
  RuntimeWarning there). Accepts scalars, arrays and array-likes.

  Args:
    z: scalar or array-like of real values.

  Returns:
    Value(s) in [0, 1] with the same shape as ``z``.
  """
  # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), computed stably.
  return np.exp(-np.logaddexp(0, np.negative(z)))

def initialize(n_features):
  """Return the starting parameters: a zero weight vector and a zero bias.

  Args:
    n_features: length of the weight vector to create.

  Returns:
    (weights, bias) where weights is np.zeros(n_features) and bias is 0.
  """
  return np.zeros(n_features), 0

def propogate(X, y, weights, bias):
    """One forward/backward pass of binary logistic regression.

    Args:
        X: feature matrix; X.shape[0] is used as the sample count.
        y: targets, combined element-wise with the predicted probabilities.
        weights: weight vector multiplied with X via np.dot.
        bias: offset added to the linear scores.

    Returns:
        (dw, db, cost): gradient w.r.t. weights, gradient w.r.t. bias, and
        the mean binary cross-entropy cost.
    """
    n_rows = X.shape[0]
    tiny = 1e-15

    # Clip probabilities away from exactly 0 and 1 so both logs stay finite.
    # The clipped values are also the ones used for the gradients below.
    probs = np.clip(sigmoid(np.dot(X, weights) + bias), tiny, 1 - tiny)
    residual = probs - y

    log_likelihood = y * np.log(probs) + (1 - y) * np.log(1 - probs)
    cost = (-1 / n_rows) * np.sum(log_likelihood)

    dw = (1 / n_rows) * np.dot(X.T, residual)
    db = (1 / n_rows) * np.sum(residual)

    return dw, db, cost

def train(X,y,lr=0.1,n_iterations=1000):
  """Fit binary logistic-regression parameters by gradient descent.

  Every iteration evaluates the gradients on all of X and takes one step
  of size ``lr`` against them.

  Args:
    X: feature matrix; X.shape[1] sets the number of weights.
    y: targets passed through to ``propogate``.
    lr: step size applied to both gradients.
    n_iterations: number of gradient-descent steps to run.

  Returns:
    (weights, bias) after the final step.
  """
  weights,bias=initialize(X.shape[1])

  # Use the caller-supplied iteration count; the loop bound used to be a
  # hard-coded 1000, which silently ignored n_iterations.
  for _ in range(n_iterations):
    dw,db,_cost=propogate(X,y,weights,bias)
    weights=weights-lr*dw
    bias=bias-lr*db

  return weights,bias


def predict(X,weights,bias):
  """Return integer labels: 1 where sigmoid(X.weights + bias) >= 0.5, else 0."""
  scores=np.dot(X,weights)+bias
  probabilities=sigmoid(scores)
  is_positive=probabilities>=0.5
  return is_positive.astype(int)

def accuracy(y_true,y_pred):
  """Return the fraction of positions where y_true equals y_pred."""
  matches=y_true==y_pred
  return np.mean(matches)


from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler

# Synthetic two-class dataset: 500 samples, 2 features, fixed seed.
X1, y = make_classification(
    n_samples=500,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

# Scale the raw features with StandardScaler before training.
scaler = StandardScaler()
X = scaler.fit_transform(X1)

# Fit with the default learning rate / iteration count, then score the
# model on the same data it was trained on.
weights, bias = train(X, y)
y_pred = predict(X, weights, bias)
acc = accuracy(y, y_pred)
print(f"Training Accuracy= {acc*100:.2f}%")

Training Accuracy= 89.00%


Multiclass classification using softmax

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# 1. Softmax function
def softmax(z):
    """Row-wise softmax of a 2-D score matrix.

    Each row's maximum is subtracted before exponentiating so that exp()
    never sees a positive argument; the result is unchanged mathematically.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    shifted_exp = np.exp(z - row_max)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

# 2. Initialize weights and bias
def initialize(n_features, n_classes):
    """Return zero-filled starting parameters for the softmax model.

    Returns:
        (weights, bias): weights has shape (n_features, n_classes) and
        bias has shape (1, n_classes).
    """
    weight_shape = (n_features, n_classes)
    bias_shape = (1, n_classes)
    return np.zeros(weight_shape), np.zeros(bias_shape)

# 3. Forward and backward propagation
def propagate(X, y_onehot, weights, bias):
    """One forward/backward pass of softmax regression.

    Args:
        X: feature matrix; X.shape[0] is used as the sample count.
        y_onehot: one-hot targets, combined element-wise with the softmax output.
        weights: weight matrix multiplied with X via np.dot.
        bias: offset added to the linear scores.

    Returns:
        (dw, db, cost): gradient w.r.t. weights, gradient w.r.t. bias
        (summed over axis 0 with keepdims), and the mean cross-entropy cost.
    """
    n_rows = X.shape[0]
    probs = softmax(np.dot(X, weights) + bias)
    residual = probs - y_onehot

    # The 1e-15 offset keeps log() finite if a probability is exactly 0.
    cost = (-1 / n_rows) * np.sum(y_onehot * np.log(probs + 1e-15))

    dw = (1 / n_rows) * np.dot(X.T, residual)
    db = (1 / n_rows) * np.sum(residual, axis=0, keepdims=True)

    return dw, db, cost

# 4. Training function
def train(X, y, lr=0.1, n_iterations=1000):
    """Fit softmax-regression parameters by gradient descent.

    The class count is taken from the distinct values in ``y``; labels are
    one-hot encoded once before the loop. The cost is printed every 100th
    iteration, starting with iteration 0.

    Args:
        X: 2-D feature matrix.
        y: label array; must support ``reshape``.
        lr: step size applied to both gradients.
        n_iterations: number of gradient-descent steps to run.

    Returns:
        (weights, bias) after the final step.
    """
    _, n_features = X.shape
    n_classes = len(np.unique(y))

    y_onehot = OneHotEncoder(sparse_output=False).fit_transform(y.reshape(-1, 1))
    weights, bias = initialize(n_features, n_classes)

    for i in range(n_iterations):
        dw, db, cost = propagate(X, y_onehot, weights, bias)

        # In-place updates; both arrays were created locally by initialize().
        weights -= lr * dw
        bias -= lr * db

        if i % 100 == 0:
            print(f"Iteration {i}: Cost = {cost:.4f}")

    return weights, bias

# 5. Predict function
def predict(X, weights, bias):
    """Return, for each row of X, the index of the class with the highest softmax probability."""
    class_probs = softmax(np.dot(X, weights) + bias)
    return np.argmax(class_probs, axis=1)

# 6. Accuracy function
def accuracy(y_true, y_pred):
    """Return the fraction of positions where y_true equals y_pred."""
    matches = y_true == y_pred
    return np.mean(matches)

# 7. Generate multi-class data
X_raw, y = make_classification(
    n_samples=500, n_features=4, n_informative=4, n_redundant=0,
    n_classes=3, n_clusters_per_class=1, random_state=42,
)

# 8. Scale the raw features with StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

# 9. Hold out 20% of the rows as a test set (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 10. Fit the softmax model on the training split only
weights, bias = train(X_train, y_train, lr=0.1, n_iterations=1000)

# 11. Predict on both splits and report accuracy for each
y_pred_train = predict(X_train, weights, bias)
y_pred_test = predict(X_test, weights, bias)

print(f"Train Accuracy: {accuracy(y_train, y_pred_train)*100:.2f}%")
print(f"Test Accuracy: {accuracy(y_test, y_pred_test)*100:.2f}%")


Iteration 0: Cost = 1.0986
Iteration 100: Cost = 0.5921
Iteration 200: Cost = 0.5698
Iteration 300: Cost = 0.5652
Iteration 400: Cost = 0.5638
Iteration 500: Cost = 0.5632
Iteration 600: Cost = 0.5629
Iteration 700: Cost = 0.5628
Iteration 800: Cost = 0.5628
Iteration 900: Cost = 0.5627
Train Accuracy: 76.25%
Test Accuracy: 74.00%


Using scikit-learn


In [2]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic two-class dataset: 500 samples, 2 features, fixed seed.
X, y = make_classification(
    n_samples=500, n_features=2, n_informative=2,
    n_redundant=0, n_repeated=0, n_classes=2,
    random_state=42,
)

# Hold out 20% of the rows as a test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit LogisticRegression with its default constructor arguments.
model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# NOTE: this rebinds the name `accuracy` to a number, shadowing the
# accuracy() helper function defined in the earlier cells.
accuracy = accuracy_score(y_test, y_pred)

print(f"Test set accuracy: {accuracy * 100:.2f}%")


Test set accuracy: 88.00%
