In [1]:
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo 
from sklearn.model_selection import train_test_split


# Fetch dataset id 73 through ucimlrepo and pull out its feature / target frames.
mushroom = fetch_ucirepo(id=73)
X = mushroom.data.features
y = mushroom.data.targets

# Impute missing entries with each column's most frequent value.
# ``fillna`` with a {column: value} mapping returns a new frame, so the
# object handed back by ucimlrepo is never modified.
column_modes = {name: X[name].mode()[0] for name in X.columns}
X = X.fillna(value=column_modes)

# One-hot encode features and target as 0/1 integers; drop_first=True drops
# one level per variable to avoid the dummy variable trap.
X_encoded = pd.get_dummies(X, drop_first=True).astype(int)
y_encoded = pd.get_dummies(y, drop_first=True).astype(int)


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``.
    Here only ``exp(-|z|)`` is evaluated, which always lies in ``(0, 1]``,
    and the algebraically equivalent branch is chosen per element:

    * ``z >= 0``: ``1 / (1 + exp(-z))``
    * ``z <  0``: ``exp(z) / (1 + exp(z))``

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z``; a scalar for scalar input, otherwise an array of
        the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # in (0, 1]; cannot overflow
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]


class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    n_iterations : int, default 1000
        Number of gradient steps performed by :meth:`fit`.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until :meth:`fit` is called.
    bias : float or None
        Learned intercept; ``None`` until :meth:`fit` is called.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Fit weights and bias to the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets (0 or 1).
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: with a column vector, ``y_pred - y`` would
        # broadcast (n,) against (n, 1) into an (n, n) matrix and silently
        # produce wrong gradients.
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Gradient descent on the mean cross-entropy loss.
        for _ in range(self.n_iterations):
            residual = self.predict_prob(X) - y
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        linear_model = np.dot(X, self.weights) + self.bias
        return sigmoid(linear_model)

    def predict(self, X):
        """Return hard 0/1 labels, thresholding the probability at 0.5."""
        probs = self.predict_prob(X)
        return np.where(probs >= 0.5, 1, 0)

    def cross_entropy(self, y_true, y_pred, eps=1e-15):
        """Return the mean binary cross-entropy between labels and probabilities.

        Parameters
        ----------
        y_true : array-like
            Ground-truth labels (0 or 1).
        y_pred : array-like
            Predicted probabilities of class 1.
        eps : float, default 1e-15
            Probabilities are clipped to ``[eps, 1 - eps]`` so that a
            prediction of exactly 0 or 1 does not yield ``log(0)`` and a
            NaN/inf loss.
        """
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss


# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Train the from-scratch model on plain NumPy arrays; the target frame is
# flattened to a 1-D vector.
model = LogisticRegressionScratch(learning_rate=0.1, n_iterations=1000)
model.fit(x_train.to_numpy(), y_train.to_numpy().ravel())

# Predict hard labels for the held-out rows.
y_pred = model.predict(x_test.to_numpy())

# Accuracy is the fraction of held-out rows whose prediction matches the label.
accuracy = (y_pred == y_test.to_numpy().ravel()).mean()
print(f"Accuracy: {accuracy*100:.2f}%")



Accuracy: 98.46%
