In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [6]:
df = pd.read_csv('agaricus-lepiota.data', header=None)

labelencoder = LabelEncoder()
for col in df.columns:
    df[col] = labelencoder.fit_transform(df[col])

X = df.iloc[:, 1:]
y = df.iloc[:, 0]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def compute_loss(y, y_hat):
    m = len(y)
    return -1/m * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

def gradient_descent(X, y, w, b, learning_rate):
    m = len(y)
    y_hat = sigmoid(np.dot(X, w) + b)
    dw = 1/m * np.dot(X.T, (y_hat - y))
    db = 1/m * np.sum(y_hat - y)
    w -= learning_rate * dw
    b -= learning_rate * db
    return w, b

In [8]:
learning_rate = 0.01
n_iterations = 1000
n_features = X_train.shape[1]

w = np.zeros((n_features, 1))
b = 0

y_train = y_train.values.reshape(-1, 1)

for i in range(n_iterations):
    w, b = gradient_descent(X_train, y_train, w, b, learning_rate)
    if i % 100 == 0:
        print("Loss after iteration {}: {}".format(i, compute_loss(y_train, sigmoid(np.dot(X_train, w) + b))))

Loss after iteration 0: 0.6886093198465015
Loss after iteration 100: 0.44252145011725397
Loss after iteration 200: 0.3584354190605972
Loss after iteration 300: 0.31772990151614033
Loss after iteration 400: 0.2937939432538207
Loss after iteration 500: 0.2779680297340353
Loss after iteration 600: 0.2666682822778348
Loss after iteration 700: 0.2581539297450821
Loss after iteration 800: 0.2514779698152658
Loss after iteration 900: 0.2460803275018806


In [9]:
y_pred = sigmoid(np.dot(X_test, w) + b) >= 0.5

accuracy = np.mean(y_pred.flatten() == y_test)
print(f'Accuracy: {accuracy * 100}%')

Accuracy: 90.95384615384616%
