In [2]:
# Cell 1

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

df = pd.read_csv("drug_200.csv")
df.head()


Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY


In [3]:
# Cell 2

# Encode target column
le = LabelEncoder()
df["Drug"] = le.fit_transform(df["Drug"])

# Encode categorical feature columns
for col in df.columns:
    if df[col].dtype == "object":
        df[col] = LabelEncoder().fit_transform(df[col])

# Features and labels
X = df.drop(columns=["Drug"]).values
y = df["Drug"].values.reshape(-1, 1)

# Normalize
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [4]:
# Cell 3

class LogisticRegressionScratch:
    def __init__(self, lr=0.1, epochs=3000):
        self.lr = lr
        self.epochs = epochs

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        m, n = X.shape
        self.W = np.zeros((n, 1))
        self.b = 0

        for _ in range(self.epochs):
            z = X @ self.W + self.b
            y_pred = self.sigmoid(z)

            dz = y_pred - y
            dW = (1/m) * (X.T @ dz)
            db = (1/m) * np.sum(dz)

            self.W -= self.lr * dW
            self.b -= self.lr * db

    def predict(self, X):
        y_pred = self.sigmoid(X @ self.W + self.b)
        return (y_pred >= 0.5).astype(int).flatten()


In [5]:
# Cell 4

model = LogisticRegressionScratch(lr=0.1, epochs=3000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)


In [6]:
# Cell 5

def macro_f1(y_true, y_pred):
    classes = np.unique(y_true)
    f1_list = []

    for c in classes:
        tp = np.sum((y_true.flatten() == c) & (y_pred.flatten() == c))
        fp = np.sum((y_true.flatten() != c) & (y_pred.flatten() == c))
        fn = np.sum((y_true.flatten() == c) & (y_pred.flatten() != c))

        precision = tp / (tp + fp + 1e-10)
        recall = tp / (tp + fn + 1e-10)
        f1 = 2 * precision * recall / (precision + recall + 1e-10)
        f1_list.append(f1)

    return np.mean(f1_list)

print("Macro F1 Score:", macro_f1(y_test, y_pred))


Macro F1 Score: 0.027906976741460244


In [7]:
# Cell 6

def accuracy(y_true, y_pred):
    return np.sum(y_true.flatten() == y_pred.flatten()) / len(y_true)

print("Accuracy:", accuracy(y_test, y_pred))


Accuracy: 0.075
