In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

df = pd.read_csv("drug_200.csv")

for col in df.columns:
    if df[col].dtype == "object":
        df[col] = LabelEncoder().fit_transform(df[col])

X = df.drop("Drug", axis=1).values
y = df["Drug"].values

def sigmoid(z):
    return 1 / (1 + np.exp(-z))


class LogisticRegressionScratch:
    def __init__(self, lr=0.01, epochs=2000, reg="none", lambda_=0.1, alpha=0.5):
        self.lr = lr
        self.epochs = epochs
        self.reg = reg
        self.lambda_ = lambda_
        self.alpha = alpha  # for elastic net

    def fit(self, X, y):
        n, d = X.shape
        self.w = np.zeros(d)
        self.b = 0

        for _ in range(self.epochs):
            z = X @ self.w + self.b
            p = sigmoid(z)

            dw = (X.T @ (p - y)) / n
            db = np.sum(p - y) / n

            # Regularization:-
            if self.reg == "l2":  # Ridge
                dw += (self.lambda_ / n) * self.w

            elif self.reg == "l1":  # Lasso
                dw += (self.lambda_ / n) * np.sign(self.w)

            elif self.reg == "elastic":  # Elasticnet
                dw += (self.lambda_ / n) * (self.alpha * np.sign(self.w) + (1 - self.alpha) * self.w)

            self.w -= self.lr * dw
            self.b -= self.lr * db

    def predict(self, X):
        return (sigmoid(X @ self.w + self.b) >= 0.5).astype(int)
def metrics(y_true, y_pred):
    acc = np.mean(y_true == y_pred)

    TP = np.sum((y_true == 1) & (y_pred == 1))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    precision = TP / (TP + FP + 1e-9)
    recall = TP / (TP + FN + 1e-9)
    f1 = (2 * precision * recall) / (precision + recall + 1e-9)

    return acc, precision, recall, f1
def evaluate(model, X, y):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    scores = []

    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        scores.append(metrics(y_test, y_pred))

    return np.mean(scores, axis=0)
models = {
    "No Regularization": LogisticRegressionScratch(reg="none"),
    "L1 (Lasso)": LogisticRegressionScratch(reg="l1", lambda_=0.1),
    "L2 (Ridge)": LogisticRegressionScratch(reg="l2", lambda_=0.1),
    "Elastic Net": LogisticRegressionScratch(reg="elastic", lambda_=0.1, alpha=0.5)
}

for name, model in models.items():
    acc, p, r, f1 = evaluate(model, X, y)
    print(f"\n{name}:")
    print(f"Accuracy  = {acc:.4f}")
    print(f"Precision = {p:.4f}")
    print(f"Recall    = {r:.4f}")
    print(f"F1 Score  = {f1:.4f}")



No Regularization:
Accuracy  = 0.0800
Precision = 0.5024
Recall    = 1.0000
F1 Score  = 0.6376

L1 (Lasso):
Accuracy  = 0.0800
Precision = 0.5024
Recall    = 1.0000
F1 Score  = 0.6376

L2 (Ridge):
Accuracy  = 0.0800
Precision = 0.5024
Recall    = 1.0000
F1 Score  = 0.6376

Elastic Net:
Accuracy  = 0.0800
Precision = 0.5024
Recall    = 1.0000
F1 Score  = 0.6376


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from collections import Counter
df = pd.read_csv("drug_200.csv")
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = LabelEncoder().fit_transform(df[col])

X = df.drop("Drug", axis=1).values
y = df["Drug"].values
class KNNClassifier:
    def __init__(self, k):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = []
        for x in X:
            distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))
            idx = np.argsort(distances)[:self.k]
            neighbors = self.y_train[idx]
            pred = Counter(neighbors).most_common(1)[0][0]
            predictions.append(pred)
        return np.array(predictions)

def compute_metrics(y_true, y_pred):
    acc = np.mean(y_true == y_pred)

    TP = np.sum((y_true == 1) & (y_pred == 1))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    precision = TP / (TP + FP + 1e-9)
    recall = TP / (TP + FN + 1e-9)
    f1 = (2 * precision * recall) / (precision + recall + 1e-9)

    return acc, precision, recall, f1

def evaluate_k(k):
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    results = []

    for train_idx, test_idx in kfold.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model = KNNClassifier(k=k)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        results.append(compute_metrics(y_test, y_pred))

    return np.mean(results, axis=0)
results = {}

for k in [1, 3, 5]:
    acc, p, r, f1 = evaluate_k(k)
    results[k] = {"Accuracy": acc, "Precision": p, "Recall": r, "F1": f1}

    print(f"\nK = {k} ")
    print(f"Accuracy  = {acc:.4f}")
    print(f"Precision = {p:.4f}")
    print(f"Recall    = {r:.4f}")
    print(f"F1 Score  = {f1:.4f}")

print("\n\nTable")
print("K | Accuracy | Precision | Recall | F1 Score")

for k, metrics in results.items():
    print(f"{k} | {metrics['Accuracy']:.4f} | {metrics['Precision']:.4f} | {metrics['Recall']:.4f} | {metrics['F1']:.4f}")

best_k = max(results, key=lambda k: results[k]["Accuracy"])
best_acc = results[best_k]["Accuracy"]

print("\n\n Best K value:")
print(f"K = {best_k} has the highest Accuracy = {best_acc:.4f}")


print("\nConclusion:")
print("Based on the evaluation, K =", best_k, "is the best value for this dataset.")




K = 1 
Accuracy  = 0.7350
Precision = 0.8000
Recall    = 0.8000
F1 Score  = 0.8000

K = 3 
Accuracy  = 0.7450
Precision = 0.8000
Recall    = 0.8000
F1 Score  = 0.8000

K = 5 
Accuracy  = 0.7250
Precision = 0.8000
Recall    = 0.8000
F1 Score  = 0.8000


Table
K | Accuracy | Precision | Recall | F1 Score
1 | 0.7350 | 0.8000 | 0.8000 | 0.8000
3 | 0.7450 | 0.8000 | 0.8000 | 0.8000
5 | 0.7250 | 0.8000 | 0.8000 | 0.8000


 Best K value:
K = 3 has the highest Accuracy = 0.7450

Conclusion:
Based on the evaluation, K = 3 is the best value for this dataset.
