In [165]:
import numpy as np
import matplotlib.pyplot as plt # Import for the new plot_loss method

class MultinomialLogisticRegression:
    def __init__(self, learning_rate=0.01, epochs=1000, penalty='none', alpha=0.1, l1_ratio=0.5):
        self.lr = learning_rate
        self.epochs = epochs
        self.penalty = penalty
        self.alpha = alpha
        self.l1_ratio = l1_ratio

        if not (0 <= self.l1_ratio <= 1):
            raise ValueError("l1_ratio must be between 0 and 1.")

        self.weights = None  # Shape (n_features, n_classes)
        self.bias = None     # Shape (1, n_classes)
        self.losses = []     # To track loss history
        self.n_classes = None  # Number of unique classes

    def _softmax(self, z):
        z_stable = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(z_stable)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def fit(self, X, y):
        print(f"Input X shape: {X.shape}")
        n_samples, n_features = X.shape

        y_one_hot = y
        self.n_classes = y_one_hot.shape[1]

        # --- IMPROVEMENT 1: Random Weight Initialization ---
        # Initialize weights with a small random distribution (e.g., small normal)
        # This helps break symmetry and is often better than all zeros.
        std_dev = 1.0 / np.sqrt(n_features) # Simple scaling for initial weights
        self.weights = np.random.randn(n_features, self.n_classes) * std_dev
        self.bias = np.zeros((1, self.n_classes))

        self.losses = []

        # Gradient Descent
        for i in range(self.epochs):
            # ... (Logits, Softmax, Loss calculation remains the same) ...
            z = np.dot(X, self.weights) + self.bias
            y_pred = self._softmax(z)

            epsilon = 1e-9
            y_pred_clipped = np.clip(y_pred, epsilon, 1 - epsilon)
            loss = (-1 / n_samples) * np.sum(y_one_hot * np.log(y_pred_clipped))

            reg_loss = 0
            if self.penalty == 'l2':
                reg_loss = (self.alpha / 2) * np.sum(np.square(self.weights))
            elif self.penalty == 'l1':
                reg_loss = self.alpha * np.sum(np.abs(self.weights))
            elif self.penalty == 'elasticnet':
                l1_term = self.alpha * self.l1_ratio * np.sum(np.abs(self.weights))
                l2_term = (self.alpha * (1 - self.l1_ratio) / 2) * np.sum(np.square(self.weights))
                reg_loss = l1_term + l2_term

            total_loss = loss + reg_loss
            self.losses.append(total_loss)

            # ... (Gradient calculation remains the same) ...
            error = y_pred - y_one_hot
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error, axis=0, keepdims=True)

            if self.penalty == 'l2':
                dw += self.alpha * self.weights
            elif self.penalty == 'l1':
                dw += self.alpha * np.sign(self.weights)
            elif self.penalty == 'elasticnet':
                dw_l1 = self.alpha * self.l1_ratio * np.sign(self.weights)
                dw_l2 = self.alpha * (1 - self.l1_ratio) * self.weights
                dw += (dw_l1 + dw_l2)

            # Update Weights and Bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            if i % (self.epochs // 10) == 0:
                print(f"Epoch {i}, Loss: {total_loss:.4f}")

        print(f"Final Loss: {self.losses[-1]:.4f}")


    def predict_proba(self, X):
        if self.weights is None:
            raise ValueError("Model has not been trained yet. Call .fit() first.")
        z = np.dot(X, self.weights) + self.bias
        return self._softmax(z)

    def predict(self, X):
        probabilities = self.predict_proba(X)
        return np.argmax(probabilities, axis=1)

    # --- IMPROVEMENT 2: Add a scikit-learn compatible score method ---
    def score(self, X, y_one_hot):
        """
        Calculates the accuracy of the model.
        Accepts one-hot encoded true labels (y_one_hot) and converts them internally.
        """
        from sklearn.metrics import accuracy_score

        # 1. Convert one-hot encoded true labels to integer labels
        y_true_labels = np.argmax(y_one_hot, axis=1)

        # 2. Get integer predictions from the model
        y_pred_labels = self.predict(X)

        # 3. Return the accuracy score
        return accuracy_score(y_true_labels, y_pred_labels)

    # --- IMPROVEMENT 3: Add a method to plot the loss history ---
    def plot_loss(self):
        """Plots the training loss history for diagnostics."""
        if not self.losses:
            print("No loss history recorded. Run .fit() first.")
            return

        plt.figure(figsize=(10, 6))
        plt.plot(self.losses)
        plt.title('Training Loss Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel(f'Loss (with {self.penalty} regularization)')
        plt.grid(True)
        plt.show()

In [166]:
import pandas as pd
df=pd.read_csv('drug_200.csv')
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY


In [167]:
df.isnull().sum()

Unnamed: 0,0
Age,0
Sex,0
BP,0
Cholesterol,0
Na_to_K,0
Drug,0


In [168]:
df=pd.get_dummies(df,columns=['Sex','BP','Cholesterol'],drop_first=True)
df.head()

Unnamed: 0,Age,Na_to_K,Drug,Sex_M,BP_LOW,BP_NORMAL,Cholesterol_NORMAL
0,23,25.355,drugY,False,False,False,False
1,47,13.093,drugC,True,True,False,False
2,47,10.114,drugC,True,True,False,False
3,28,7.798,drugX,False,False,True,False
4,61,18.043,drugY,False,True,False,False


In [169]:
X=df.drop('Drug',axis=1)
y=df['Drug']

In [170]:
y=pd.get_dummies(y,drop_first=True)

In [171]:
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Convert boolean columns to integers before converting to NumPy array
X_train = X_train.astype(int).to_numpy()
X_test = X_test.astype(int).to_numpy()
y_train=np.array(y_train)

In [172]:
from sklearn.preprocessing import StandardScaler

# 1. Initialize the Scaler
scaler = StandardScaler()

# 2. Fit the scaler ONLY on the training data
# This step calculates the mean and standard deviation from X_train.
scaler.fit(X_train)

# 3. Transform both the training and test data
# The scaler uses the mean/std learned from X_train to transform both sets.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Update your variables to use the scaled data for training and prediction
X_train = X_train_scaled
X_test = X_test_scaled

In [173]:
# Change the learning rate to a smaller value
model=MultinomialLogisticRegression(learning_rate=0.001, epochs=20000, penalty='l2', alpha=0.1, l1_ratio=0.1)
model.fit(X_train,y_train)

Input X shape: (160, 6)
Epoch 0, Loss: 1.3933
Epoch 2000, Loss: 0.8415
Epoch 4000, Loss: 0.7094
Epoch 6000, Loss: 0.6639
Epoch 8000, Loss: 0.6449
Epoch 10000, Loss: 0.6365
Epoch 12000, Loss: 0.6328
Epoch 14000, Loss: 0.6316
Epoch 16000, Loss: 0.6314
Epoch 18000, Loss: 0.6318
Final Loss: 0.6325


In [174]:
import numpy as np
from sklearn.metrics import accuracy_score

# --- Calculate Training Accuracy ---
# FIX: Convert the one-hot encoded y_train back to integer labels
# np.argmax finds the column index of the '1' in each row, which is the class index.
y_train_labels = np.argmax(y_train, axis=1)

# Get predictions (which are already in integer label format)
y_train_pred = model.predict(X_train)

# Calculate accuracy using the corrected true labels
train_accuracy = accuracy_score(y_train_labels, y_train_pred)
print(f"Training Accuracy: {train_accuracy:.4f}")

# --- Calculate Test Accuracy ---
# Get predictions for the test set
y_test_pred = model.predict(X_test)

# FIX: Convert the one-hot encoded y_test back to integer labels
y_test_labels = np.argmax(y_test, axis=1)

# Calculate Test Accuracy
test_accuracy = accuracy_score(y_test_labels, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Training Accuracy: 0.7937
Test Accuracy: 0.7000


# Task
Train and evaluate a `MultinomialLogisticRegression` model with 'l1', 'l2', and 'elasticnet' regularization, calculating precision, recall, and F1-score for each.

## Iterate through regularization types

### Subtask:
Define a loop to iterate through 'l1', 'l2', and 'elasticnet'.


**Reasoning**:
The subtask requires defining a list of regularization types and starting a loop to iterate through them. This can be done in a single code block.



In [175]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import KFold

# --- K-FOLD CROSS-VALIDATION SETUP ---
# Initialize KFold with 5 splits (k=5)
# shuffle=True is used for randomly mixing the data before splitting
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define a list of regularization types
regularization_types = ['l1', 'l2', 'elasticnet']

# Iterate through each regularization type
for reg_type in regularization_types:
    print(f"\n--- Running 5-Fold CV with {reg_type} regularization ---")

    # Lists to store metrics from each fold
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    # Iterate over the 5 folds using the training data
    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        # 1. Split data for the current fold (using the scaled X_train if you applied scaling)
        X_fold_train, X_fold_val = X_train[train_index], X_train[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        # 2. Initialize and Train the Model (using stable hyperparameters)
        model = MultinomialLogisticRegression(
            learning_rate=0.25, # Corrected learning rate for convergence
            epochs=3000,          # Corrected epochs
            penalty=reg_type,
            alpha=0.1,
            l1_ratio=0.
        )
        # Train on the fold's training data
        model.fit(X_fold_train, y_fold_train)

        # 3. Predict on the Validation Fold
        y_fold_pred = model.predict(X_fold_val)

        # 4. Convert the one-hot encoded y_fold_val back to integer labels
        y_fold_labels = np.argmax(y_fold_val, axis=1)

        # 5. Calculate and store evaluation metrics
        accuracy = accuracy_score(y_fold_labels, y_fold_pred)
        precision = precision_score(y_fold_labels, y_fold_pred, average='weighted', zero_division=0)
        recall = recall_score(y_fold_labels, y_fold_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_fold_labels, y_fold_pred, average='weighted', zero_division=0)

        accuracy_scores.append(accuracy)
        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)

        print(f"  Fold {fold+1} Accuracy: {accuracy:.4f}")

    # 6. Display Averaged CV Results
    print(f"\nAverage 5-Fold CV Metrics for {reg_type} regularization:")
    print(f"  Accuracy: {np.mean(accuracy_scores):.4f} \u00B1 {np.std(accuracy_scores):.4f}")
    print(f"  Precision: {np.mean(precision_scores):.4f} \u00B1 {np.std(precision_scores):.4f}")
    print(f"  Recall: {np.mean(recall_scores):.4f} \u00B1 {np.std(recall_scores):.4f}")
    print(f"  F1-Score: {np.mean(f1_scores):.4f} \u00B1 {np.std(f1_scores):.4f}")


--- Running 5-Fold CV with l1 regularization ---
Input X shape: (128, 6)
Epoch 0, Loss: 2.7138
Epoch 300, Loss: 0.8355
Epoch 600, Loss: 0.8290
Epoch 900, Loss: 0.8295
Epoch 1200, Loss: 0.8255
Epoch 1500, Loss: 0.8297
Epoch 1800, Loss: 0.8276
Epoch 2100, Loss: 0.8228
Epoch 2400, Loss: 0.8294
Epoch 2700, Loss: 0.8233
Final Loss: 0.8317
  Fold 1 Accuracy: 0.7812
Input X shape: (128, 6)
Epoch 0, Loss: 2.0421
Epoch 300, Loss: 0.8322
Epoch 600, Loss: 0.8373
Epoch 900, Loss: 0.8304
Epoch 1200, Loss: 0.8389
Epoch 1500, Loss: 0.8361
Epoch 1800, Loss: 0.8387
Epoch 2100, Loss: 0.8425
Epoch 2400, Loss: 0.8337
Epoch 2700, Loss: 0.8362
Final Loss: 0.8355
  Fold 2 Accuracy: 0.7188
Input X shape: (128, 6)
Epoch 0, Loss: 3.0280
Epoch 300, Loss: 0.8264
Epoch 600, Loss: 0.8262
Epoch 900, Loss: 0.8329
Epoch 1200, Loss: 0.8229
Epoch 1500, Loss: 0.8327
Epoch 1800, Loss: 0.8265
Epoch 2100, Loss: 0.8263
Epoch 2400, Loss: 0.8296
Epoch 2700, Loss: 0.8273
Final Loss: 0.8260
  Fold 3 Accuracy: 0.6562
Input X sha

### Evaluation Results for L1 Regularization

*   **Accuracy:** 0.7500
*   **Precision:** 0.7500
*   **Recall:** 0.7500
*   **F1-Score:** 0.7500

### Evaluation Results for L2 Regularization

*   **Accuracy:** 0.7000
*   **Precision:** 0.6333
*   **Recall:** 0.7000
*   **F1-Score:** 0.6652

### Evaluation Results for Elasticnet Regularization

*   **Accuracy:** 0.7500
*   **Precision:** 0.7500
*   **Recall:** 0.7500
*   **F1-Score:** 0.7500

# Task 2

In [179]:
import numpy as np
from collections import Counter # Required for the majority vote calculation

class KNN:
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def _majority_vote(self, neighbor_labels):
        """Finds the most frequent class label among the neighbors."""
        # Convert one-hot encoded labels to integer labels before counting
        integer_labels = np.argmax(neighbor_labels, axis=1)
        counts = Counter(integer_labels)

        # most_common(1) returns the highest-frequency label and its count
        # We only need the label, which is the first element of the first tuple
        return counts.most_common(1)[0][0]

    def predict(self, X_test, n_neighbors):
        """
        Predicts the class label for each point in X_test based on n_neighbors.

        Parameters:
        - X_test (numpy.ndarray): The data points to classify.
        - n_neighbors (int): The number of neighbors (K) to consider.

        Returns:
        - numpy.ndarray: The predicted class labels.
        """
        y_pred = []

        # Iterate through each data point in the test set
        for i in range(len(X_test)):
            distances = []

            # Calculate Euclidean distance to EVERY point in the training set (self.X)
            for j in range(len(self.X)):
                # Euclidean Distance Formula: sqrt(sum((p_i - q_i)^2))
                distance = np.sqrt(np.sum((X_test[i] - self.X[j])**2))
                # Store (distance, label). The label is the one-hot encoded array here.
                distances.append((distance, self.y[j]))

            # 1. Sort by distance (ascending)
            distances = sorted(distances, key=lambda x: x[0])

            # 2. Get the class labels (one-hot encoded arrays) of the top n_neighbors
            # We slice the sorted list to get the first n_neighbors tuples
            # and then extract just the label (the second element [1])
            neighbor_labels_one_hot = [label for distance, label in distances[:n_neighbors]]

            # 3. Perform Majority Vote on the one-hot encoded labels
            predicted_label = self._majority_vote(neighbor_labels_one_hot)

            y_pred.append(predicted_label)

        # Convert the final list of predictions to a NumPy array
        return np.array(y_pred)

In [181]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

knn_model=KNN(X_train,y_train)
n_neighbors=[1,3,5]

for number in n_neighbors:
  y_pred=knn_model.predict(X_test,number)
  # Convert y_test from one-hot encoded to integer labels
  y_test_labels = np.argmax(y_test, axis=1)
  accuracy = accuracy_score(y_test_labels, y_pred)
  precision=precision_score(y_test_labels, y_pred, average='weighted', zero_division=0)
  recall=recall_score(y_test_labels, y_pred, average='weighted', zero_division=0)
  f1=f1_score(y_test_labels, y_pred, average='weighted', zero_division=0)
  print(f"Accuracy for {number} neighbors: {accuracy:.4f}")
  print(f"Precision for {number} neighbors: {precision:.4f}")
  print(f"Recall for {number} neighbors: {recall:.4f}")
  print(f"F1-Score for {number} neighbors: {f1:.4f}")

Accuracy for 1 neighbors: 0.9500
Precision for 1 neighbors: 0.9559
Recall for 1 neighbors: 0.9500
F1-Score for 1 neighbors: 0.9502
Accuracy for 3 neighbors: 0.9000
Precision for 3 neighbors: 0.9000
Recall for 3 neighbors: 0.9000
F1-Score for 3 neighbors: 0.8989
Accuracy for 5 neighbors: 0.8750
Precision for 5 neighbors: 0.8855
Recall for 5 neighbors: 0.8750
F1-Score for 5 neighbors: 0.8712
