# Lab 3: Linear Classification

In [None]:
import numpy as np
import pandas as pd # Added for loading CSVs in final workflow
from sklearn.metrics import f1_score # For F1 score calculation
from sklearn.model_selection import train_test_split # For robust data splitting

## 0. Utility Functions (Data Splitting)

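A utility function that splits data into training and validation sets. The split is stratified on the labels, so both sets keep approximately the same class proportions as the original data.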

In [None]:
def split_data_numpy(X, y, test_size=0.2, random_state=None):
    """
    Produce a stratified train/validation split of the arrays X and y.

    Delegates the actual split to sklearn.model_selection.train_test_split
    after checking that both arrays hold the same number of samples.

    Args:
        X (np.ndarray): Feature array, one row per sample.
        y (np.ndarray): Label array, one entry per sample; also used as the
            stratification target.
        test_size (float): Passed through to train_test_split (default 0.2).
        random_state (int or None): Passed through to train_test_split.

    Returns:
        tuple: (X_train, X_val, y_train, y_val)

    Raises:
        ValueError: If X and y differ in their first dimension.
    """
    n_rows_X = X.shape[0]
    n_rows_y = y.shape[0]
    if n_rows_X != n_rows_y:
        raise ValueError("X and y must have the same number of samples.")

    # y.shape was read above, so y cannot be None at this point and is
    # always the stratification target.
    parts = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,
    )
    X_train, X_val, y_train, y_val = parts
    return X_train, X_val, y_train, y_val

## 1. StandardizeData Function

This function standardizes features by removing the mean and scaling to unit variance. The mean and standard deviation are calculated *only* from the training data (`X_train`) and then applied to all three datasets (`X_train`, `X_val`, `X_test`). This prevents data leakage from the validation and test sets into the training process.

**Standardization Formula:**
Z = (X - mean_train) / (std_train + epsilon)

Where:
- `X` is the data to be standardized (a feature column).
- `mean_train` is the mean of the feature in the training data.
- `std_train` is the standard deviation of the feature in the training data.
- `epsilon` is a small constant (e.g., 1e-8) added to the standard deviation to prevent division by zero in case a feature has zero variance in the training set.

In [None]:
def StandardizeData(X_train, X_val, X_test):
    """
    Z-score the three datasets using statistics taken from X_train only.

    Args:
        X_train (np.ndarray): Training data features (samples x features).
            A 1-D array is treated as a single feature column.
        X_val (np.ndarray): Validation data features (samples x features).
        X_test (np.ndarray): Test data features (samples x features).

    Returns:
        tuple: (X_train_standardized, X_val_standardized, X_test_standardized).
               An empty X_val / X_test is returned untouched. One whose
               feature count differs from X_train is also returned untouched,
               after printing a warning.
    """
    epsilon = 1e-8  # keeps the division finite for zero-variance features

    if X_train.ndim == 1:
        X_train = X_train.reshape(-1, 1)

    mean_train = np.mean(X_train, axis=0)
    std_train = np.std(X_train, axis=0)
    scale = std_train + epsilon
    n_features = mean_train.shape[0]

    def _apply(data, label):
        # Non-empty 1-D input is promoted to a single column, like X_train.
        if data.ndim == 1 and data.size > 0:
            data = data.reshape(-1, 1)
        if data.size == 0:
            return data  # nothing to scale
        if data.shape[1] == n_features:
            return (data - mean_train) / scale
        print(f"Warning: {label} feature count ({data.shape[1]}) differs from X_train ({n_features}). Returning original {label}.")
        return data

    X_train_standardized = (X_train - mean_train) / scale
    X_val_standardized = _apply(X_val, "X_val")
    X_test_standardized = _apply(X_test, "X_test")

    return X_train_standardized, X_val_standardized, X_test_standardized

### Example Usage for StandardizeData

In [None]:
# Toy 3-feature datasets: 4 training rows, 2 validation rows, 2 test rows.
X_train_sample_sd = np.array([[1, 10, 100], [2, 20, 200], [3, 30, 300], [4, 40, 400]], dtype=float)
X_val_sample_sd = np.array([[5, 50, 500], [6, 60, 600]], dtype=float)
X_test_sample_sd = np.array([[0, 0, 0], [7, 70, 700]], dtype=float)
# Copies are passed in, so the sample arrays above are kept apart from whatever the call returns.
X_train_std_sd, X_val_std_sd, X_test_std_sd = StandardizeData(X_train_sample_sd.copy(), X_val_sample_sd.copy(), X_test_sample_sd.copy())

## 2. Perceptron Model

This section defines the `Perceptron` class for binary classification.

### 2.1. Perceptron Class Definition

In [None]:
class Perceptron:
    """Perceptron classifier.

    Parameters
    ------------
    X : np.ndarray (for shape determination in __init__)
      Input data used to determine the number of features and initialize weights.
      This is typically the training data X. A 1-D array is interpreted as a
      single sample, so its length is taken as the number of features.
    n_iter : int
      Number of passes over the training dataset (epochs). Default is 1.

    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting. The first element is the bias term (w_0),
      and the rest are the weights for each feature (w_1, w_2, ...).
    errors_ : list
      Number of misclassifications (updates) in each epoch. Populated by 'fit'.
    n_iter : int
      Stores the number of iterations specified for training.

    Methods
    -------
    fit(X, y)
      Fits the training data to learn model weights.
    linear_combination(X)
      Calculates the net input (weighted sum + bias).
    predict(X)
      Returns class labels after applying a step function to the net input.
    """

    def __init__(self, X, n_iter=1):
        if not isinstance(X, np.ndarray):
            raise ValueError("Input X must be a NumPy array for Perceptron initialization.")
        # A 1-D X is one sample, so its length is the feature count.
        n_features = X.shape[0] if X.ndim == 1 else X.shape[1]

        self.w_ = np.zeros(1 + n_features)  # +1 for the bias term w_0
        self.errors_ = []
        self.n_iter = n_iter

    def linear_combination(self, X):
        """Calculate net input (weighted sum plus bias).

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features] or [n_features]
            A 1-D input is treated as a single sample.

        Returns
        -------
        np.ndarray, shape = [n_samples]

        Raises
        ------
        ValueError
            If the number of features does not match the number of weights.
        """
        X = np.asarray(X)  # accept lists as well as arrays
        X_proc = X.reshape(1, -1) if X.ndim == 1 else X

        n_weights = len(self.w_) - 1
        if X_proc.shape[1] != n_weights:
            raise ValueError(f"Number of features in X ({X_proc.shape[1]}) does not match number of weights ({n_weights}).")

        return np.dot(X_proc, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return class label after unit step function.

        A net input of exactly 0 is classified as 1.
        """
        net_input = self.linear_combination(X)
        return np.where(net_input >= 0, 1, 0)

    def fit(self, X, y):
        """Fit training data.

        Weights are NOT reset: training continues from the current `w_`.
        `errors_` is cleared and refilled with one entry per epoch.

        Parameters
        ----------
        X : {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values (0 or 1). A column vector is flattened.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If X and y do not contain the same number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        # zip() would silently drop the surplus rows, so fail loudly instead.
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples.")

        self.errors_ = []

        for _ in range(self.n_iter):
            errors_in_epoch = 0
            for xi, target_label in zip(X, y):
                # predict() on a single sample always returns a length-1 array.
                update = target_label - self.predict(xi)[0]

                if update != 0:
                    # update is +1 or -1: move the boundary toward the sample.
                    self.w_[1:] += update * xi
                    self.w_[0] += update
                    errors_in_epoch += 1

            self.errors_.append(errors_in_epoch)

        return self

### Example Usage for Perceptron (Initialization, Linear Combination, Prediction, and Fit)

In [None]:
# Five 2-feature samples with binary labels for a quick end-to-end run.
X_sample_p_ex = np.array([[2.0, 3.0], [0.5, 1.5], [4.0, 0.0], [1.0, 1.0], [3.0, 2.0]])
y_sample_p_ex = np.array([1, 0, 1, 0, 1]) 
# X is passed to the constructor only so the weight vector gets the right length; training happens in fit().
perceptron_ex_fit = Perceptron(X=X_sample_p_ex, n_iter=3) 
perceptron_ex_fit.fit(X_sample_p_ex, y_sample_p_ex)

## 3. Fisher's Linear Discriminant Function

This function computes the Fisher's Linear Discriminant vector `w` which optimally separates two classes.

**Methodology:**
1.  **Compute Mean Vectors**: Calculate the mean vector for each class (0 and 1).
    - `m0 = mean(X[y == 0])`
    - `m1 = mean(X[y == 1])`
2.  **Compute Within-Class Scatter Matrices (S0, S1)**:
    - For each class, calculate its scatter matrix. For class 0:
      `s0_scatter = sum((x - m0)(x - m0)^T)` for all `x` in class 0.
    - Similarly for `s1_scatter` for class 1 using `m1`.
3.  **Compute Total Within-Class Scatter Matrix (S_W)**:
    - `S_W = s0_scatter + s1_scatter`
4.  **Compute Between-Class Scatter Matrix (S_B)**:
    - `S_B = (m1 - m0)(m1 - m0)^T`
    - (Note: `S_B` itself is not directly used to find `w` in the `inv(S_W) * (m1-m0)` formulation, but it's part of Fisher's criterion J(w) = (w^T S_B w) / (w^T S_W w). The direction that maximizes this is proportional to `inv(S_W) * (m1-m0)`).
5.  **Compute Discriminant Vector (w)**:
    - `w = inv(S_W) @ (m1 - m0)`
    - If `S_W` is singular, its pseudo-inverse `pinv(S_W)` is used.
6.  **Normalize Discriminant Vector**: 
    - `w = w / ||w||` (Euclidean norm)
    This makes the vector a unit vector, which is common practice but doesn't change its direction.

In [None]:
def fisher_discriminant(X, y):
    """
    Computes Fisher's Linear Discriminant vector w.

    The direction is proportional to inv(S_W) @ (m1 - m0), where S_W is the
    total within-class scatter matrix and m0, m1 are the class means.

    Args:
        X (np.ndarray): Standardized input data (samples x features).
            A 1-D array is treated as a single feature column.
        y (np.ndarray): Binary labels (0 or 1) for each sample. A column
            vector is flattened.

    Returns:
        np.ndarray: Normalized Fisher's Linear Discriminant vector w.
                    Returns None if a class has no samples or the
                    pseudo-inverse cannot be computed. If w has zero norm
                    (e.g. identical class means) the un-normalized zero
                    vector is returned.
    """
    X = np.asarray(X)
    y = np.asarray(y).ravel()
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    num_features = X.shape[1]

    # Separate data by class
    X0 = X[y == 0]
    X1 = X[y == 1]

    if X0.size == 0 or X1.size == 0:
        print("Warning: One or both classes have no samples. Cannot compute Fisher Discriminant.")
        return None

    # Compute mean vectors
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # Within-class scatter: sum of outer products of the centered rows,
    # which equals centered.T @ centered.
    centered0 = X0 - m0
    centered1 = X1 - m1
    S_W = centered0.T @ centered0 + centered1.T @ centered1

    mean_diff = m1 - m0

    # np.linalg.inv only raises for *exactly* singular input; a numerically
    # rank-deficient S_W would otherwise give a meaningless direction, so
    # test the rank explicitly.
    try:
        rank_deficient = np.linalg.matrix_rank(S_W) < num_features
    except np.linalg.LinAlgError:
        rank_deficient = True

    w = None
    if not rank_deficient:
        try:
            # Solving the linear system is more stable than forming inv(S_W).
            w = np.linalg.solve(S_W, mean_diff)
        except np.linalg.LinAlgError:
            rank_deficient = True

    if rank_deficient:
        print("Warning: S_W is singular, using pseudo-inverse.")
        try:
            w = np.linalg.pinv(S_W) @ mean_diff
        except np.linalg.LinAlgError:
            print("Error: Pseudo-inverse of S_W also failed. Cannot compute w.")
            return None

    # Normalize discriminant vector w
    norm_w = np.linalg.norm(w)
    if norm_w == 0:
        print("Warning: Norm of w is zero. Cannot normalize.")
        return w
    return w / norm_w

### Example Usage for fisher_discriminant

In [None]:
# Sample standardized data for Fisher's LDA
# Two mirror-image clusters of four 2-D points each.
X_exp_fisher = np.array([
    [-1, -1], [-1.5, -0.5], [-0.5, -1.5], [-0.8, -0.8], # Class 0
    [ 1,  1], [ 1.5,  0.5], [ 0.5,  1.5], [ 0.8,  0.8]  # Class 1
])
# Labels line up with the rows above: first four are class 0, last four class 1.
y_exp_fisher = np.array([0, 0, 0, 0, 1, 1, 1, 1])
# Kept at module level because the later example cells reuse it.
w_fisher_example = fisher_discriminant(X_exp_fisher, y_exp_fisher)

## 4. Fisher's LDA Decision Boundary Calculation

Once data is projected onto the 1D space defined by Fisher's discriminant vector `w`, a decision boundary is needed to classify new points. This boundary is typically chosen as the midpoint between the means of the two classes in this projected 1D space.

**Methodology:**
1.  **Project Data**: The training data `X_train` is projected onto `w`: `X_train_lda = X_train @ w`.
2.  **Calculate Means of Projected Classes**: 
    - `mean_class_0_lda = mean(X_train_lda[y_train == 0])`
    - `mean_class_1_lda = mean(X_train_lda[y_train == 1])`
3.  **Compute Decision Boundary**: 
    - `decision_boundary = (mean_class_0_lda + mean_class_1_lda) / 2.0`
    This boundary value is a scalar in the 1D projected space.

In [None]:
def boundary_calculation(X_train_lda, y_train):
    """
    Return the midpoint between the two class means of 1D projected data.

    Args:
        X_train_lda (np.ndarray): Training data projected onto the LDA vector
            (1D array, or a 2D array with a single column).
        y_train (np.ndarray): Binary labels (0 or 1) for the training data.

    Returns:
        float: The decision boundary, or np.nan when either class has no
               samples (or a class mean is itself NaN).

    Raises:
        ValueError: If X_train_lda is 2D with more than one column.
    """
    projected = X_train_lda
    if projected.ndim != 1:
        if projected.shape[1] != 1:
            raise ValueError("X_train_lda must be a 1D array or a 2D array with one column.")
        projected = projected.flatten()

    members_0 = projected[y_train == 0]
    members_1 = projected[y_train == 1]

    # An empty class has no mean; NaN marks it for the check below.
    center_0 = np.mean(members_0) if members_0.size != 0 else np.nan
    center_1 = np.mean(members_1) if members_1.size != 0 else np.nan

    if np.isnan(center_0) or np.isnan(center_1):
        return np.nan

    return (center_0 + center_1) / 2.0

### Example Usage for boundary_calculation

In [None]:
# Only run when the fisher_discriminant example cell has produced a usable vector.
if 'w_fisher_example' in globals() and w_fisher_example is not None:
    # The projection needs one weight per feature column.
    if X_exp_fisher.shape[1] == len(w_fisher_example):
        X_exp_lda_bc = X_exp_fisher @ w_fisher_example 
        boundary_example_bc = boundary_calculation(X_exp_lda_bc, y_exp_fisher)
    # else: print("Skipping boundary_calculation example due to shape mismatch.")
# else:
    # print("Skipping boundary_calculation example as w_fisher_example is not available.")

## 5. LDA Classifier Function

This function uses Fisher's LDA to classify test data. It first computes the discriminant vector `W` from the training data, projects both training and test data onto `W`, calculates a decision boundary from the projected training data, and then classifies the projected test data based on this boundary.

In [None]:
def lda_classifier(X_train, y_train, X_test):
    """
    Label test samples by thresholding their Fisher-LDA projection.

    The discriminant vector comes from fisher_discriminant and the threshold
    from boundary_calculation, both computed on the training data.

    Args:
        X_train (np.ndarray): Standardized training data (samples x features).
        y_train (np.ndarray): Binary labels (0 or 1) for X_train.
        X_test (np.ndarray): Standardized test data (samples x features).

    Returns:
        np.ndarray: Predicted labels (0 or 1) for X_test.
                    An empty array is returned when the discriminant vector,
                    the boundary or a projected class mean cannot be obtained.
    """
    if X_train.ndim == 1:
        X_train = X_train.reshape(-1, 1)
    if X_test.ndim == 1:
        X_test = X_test.reshape(-1, 1)

    W = fisher_discriminant(X_train, y_train)
    if W is None:
        return np.array([])

    # Accept a single-column 2D vector; any other multi-dimensional shape is rejected.
    if W.ndim > 1:
        if W.shape[1] != 1:
            return np.array([])
        W = W.flatten()

    train_proj = X_train @ W
    test_proj = X_test @ W

    threshold = boundary_calculation(train_proj, y_train)
    if np.isnan(threshold):
        return np.array([])

    proj_0 = train_proj[y_train == 0]
    proj_1 = train_proj[y_train == 1]
    center_0 = np.mean(proj_0) if proj_0.size > 0 else np.nan
    center_1 = np.mean(proj_1) if proj_1.size > 0 else np.nan
    if np.isnan(center_0) or np.isnan(center_1):
        return np.array([])

    # Class 1 takes whichever side of the threshold its projected mean lies on;
    # a point exactly on the threshold goes to the upper side.
    if center_1 >= center_0:
        is_class_1 = test_proj >= threshold
    else:
        is_class_1 = test_proj < threshold

    return np.where(is_class_1, 1, 0)

### Example Usage for lda_classifier

In [None]:
# Reuse the fisher_discriminant example data as the training set.
X_train_lda_cl_example = X_exp_fisher
y_train_lda_cl_example = y_exp_fisher
# Five hand-picked 2-D query points, including the origin.
X_test_lda_cl_example = np.array([
    [-2, -2], [ 2,  2], [ 0,  0], [-0.5, 1], [ 1, -0.5]
])
y_predictions_lda_cl = lda_classifier(X_train_lda_cl_example, y_train_lda_cl_example, X_test_lda_cl_example)

## 6. Calculate Mean, Variance, and Prior for LDA-Projected Data

This function calculates the mean, variance, and prior probability for each class from the 1D LDA-projected training data. These statistics are essential for building a Gaussian Naive Bayes classifier or a Quadratic Discriminant Analysis (QDA) classifier on the projected data.

**Methodology:**
1.  **Separate Projected Data by Class**: Given `X_train_lda` (1D array of projected training samples) and `y_train` (corresponding labels):
    - `class_0_data = X_train_lda[y_train == 0]`
    - `class_1_data = X_train_lda[y_train == 1]`
2.  **Calculate Means**: 
    - `mean_class_0 = np.mean(class_0_data)` (or 0 if `class_0_data` is empty).
    - `mean_class_1 = np.mean(class_1_data)` (or 0 if `class_1_data` is empty).
3.  **Calculate Variances**:
    - `variance_class_0 = np.var(class_0_data)` (or 1 if `class_0_data` has < 2 samples).
    - `variance_class_1 = np.var(class_1_data)` (or 1 if `class_1_data` has < 2 samples).
    The default variance of 1 for small samples or empty classes helps prevent issues like zero variance in likelihood calculations.
4.  **Calculate Priors**:
    - `n_total = len(y_train)`
    - `prior_class_0 = len(class_0_data) / n_total` (or 0 if `n_total` is 0).
    - `prior_class_1 = len(class_1_data) / n_total` (or 0 if `n_total` is 0).
5.  **Return**: The function returns these six values: `mean_class_0, variance_class_0, prior_class_0, mean_class_1, variance_class_1, prior_class_1`.

In [None]:
def mean_variance_prior(X_train_lda, y_train):
    """
    Per-class mean, variance and prior of 1D LDA-projected training data.

    Args:
        X_train_lda (np.ndarray): Training data projected onto the LDA vector
            (1D array, or a 2D array with a single column).
        y_train (np.ndarray): Binary labels (0 or 1) for the training data.

    Returns:
        tuple: (mean0, var0, prior0, mean1, var1, prior1).
               The mean defaults to 0.0 for an empty class and the variance
               to 1.0 for a class with fewer than two samples. For empty or
               size-mismatched input the tuple (0, 1, 0, 0, 1, 0) is returned.

    Raises:
        ValueError: If X_train_lda is 2D with more than one column.
    """
    projected = X_train_lda
    if projected.ndim != 1:
        if projected.shape[1] != 1:
            raise ValueError("X_train_lda must be a 1D array or a 2D array with one column.")
        projected = projected.flatten()

    unusable = (
        projected.size == 0
        or y_train.size == 0
        or projected.size != y_train.size
    )
    if unusable:
        return (0, 1, 0, 0, 1, 0)  # mean=0, var=1, prior=0 for both classes

    groups = [projected[y_train == 0], projected[y_train == 1]]
    n_total = len(y_train)

    stats = []
    for members in groups:
        count = members.size
        center = np.mean(members) if count > 0 else 0.0
        # Population variance (ddof=0); a class with 0 or 1 samples gets 1.0.
        spread = np.var(members, ddof=0) if count >= 2 else 1.0
        weight = count / n_total if n_total > 0 else 0.0
        stats.extend((center, spread, weight))

    return tuple(stats)

### Example Usage for mean_variance_prior

In [None]:
# Runs only when the fisher_discriminant example cell has defined its data and produced a vector.
if 'X_exp_fisher' in globals() and 'y_exp_fisher' in globals() and 'w_fisher_example' in globals() and w_fisher_example is not None:
    # Project the example data onto the discriminant vector before computing per-class statistics.
    current_X_exp_lda_mvp = X_exp_fisher @ w_fisher_example 
    current_y_exp_fisher_mvp = y_exp_fisher
    m0_mvp, v0_mvp, p0_mvp, m1_mvp, v1_mvp, p1_mvp = mean_variance_prior(current_X_exp_lda_mvp, current_y_exp_fisher_mvp)
    # print(f"Class 0 (MVP Example): Mean={m0_mvp:.4f}, Variance={v0_mvp:.4f}, Prior={p0_mvp:.4f}")
    # print(f"Class 1 (MVP Example): Mean={m1_mvp:.4f}, Variance={v1_mvp:.4f}, Prior={p1_mvp:.4f}")
# else:
    # print("Skipping mean_variance_prior example as prerequisite variables are not available.")

## 7. Gaussian Likelihood Function

This function calculates the likelihood of a data point `x` belonging to a class, assuming the class's data distribution is Gaussian (normal). This is a key component in Gaussian Naive Bayes and QDA/LDA classifiers.

**Gaussian Probability Density Function (PDF) Formula:**
P(x | C_k) = (1 / sqrt(2 * pi * sigma_k^2)) * exp(-((x - mu_k)^2) / (2 * sigma_k^2))

Where:
- `x` is the data point (a scalar value in the 1D projected space).
- `mu_k` is the mean of class C_k in the projected space.
- `sigma_k^2` is the variance of class C_k in the projected space.
- `pi` is the mathematical constant pi.

To avoid numerical issues (e.g., division by zero if variance is very small or zero), a small epsilon (e.g., 1e-9) is added to the variance terms in the denominator and under the square root.

In [None]:
def likelihood(mean, variance, x):
    """
    Evaluate the Gaussian probability density N(mean, variance) at x.

    Args:
        mean (float): Mean of the Gaussian distribution.
        variance (float): Variance of the Gaussian distribution.
        x (float or np.ndarray): Point(s) at which to evaluate the density.

    Returns:
        float or np.ndarray: Density value(s), same shape as x.
    """
    # A tiny offset keeps the density finite when the variance is zero.
    var_safe = variance + 1e-9
    squared_dev = (x - mean)**2
    scale = 1 / (np.sqrt(2 * np.pi * var_safe))
    return scale * np.exp(-squared_dev / (2 * var_safe))

### Example Usage for likelihood

In [None]:
# Standard normal evaluated at its mean.
mean_ex_lh = 0.0
variance_ex_lh = 1.0
x_point_ex_lh = 0.0
pdf_val_lh = likelihood(mean=mean_ex_lh, variance=variance_ex_lh, x=x_point_ex_lh)
# print(f"Likelihood for x={x_point_ex_lh}, mean={mean_ex_lh}, variance={variance_ex_lh}: {pdf_val_lh:.4f}")

## 8. LDA Classifier with MAP Rule

This function classifies test data using Fisher's LDA followed by a Maximum A Posteriori (MAP) decision rule. It calculates class-conditional probabilities (likelihoods) using Gaussian distributions for the projected data and combines them with class priors to determine the posterior probabilities.

**Methodology:**
1.  **Compute Discriminant Vector `W`**: Same as `lda_classifier` (using `fisher_discriminant`).
2.  **Project Data**: Project `X_train` and `X_test` onto `W` to get `X_train_lda` and `X_test_lda`.
3.  **Calculate Class Statistics and Priors**: Use `mean_variance_prior(X_train_lda, y_train)` to get `mean0, var0, prior0, mean1, var1, prior1`.
4.  **Classify Test Data (MAP Rule)**: For each `x_sample` in `X_test_lda`:
    *   Calculate likelihood for class 0: `lh0 = likelihood(mean0, var0, x_sample)`.
    *   Calculate likelihood for class 1: `lh1 = likelihood(mean1, var1, x_sample)`.
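    *   Calculate the unnormalized posterior for class 0: `post0 = lh0 * prior0`.
    *   Calculate the unnormalized posterior for class 1: `post1 = lh1 * prior1`. (The shared evidence term `P(x)` is omitted because it is the same for both classes and does not affect the comparison.)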
    *   Predict class 1 if `post1 >= post0`, otherwise predict class 0.
5.  **Return Predictions**: A NumPy array of predicted labels for `X_test`.

In [None]:
def _gaussian_log_posterior(mean, variance, prior, x):
    """Unnormalized log-posterior log(prior) + log N(x; mean, variance)."""
    if prior <= 0:
        # A class that never occurs in training can never win the comparison.
        return np.full(np.shape(x), -np.inf)
    stable_variance = variance + 1e-9  # same stabilizer as likelihood()
    return (
        np.log(prior)
        - 0.5 * np.log(2 * np.pi * stable_variance)
        - ((x - mean) ** 2) / (2 * stable_variance)
    )


def lda_classifier_map(X_train, y_train, X_test):
    """
    Classifies test data using Fisher's LDA with a MAP decision rule.

    Training and test data are projected onto the Fisher discriminant vector.
    Each class is modeled as a 1D Gaussian in the projected space and a test
    point is assigned to the class with the larger posterior (ties go to
    class 1). The comparison is done on log-posteriors: products of raw
    densities underflow to 0 for points far from both class means, which
    would make the tie rule label every such point as class 1.

    Args:
        X_train (np.ndarray): Standardized training data (samples x features).
        y_train (np.ndarray): Binary labels (0 or 1) for X_train.
        X_test (np.ndarray): Standardized test data (samples x features).

    Returns:
        np.ndarray: Predicted labels (0 or 1) for X_test.
                    Returns an empty array if the discriminant vector cannot
                    be computed, and an all-zero array if the class
                    statistics are unusable.
    """
    if X_train.ndim == 1:
        X_train = X_train.reshape(-1, 1)
    if X_test.ndim == 1:
        if X_test.size > 0:
            X_test = X_test.reshape(-1, 1)
        else:
            # Give an empty test set the training feature dimension so the projection is valid.
            X_test = np.array([]).reshape(0, X_train.shape[1])

    W = fisher_discriminant(X_train, y_train)
    if W is None:
        print("LDA MAP Classifier Error: Could not compute discriminant vector W.")
        return np.array([])

    if W.ndim > 1 and W.shape[1] == 1:
        W = W.flatten()
    elif W.ndim > 1:
        print(f"LDA MAP Classifier Error: Discriminant vector W has unexpected shape {W.shape}.")
        return np.array([])

    X_train_lda = X_train @ W
    X_test_lda = X_test @ W  # 1D array, one projected value per test sample

    mean0, var0, prior0, mean1, var1, prior1 = mean_variance_prior(X_train_lda, y_train)

    # Defensive checks: neither should trigger once fisher_discriminant has succeeded.
    if prior0 == 0 and prior1 == 0:
        print("LDA MAP Classifier Error: Both class priors are zero. Cannot classify.")
        return np.array([0] * len(X_test_lda))
    if np.isnan(mean0) or np.isnan(var0) or np.isnan(mean1) or np.isnan(var1):
        print("LDA MAP Classifier Error: Class statistics (mean/var) are NaN. Cannot classify.")
        return np.array([0] * len(X_test_lda))

    log_post0 = _gaussian_log_posterior(mean0, var0, prior0, X_test_lda)
    log_post1 = _gaussian_log_posterior(mean1, var1, prior1, X_test_lda)

    # MAP rule, vectorized over all test samples; ties go to class 1.
    return np.where(log_post1 >= log_post0, 1, 0)

### Example Usage for lda_classifier_map

In [None]:
# Using X_exp_fisher and y_exp_fisher from the fisher_discriminant example as training data
# Training data: the arrays from the fisher_discriminant example.
X_train_map_example = X_exp_fisher
y_train_map_example = y_exp_fisher

# Test data: the query points from the lda_classifier example (X_test_lda_cl_example).
X_test_map_example = X_test_lda_cl_example

print("LDA MAP Classifier Example:")
# print("X_train:\n", X_train_map_example)
# print("y_train:\n", y_train_map_example)
# print("X_test:\n", X_test_map_example)

y_predictions_lda_map = lda_classifier_map(X_train_map_example, y_train_map_example, X_test_map_example)

if y_predictions_lda_map is None or y_predictions_lda_map.size == 0:
    print("LDA MAP classification failed or returned no predictions.")
else:
    print("\nPredicted labels for X_test (MAP rule):", y_predictions_lda_map)
    # Test points: [[-2,-2], [2,2], [0,0], [-0.5,1], [1,-0.5]].
    # Priors are 0.5/0.5, so the result should be similar to lda_classifier if its boundary is at 0.
    # For this data, m0_proj ~ -1.34, m1_proj ~ 1.34. Variances are small and equal.
    # For x_proj = 0, likelihoods should be equal, posteriors equal, predict 1 (due to >=).
    # Expected (approximate): [0, 1, 1, ?, ?]

## Lab 3: Full Workflow and Evaluation

### 1. Load Datasets (`lab3_training.csv`, `lab3_testing.csv`)

In [None]:
# Load the lab CSVs; fall back to synthetic data so the rest of the workflow
# can still run when the files are not present.
# NOTE: if only one of the two files is missing, BOTH frames are replaced by
# dummy data, including a training frame that was already read.
try:
    training_df = pd.read_csv('lab3_training.csv')
    testing_df = pd.read_csv('lab3_testing.csv')
    print("Successfully loaded lab3_training.csv and lab3_testing.csv")
except FileNotFoundError:
    print("CSV files not found. Creating dummy data for final workflow.")
    # Dummy training_df: 100 rows, 3 features + 'label'
    # The label column is built balanced up front (50 zeros then 50 ones).
    # Assigning through training_df['label'].iloc[...] afterwards is chained
    # assignment and does not modify the frame under pandas Copy-on-Write.
    training_data_dict = {
        'feature1': np.random.rand(100) * 10 - 5,
        'feature2': np.random.rand(100) * 20 - 10,
        'feature3': np.random.normal(0, 1, 100),
        'label': np.repeat([0, 1], 50),
    }
    training_df = pd.DataFrame(training_data_dict)

    # Dummy testing_df: 50 rows, 3 features (no 'Id' or 'label')
    testing_data_dict = {
        'feature1': np.random.rand(50) * 10 - 5,
        'feature2': np.random.rand(50) * 20 - 10,
        'feature3': np.random.normal(0, 1, 50),
    }
    testing_df = pd.DataFrame(testing_data_dict)
    print("Created dummy training and testing DataFrames.")

# print("Training Data Head:\n", training_df.head())
# print("Testing Data Head:\n", testing_df.head())

### 2. Split Training Data for Validation

In [None]:
# Every column except 'label' is used as a feature; 'label' is the target.
X_all = training_df.drop(columns=['label']).values
y_all = training_df['label'].values
# All columns of the test frame are taken as features, in file order.
# NOTE(review): assumes the test CSV has exactly the training feature columns - confirm.
X_test_final_loaded = testing_df.values # This is the test set for final predictions

# Using the utility function defined earlier
# 80/20 split with a fixed seed so the validation set is reproducible.
X_train_full, X_val_full, y_train_full, y_val_full = split_data_numpy(X_all, y_all, test_size=0.2, random_state=42)

print(f"X_train_full shape: {X_train_full.shape}, y_train_full shape: {y_train_full.shape}")
print(f"X_val_full shape: {X_val_full.shape}, y_val_full shape: {y_val_full.shape}")
print(f"X_test_final_loaded shape: {X_test_final_loaded.shape}")

### 3. Standardize Data

In [None]:
# Scale all three sets with the mean/std of the training split only.
X_train_std, X_val_std, X_test_std = StandardizeData(X_train_full, X_val_full, X_test_final_loaded)

### 4. Part 1: Perceptron Classifier

In [None]:
print("--- Training Perceptron ---")
# Weights are sized from the standardized training matrix; train for 100 epochs.
perceptron_model = Perceptron(n_iter=100, X=X_train_std)
perceptron_model.fit(X=X_train_std, y=y_train_full)

# Score on the held-out validation split.
y_pred_perceptron_val = perceptron_model.predict(X=X_val_std)
f1_perceptron = f1_score(y_true=y_val_full, y_pred=y_pred_perceptron_val, zero_division=0)
print(f"Perceptron F1 Score on Validation Set: {f1_perceptron:.4f}")

# Predict the test set and write the submission file (Ids start at 1).
y_pred_perceptron_test = perceptron_model.predict(X=X_test_std)
output_df_part1 = pd.DataFrame({
    'Id': np.arange(len(y_pred_perceptron_test)) + 1,
    'Label': y_pred_perceptron_test,
})
output_df_part1.to_csv('lab3_part1.csv', index=False)
print(f"Perceptron test predictions saved to lab3_part1.csv. Shape: {output_df_part1.shape}")

### 5. Part 2: LDA Classifier (using Fisher's Discriminant)

In [None]:
print("\n--- Training LDA Classifier ---")
# lda_classifier returns an empty array when it cannot classify.
y_pred_lda_val = lda_classifier(X_train_std, y_train_full, X_val_std)
if y_pred_lda_val.size == 0:
    print("LDA Classifier validation failed, F1 score not calculated.")
else:
    f1_lda = f1_score(y_true=y_val_full, y_pred=y_pred_lda_val, zero_division=0)
    print(f"LDA Classifier F1 Score on Validation Set: {f1_lda:.4f}")

# Predict the test set; the submission file is written only on success (Ids start at 1).
y_pred_lda_test = lda_classifier(X_train_std, y_train_full, X_test_std)
if y_pred_lda_test.size == 0:
    print("LDA Classifier test prediction failed. File not saved.")
else:
    output_df_part2 = pd.DataFrame({
        'Id': np.arange(len(y_pred_lda_test)) + 1,
        'Label': y_pred_lda_test,
    })
    output_df_part2.to_csv('lab3_part2.csv', index=False)
    print(f"LDA Classifier test predictions saved to lab3_part2.csv. Shape: {output_df_part2.shape}")

### 6. Part 3: LDA Classifier with MAP Rule

In [None]:
print("\n--- Training LDA Classifier with MAP Rule ---")
# An empty prediction array means lda_classifier_map produced nothing to score.
y_pred_lda_map_val = lda_classifier_map(X_train_std, y_train_full, X_val_std)
if y_pred_lda_map_val.size == 0:
    print("LDA MAP Classifier validation failed, F1 score not calculated.")
else:
    f1_lda_map = f1_score(y_true=y_val_full, y_pred=y_pred_lda_map_val, zero_division=0)
    print(f"LDA MAP Classifier F1 Score on Validation Set: {f1_lda_map:.4f}")

# Predict the test set; the submission file is written only on success (Ids start at 1).
y_pred_lda_map_test = lda_classifier_map(X_train_std, y_train_full, X_test_std)
if y_pred_lda_map_test.size == 0:
    print("LDA MAP Classifier test prediction failed. File not saved.")
else:
    output_df_part3 = pd.DataFrame({
        'Id': np.arange(len(y_pred_lda_map_test)) + 1,
        'Label': y_pred_lda_map_test,
    })
    output_df_part3.to_csv('lab3_part3.csv', index=False)
    print(f"LDA MAP Classifier test predictions saved to lab3_part3.csv. Shape: {output_df_part3.shape}")