Hyperparameter tuning: SVM

In [None]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, classification_report
import matplotlib.pyplot as plt
from svm import SoftMarginSVMQP  # Import your custom SVM class or the sklearn SVM class if needed
from utils import *
from config import *

def plot_images(images, labels=None, predictions=None, ncols=5, figsize=(15, 10)):
    """
    Plot a grid of images with optional labels and predictions.

    Parameters:
    - images (list or numpy array): Images to display, indexed by position.
      Each entry is handed to ``imshow`` with a gray colormap; the caller in
      this file passes an array of shape (num_images, 28, 28).
    - labels (list or numpy array, optional): True labels for the images. Default is None.
    - predictions (list or numpy array, optional): Predicted labels for the images. Default is None.
    - ncols (int, optional): Number of columns in the grid. Default is 5.
    - figsize (tuple, optional): Size of the plot figure. Default is (15, 10).

    Returns:
    - None: Displays the images in a grid. Nothing is drawn (and no figure is
      created) when ``images`` is empty.
    """
    num_images = len(images)
    if num_images == 0:
        # A zero-row grid is rejected by plt.subplots, so bail out early.
        return

    nrows = -(-num_images // ncols)  # ceiling division

    # squeeze=False guarantees a 2-D array of axes, even for a 1x1 grid where
    # plt.subplots would otherwise return a bare Axes without .ravel().
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes = axes.ravel()  # Flatten the axes array to iterate over

    for i in range(num_images):
        ax = axes[i]
        ax.imshow(images[i], cmap='gray')  # Display the image

        label_text = ""
        if labels is not None:
            label_text += f"True: {labels[i]}"
        if predictions is not None:
            label_text += f"\nPred: {predictions[i]}"

        ax.set_title(label_text, fontsize=10)
        ax.axis('off')  # Hide axes for clarity

    # Blank out the trailing cells of the last row that hold no image
    for unused_ax in axes[num_images:]:
        unused_ax.axis('off')

    plt.tight_layout()
    plt.show()

def grid_search_svm(model_class, X_train, y_train, X_val, y_val, param_grid):
    """
    Exhaustively evaluate every (C, kernel, gamma) combination and keep the best.

    Parameters:
    - model_class (class): Model constructor accepting ``C``, ``kernel`` and
      ``gamma`` keyword arguments; instances must expose ``fit`` and ``predict``.
    - X_train, y_train (numpy array): Training data and labels.
    - X_val, y_val (numpy array): Validation data and labels used for scoring.
    - param_grid (list of dict): Each dict maps 'C', 'kernel' and 'gamma' to an
      iterable of candidate values.

    Returns:
    - tuple ``(best_model, best_params, best_score)``. When no combination is
      evaluated the result is ``(None, None, -inf)``.

    NOTE(review): ``val_score`` is not defined in this file (it arrives through
    a star import); the log line labels it an F1 score and it is called here as
    ``val_score(y_val, predictions)`` - confirm the expected argument order.
    """
    # Lazily enumerate combinations in the same order nested loops would.
    combinations = (
        (c_value, kernel_name, gamma_value)
        for grid in param_grid
        for c_value in grid['C']
        for kernel_name in grid['kernel']
        for gamma_value in grid['gamma']
    )

    # Running winner as (model, params, score); only a strictly higher score replaces it.
    winner = (None, None, -float('inf'))
    for c_value, kernel_name, gamma_value in combinations:
        candidate = model_class(C=c_value, kernel=kernel_name, gamma=gamma_value)
        candidate.fit(X_train, y_train)
        val_predictions = candidate.predict(X_val)
        candidate_score = val_score(y_val, val_predictions)
        print(f"Testing Parameters: {[c_value, kernel_name, gamma_value]}, F1 Score: {candidate_score}")
        if candidate_score > winner[2]:
            chosen = {'C': c_value, 'kernel': kernel_name, 'gamma': gamma_value}
            winner = (candidate, chosen, candidate_score)
    return winner

def randomized_search_svm(model_class, X_train, y_train, X_val, y_val, param_distributions, n_iter=10):
    """
    Perform Randomized Search for SVM hyperparameters.

    Each iteration draws one value per hyperparameter uniformly at random
    (using NumPy's global random state), fits a fresh model and scores it on
    the validation data with ``f1_score``.

    Parameters:
    - model_class (class): The model class to use (e.g., SVM). It is called
      with the sampled hyperparameters as keyword arguments.
    - X_train, y_train (numpy array): Training data and labels.
    - X_val, y_val (numpy array): Validation data and labels.
    - param_distributions (dict): Maps each hyperparameter name to a sequence
      of candidate values. A plain integer ``n`` is still accepted and means
      "draw from range(n)", as ``np.random.choice(n)`` does.
    - n_iter (int): Number of iterations for the randomized search.

    Returns:
    - best_model (model): The best model found (None if nothing was evaluated).
    - best_params (dict): The best hyperparameters (None if nothing was evaluated).
    - best_score (float): The best F1 score (-inf if nothing was evaluated).
    """
    best_params = None
    best_score = -float('inf')
    best_model = None
    for _ in range(n_iter):
        param = {}
        for key, values in param_distributions.items():
            if isinstance(values, (int, np.integer)):
                # Legacy shorthand: an integer population size.
                param[key] = np.random.choice(values)
            else:
                # Sample an index rather than calling np.random.choice on the
                # values: choice() first converts the list to an ndarray, which
                # stringifies mixed lists such as ['scale', 0.1] and returns
                # NumPy scalars instead of the original Python objects.
                options = list(values)
                param[key] = options[np.random.randint(len(options))]

        model = model_class(**param)
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)
        score = f1_score(y_val, predictions)
        print(f"Testing Parameters: {param}, F1 Score: {score}")
        if score > best_score:
            best_score = score
            best_params = param
            best_model = model
    return best_model, best_params, best_score

def evaluate_model(model, X_train, y_train, X_test, y_test):
    """
    Score a fitted model on a training split and a held-out split.

    Parameters:
    - model (model): Fitted model exposing ``predict``.
    - X_train, y_train (numpy array): Training data and labels.
    - X_test, y_test (numpy array): Held-out data and labels.

    Returns a 5-tuple:
    - train_accuracy (float): ``accuracy_score`` on the training data.
    - test_accuracy (float): ``accuracy_score`` on the held-out data.
    - train_f1 (float): ``f1_score`` on the training data.
    - test_f1 (float): ``f1_score`` on the held-out data.
    - misclassified_instances (numpy array): First-axis positions where the
      held-out prediction differs from ``y_test``.
    """
    preds_on_train = model.predict(X_train)
    preds_on_test = model.predict(X_test)

    accuracy_pair = (
        accuracy_score(y_train, preds_on_train),
        accuracy_score(y_test, preds_on_test),
    )
    f1_pair = (
        f1_score(y_train, preds_on_train),
        f1_score(y_test, preds_on_test),
    )

    # Element-wise mismatch mask -> positions of the wrong predictions
    mismatch_mask = preds_on_test != y_test
    wrong_positions = np.where(mismatch_mask)[0]
    return (*accuracy_pair, *f1_pair, wrong_positions)

def visualize_misclassified(X_test, y_test, misclassified_indices, image_shape=(28, 28)):
    """
    Visualize misclassified instances in SVM.

    Parameters:
    - X_test (numpy array): Test data, one flattened image per row.
    - y_test (numpy array): Test labels.
    - misclassified_indices (numpy array): Indices of misclassified instances.
    - image_shape (tuple, optional): (height, width) each row is reshaped to.
      Default is (28, 28).

    Returns:
    - None: Displays misclassified images titled with their true label.
      Returns without plotting when there are no misclassified instances.
    """
    if len(misclassified_indices) == 0:
        # An empty selection would ask the plotting helper for a zero-row
        # grid, which matplotlib rejects; there is nothing to show anyway.
        return

    reshaped_images = X_test[misclassified_indices].reshape(-1, *image_shape)
    plot_images(reshaped_images, y_test[misclassified_indices])

import matplotlib.pyplot as plt
import numpy as np

def plot_support_vectors_2D(svm_model, num_images=6, image_size=(28, 28)):
    """
    Plot the support vectors with the largest alphas as 2-D images.

    Parameters:
    - svm_model: Trained SVM model (SoftMarginSVMQP) exposing ``alphas``,
      ``sv_X`` and ``sv_y``.
      NOTE(review): assumes ``alphas`` holds one multiplier per row of
      ``sv_X``/``sv_y`` - confirm against the SVM implementation.
    - num_images: Maximum number of top support vectors to plot. Fewer are
      shown when the model has fewer support vectors.
    - image_size: The original image size (e.g., (28, 28) for MNIST).

    Returns:
    - None: Displays the images in a 3-column grid; returns without plotting
      when there is nothing to show.
    """
    # Flatten so a column vector of alphas (n, 1) ranks the same as a 1-D one;
    # argsort on an (n, 1) array would sort each single-element row instead.
    alphas = np.ravel(svm_model.alphas)
    top_indices = np.argsort(alphas)[::-1][:num_images]  # descending alpha

    shown = len(top_indices)
    if shown == 0:
        return

    # Reshape by the number actually selected, not by num_images, so a model
    # with fewer support vectors than requested still plots.
    top_sv_X = np.asarray(svm_model.sv_X)[top_indices].reshape(shown, *image_size)
    # One label per selected vector, whether sv_y is 1-D or a column vector
    top_sv_y = np.asarray(svm_model.sv_y)[top_indices].reshape(shown, -1)[:, 0]

    # Size the grid from the data: 3 columns, as many rows as needed
    # (6 images -> the original 2x3 layout at figsize (10, 6)).
    ncols = 3
    nrows = -(-shown // ncols)  # ceiling division
    fig, axes = plt.subplots(nrows, ncols, figsize=(10, 3 * nrows), squeeze=False)
    for i, ax in enumerate(axes.flat):
        if i < shown:
            ax.imshow(top_sv_X[i], cmap='gray')
            ax.set_title(f"Label: {top_sv_y[i]}")
        ax.axis('off')  # Hide axes (also blanks unused trailing cells)

    plt.tight_layout()
    plt.show()

# Example usage: tune, inspect and evaluate the SVM end to end.
# MNISTPreprocessor, filter_dataset, convert_labels_to_svm_labels and
# ENTRY_NUMBER_LAST_DIGIT are not defined in this file; they come from the
# star imports of utils/config.
if __name__ == "__main__":
    # Load the training split.
    # NOTE(review): filter_dataset / convert_labels_to_svm_labels presumably
    # reduce MNIST to a binary task keyed on ENTRY_NUMBER_LAST_DIGIT - confirm in utils.
    train_processor = MNISTPreprocessor('./dataset/train')
    train_X, train_y = train_processor.get_all_data()
    train_X, train_y = filter_dataset(train_X, train_y, ENTRY_NUMBER_LAST_DIGIT)
    train_y = convert_labels_to_svm_labels(train_y, ENTRY_NUMBER_LAST_DIGIT)
    
    # Load the validation split with the same filtering and label conversion.
    val_processor = MNISTPreprocessor('./dataset/val')
    val_X, val_y = val_processor.get_all_data()
    val_X, val_y = filter_dataset(val_X, val_y, ENTRY_NUMBER_LAST_DIGIT)
    val_y = convert_labels_to_svm_labels(val_y, ENTRY_NUMBER_LAST_DIGIT)

    # Grid Search for SVM (a single combination here: C=1, linear kernel, gamma=1)
    param_grid_svm = [
        {'C': [1], 'kernel': ['linear'],'gamma':[1]}
    ]
    print(param_grid_svm)
    svm_model, svm_best_params, svm_best_score = grid_search_svm(SoftMarginSVMQP, train_X, train_y, val_X, val_y, param_grid_svm)
    # Show the 6 support vectors with the largest alphas as 28x28 images
    plot_support_vectors_2D(svm_model,6,(28,28))
    print("SVM Best Parameters:", svm_best_params)
    print("SVM Best F1 Score:", svm_best_score)

    # Evaluate SVM Model
    # NOTE: the "Test" metrics printed below are computed on the validation
    # split (val_X, val_y); no separate test set is loaded in this script.
    train_acc_svm, test_acc_svm, train_f1_svm, test_f1_svm, svm_misclassified = evaluate_model(svm_model, train_X, train_y, val_X, val_y)
    print("SVM Training Accuracy:", train_acc_svm)
    print("SVM Test Accuracy:", test_acc_svm)
    print("SVM Training F1 Score:", train_f1_svm)
    print("SVM Test F1 Score:", test_f1_svm)
    
    # Visualize misclassified instances (validation rows the model got wrong)
    visualize_misclassified(val_X, val_y, svm_misclassified)

Hyperparameter tuning: ensembling (Random Forest and AdaBoost)

In [None]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report

from ensembling import RandomForestClassifier, AdaBoostClassifier
from utils import *
from config import *

import matplotlib.pyplot as plt

def plot_images(images, labels=None, predictions=None, ncols=5, figsize=(15, 10)):
    """
    Plot a grid of images with optional labels and predictions.

    Parameters:
    - images (list or numpy array): Images to display, indexed by position.
      Each entry is handed to ``imshow`` with a gray colormap.
    - labels (list or numpy array, optional): True labels for the images. Default is None.
    - predictions (list or numpy array, optional): Predicted labels for the images. Default is None.
    - ncols (int, optional): Number of columns in the grid. Default is 5.
    - figsize (tuple, optional): Size of the plot figure. Default is (15, 10).

    Returns:
    - None: Displays the images in a grid. Nothing is drawn (and no figure is
      created) when ``images`` is empty.
    """
    num_images = len(images)
    if num_images == 0:
        # A zero-row grid is rejected by plt.subplots, so bail out early.
        return

    nrows = -(-num_images // ncols)  # ceiling division

    # Create the plot. squeeze=False guarantees a 2-D array of axes, even for
    # a 1x1 grid where plt.subplots would otherwise return a bare Axes.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes = axes.ravel()  # Flatten the axes array to iterate over

    for i in range(num_images):
        ax = axes[i]
        ax.imshow(images[i], cmap='gray')  # Display the image

        # If labels and predictions are provided, display them as well
        label_text = ""
        if labels is not None:
            label_text += f"True: {labels[i]}"
        if predictions is not None:
            label_text += f"\nPred: {predictions[i]}"

        ax.set_title(label_text, fontsize=10)
        ax.axis('off')  # Hide axes for clarity

    # Hide any unused subplots if num_images isn't a multiple of ncols
    for unused_ax in axes[num_images:]:
        unused_ax.axis('off')

    plt.tight_layout()
    plt.show()

def grid_search_ada(model_class, X_train, y_train, X_val, y_val, param_grid):
    """
    Exhaustively evaluate every (num_trees, max_depth) pair and keep the best.

    Parameters:
    - model_class (class): Constructor accepting ``num_trees`` and ``max_depth``
      keyword arguments; instances must expose ``fit`` and ``predict``.
    - X_train, y_train (numpy array): Training data and labels.
    - X_val, y_val (numpy array): Validation data and labels used for scoring.
    - param_grid (list of dict): Each dict maps 'num_trees' and 'max_depth' to
      an iterable of candidate values.

    Returns:
    - tuple ``(best_model, best_params, best_score)`` where ``best_params`` is
      the list ``[num_trees, max_depth]``. When no pair is evaluated the result
      is ``(None, None, -inf)``.

    NOTE(review): ``val_score`` is not defined in this file (star import); it
    is called here as ``val_score(predictions, y_val)`` - confirm the expected
    argument order.
    """
    # Lazily enumerate pairs in the same order nested loops would.
    pairs = (
        (tree_count, depth)
        for grid in param_grid
        for tree_count in grid['num_trees']
        for depth in grid['max_depth']
    )

    # Running leader as (model, params, score); only a strictly higher score replaces it.
    leader = (None, None, -float('inf'))
    for tree_count, depth in pairs:
        candidate = model_class(num_trees=tree_count, max_depth=depth)
        candidate.fit(X_train, y_train)
        val_predictions = candidate.predict(X_val)
        candidate_score = val_score(val_predictions, y_val)
        print(f"Testing Parameters: {[tree_count, depth]}, F1 Score: {candidate_score}")
        if candidate_score > leader[2]:
            leader = (candidate, [tree_count, depth], candidate_score)
    return leader

def grid_search(model_class, X_train, y_train, X_val, y_val, param_grid):
    """
    Exhaustively evaluate every (num_trees, max_depth, feature_subsample_size)
    combination and keep the best.

    Parameters:
    - model_class (class): Constructor accepting ``num_trees``, ``max_depth``
      and ``feature_subsample_size`` keyword arguments; instances must expose
      ``fit`` and ``predict``.
    - X_train, y_train (numpy array): Training data and labels.
    - X_val, y_val (numpy array): Validation data and labels used for scoring.
    - param_grid (list of dict): Each dict maps the three parameter names to an
      iterable of candidate values.

    Returns:
    - tuple ``(best_model, best_params, best_score)`` where ``best_params`` is
      the list ``[num_trees, max_depth, feature_subsample_size]``. When no
      combination is evaluated the result is ``(None, None, -inf)``.

    NOTE(review): ``val_score`` is not defined in this file (star import); it
    is called here as ``val_score(predictions, y_val)`` - confirm the expected
    argument order.
    """
    # Lazily enumerate combinations in the same order nested loops would.
    triples = (
        (tree_count, depth, subsample)
        for grid in param_grid
        for tree_count in grid['num_trees']
        for depth in grid['max_depth']
        for subsample in grid['feature_subsample_size']
    )

    # Running leader as (model, params, score); only a strictly higher score replaces it.
    leader = (None, None, -float('inf'))
    for tree_count, depth, subsample in triples:
        candidate = model_class(
            num_trees=tree_count, max_depth=depth, feature_subsample_size=subsample
        )
        candidate.fit(X_train, y_train)
        val_predictions = candidate.predict(X_val)
        candidate_score = val_score(val_predictions, y_val)
        print(f"Testing Parameters: {[tree_count, depth, subsample]}, F1 Score: {candidate_score}")
        if candidate_score > leader[2]:
            leader = (candidate, [tree_count, depth, subsample], candidate_score)
    return leader


def randomized_search(model_class, X_train, y_train, X_val, y_val, param_distributions, n_iter=10):
    """
    Perform a randomized hyperparameter search.

    Each iteration draws one value per hyperparameter uniformly at random
    (using NumPy's global random state), fits a fresh model and scores it on
    the validation data with ``f1_score``.

    Parameters:
    - model_class (class): The model class to use. It is called with the
      sampled hyperparameters as keyword arguments.
    - X_train, y_train (numpy array): Training data and labels.
    - X_val, y_val (numpy array): Validation data and labels.
    - param_distributions (dict): Maps each hyperparameter name to a sequence
      of candidate values. A plain integer ``n`` is still accepted and means
      "draw from range(n)", as ``np.random.choice(n)`` does.
    - n_iter (int): Number of iterations for the randomized search.

    Returns:
    - best_model (model): The best model found (None if nothing was evaluated).
    - best_params (dict): The best hyperparameters (None if nothing was evaluated).
    - best_score (float): The best F1 score (-inf if nothing was evaluated).
    """
    best_params = None
    best_score = -float('inf')
    best_model = None
    for _ in range(n_iter):
        param = {}
        for key, values in param_distributions.items():
            if isinstance(values, (int, np.integer)):
                # Legacy shorthand: an integer population size.
                param[key] = np.random.choice(values)
            else:
                # Sample an index rather than calling np.random.choice on the
                # values: choice() first converts the list to an ndarray, which
                # stringifies mixed lists and returns NumPy scalars instead of
                # the original Python objects (e.g. a plain int or None).
                options = list(values)
                param[key] = options[np.random.randint(len(options))]

        model = model_class(**param)
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)
        score = f1_score(y_val, predictions)
        print(f"Testing Parameters: {param}, F1 Score: {score}")
        if score > best_score:
            best_score = score
            best_params = param
            best_model = model
    return best_model, best_params, best_score


def evaluate_model(model, X_train, y_train, X_test, y_test):
    """
    Score a fitted model on a training split and a held-out split.

    Parameters:
    - model (model): Fitted model exposing ``predict``.
    - X_train, y_train (numpy array): Training data and labels.
    - X_test, y_test (numpy array): Held-out data and labels.

    Returns a 5-tuple:
    - train_accuracy, test_accuracy (float): ``accuracy_score`` on each split.
    - train_f1, test_f1: ``val_score(predictions, labels)`` on each split.
      NOTE(review): ``val_score`` is not defined in this file (star import);
      the names suggest an F1 metric - confirm.
    - misclassified_instances (numpy array): First-axis positions where the
      held-out prediction differs from ``y_test``.
    """
    preds_on_train = model.predict(X_train)
    preds_on_test = model.predict(X_test)

    accuracy_pair = (
        accuracy_score(y_train, preds_on_train),
        accuracy_score(y_test, preds_on_test),
    )
    score_pair = (
        val_score(preds_on_train, y_train),
        val_score(preds_on_test, y_test),
    )

    # Element-wise mismatch mask -> positions of the wrong predictions
    mismatch_mask = preds_on_test != y_test
    wrong_positions = np.where(mismatch_mask)[0]
    return (*accuracy_pair, *score_pair, wrong_positions)


def visualize_misclassified(X_test, y_test, misclassified_indices):
    """
    Show up to 4 misclassified test images taken from around the middle of
    the misclassified list, each titled with its true label.

    Parameters:
    - X_test (numpy array): Test data, one flattened 28x28 image per row.
    - y_test (numpy array): Test labels.
    - misclassified_indices (numpy array): Indices of misclassified instances.

    Returns:
    - None: Displays the images in a single row of 4 cells. With fewer than 4
      misclassified instances the spare cells stay blank; with none, nothing
      is plotted.
    """
    total = len(misclassified_indices)
    if total == 0:
        return

    # Window of 4 around the middle (2 before, 2 after). The start is clamped
    # at 0: a negative start would be read as "count from the end" and select
    # the wrong rows when fewer than 4 indices are available.
    middle = total // 2
    start = max(middle - 2, 0)
    selected_indices = misclassified_indices[start: middle + 2]
    shown = len(selected_indices)

    # Reshape the images to their 2D form (assuming 28x28 pixels)
    reshaped_images = X_test[selected_indices].reshape(-1, 28, 28)
    labels = y_test[selected_indices]

    # Plot the images
    fig, axes = plt.subplots(1, 4, figsize=(10, 5))
    for i, ax in enumerate(axes):
        if i < shown:
            ax.imshow(reshaped_images[i], cmap='gray')
            ax.set_title(f"True Label: {labels[i]}")
        ax.axis('off')  # also blanks cells that have no image
    plt.show()

def visualize_training_misclassified(X_train, y_train, model):
    """
    Visualize up to 12 misclassified instances from the training data.

    Parameters:
    - X_train (numpy array): Training data, expected as flattened 28x28 images.
    - y_train (numpy array): Training labels.
    - model: Trained classifier (Random Forest, AdaBoost, etc.) exposing ``predict``.

    Returns:
    - None: Displays misclassified images in a 3x4 grid, each titled with its
      true label. With fewer than 12 misclassified instances the spare cells
      stay blank; with none, nothing is plotted.
    """
    # Get predictions on the training data
    y_train_pred = model.predict(X_train)

    # Find misclassified positions in one vectorized comparison. Both sides are
    # flattened so (n,) and (n, 1) label layouts compare element by element.
    misclassified_indices = np.flatnonzero(np.ravel(y_train) != np.ravel(y_train_pred))

    total = len(misclassified_indices)
    if total == 0:
        return

    # Window of 12 around the middle. The start is clamped at 0: a negative
    # start would be read as "count from the end" and select the wrong rows
    # when fewer than 12 indices are available.
    middle = total // 2
    start = max(middle - 6, 0)
    selected_indices = misclassified_indices[start: middle + 6]
    shown = len(selected_indices)

    # Reshape the images to their 2D form (assuming 28x28 pixels)
    reshaped_images = X_train[selected_indices].reshape(-1, 28, 28)
    labels = y_train[selected_indices]

    # Plot the misclassified images
    fig, axes = plt.subplots(3, 4, figsize=(12, 9))  # 3 rows and 4 columns
    for i, ax in enumerate(axes.flat):
        if i < shown:
            ax.imshow(reshaped_images[i], cmap='gray')
            ax.set_title(f"True Label: {labels[i]}")
        ax.axis('off')  # also blanks cells that have no image
    plt.tight_layout()
    plt.show()

# Example usage: tune and evaluate AdaBoost (the Random Forest run is disabled).
# MNISTPreprocessor, filter_dataset, convert_labels_to_svm_labels and
# ENTRY_NUMBER_LAST_DIGIT are not defined in this file; they come from the
# star imports of utils/config.
if __name__ == "__main__":
    # Load the training split.
    # NOTE(review): filter_dataset / convert_labels_to_svm_labels presumably
    # reduce MNIST to a binary task keyed on ENTRY_NUMBER_LAST_DIGIT - confirm in utils.

    train_processor = MNISTPreprocessor('./dataset/train')
    train_X, train_y = train_processor.get_all_data()
    train_X, train_y = filter_dataset(train_X, train_y, ENTRY_NUMBER_LAST_DIGIT)
    train_y = convert_labels_to_svm_labels(train_y, ENTRY_NUMBER_LAST_DIGIT)
    
    # Load the validation split with the same filtering and label conversion.
    val_processor = MNISTPreprocessor('./dataset/val')
    val_X, val_y = val_processor.get_all_data()
    val_X, val_y = filter_dataset(val_X, val_y, ENTRY_NUMBER_LAST_DIGIT)
    val_y = convert_labels_to_svm_labels(val_y, ENTRY_NUMBER_LAST_DIGIT)

    # Grid Search for Random Forest (currently disabled; kept for reference)
    # param_grid_rf = [
    #     {'num_trees': [10, 20, 50, 100], 'max_depth': [2,3,4,5,7], 'feature_subsample_size': [None, 100, 200]}
    # ]
    # param_grid_rf = [
    #     {'num_trees': [50], 'max_depth': [2,3], 'feature_subsample_size': [None, 100, 200]}
    # ]
    # rf_model, rf_best_params, rf_best_score = grid_search(RandomForestClassifier, train_X, train_y, val_X, val_y, param_grid_rf)
    
    # print("Random Forest Best Parameters:", rf_best_params)
    # print("Random Forest Best F1 Score:", rf_best_score)

    # # Evaluate Random Forest Model
    # train_acc_rf, test_acc_rf, train_f1_rf, test_f1_rf, rf_misclassified = evaluate_model(rf_model, train_X, train_y, val_X, val_y)
    # print("Random Forest Training Accuracy:", train_acc_rf)
    # print("Random Forest Test Accuracy:", test_acc_rf)
    # print("Random Forest Training F1 Score:", train_f1_rf)
    # print("Random Forest Test F1 Score:", test_f1_rf)
    
    # Visualize misclassified instances
    # visualize_misclassified(val_X, val_y, rf_misclassified)
    
    # Grid Search for AdaBoost (a single combination here: 10 trees, depth 2)
    param_grid_ab = [
        {'num_trees': [10], 'max_depth': [2]}
    ]
    ab_model, ab_best_params, ab_best_score = grid_search_ada(AdaBoostClassifier, train_X, train_y, val_X, val_y, param_grid_ab)
    
    # ab_best_params is the list [num_trees, max_depth] returned by grid_search_ada
    print("AdaBoost Best Parameters:", ab_best_params)
    print("AdaBoost Best F1 Score:", ab_best_score)

    # Evaluate AdaBoost Model
    # NOTE: the "Test" metrics printed below are computed on the validation
    # split (val_X, val_y); no separate test set is loaded in this script.
    train_acc_ab, test_acc_ab, train_f1_ab, test_f1_ab, ab_misclassified = evaluate_model(ab_model, train_X, train_y, val_X, val_y)
    print("AdaBoost Training Accuracy:", train_acc_ab)
    print("AdaBoost Test Accuracy:", test_acc_ab)
    print("AdaBoost Training F1 Score:", train_f1_ab)
    print("AdaBoost Test F1 Score:", test_f1_ab)
    
    # Visualize misclassified training instances; the validation-set variant
    # below is left disabled.
    visualize_training_misclassified(train_X,train_y,ab_model)
    #visualize_misclassified(val_X, val_y, ab_misclassified)
