In [1]:
# flake8: noqa: E302

import gzip
import os
import shutil

import requests


def download_and_extract_dataset(url, save_path, folder_path):
    """Download and extract dataset if it doesn't exist."""
    if not os.path.exists(save_path):
        print(f"Downloading {os.path.basename(save_path)}...")
        response = requests.get(url)
        with open(save_path, "wb") as file:
            file.write(response.content)

        decompressed_file_name = os.path.splitext(os.path.basename(save_path))[0]
        decompressed_file_path = os.path.join(folder_path, decompressed_file_name)

        with gzip.open(save_path, "rb") as f_in:
            with open(decompressed_file_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)

        print(f"{decompressed_file_name} downloaded and extracted.")
    else:
        print(f"{os.path.basename(save_path)} already exists.")


file_info = [
    (
        "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
        "train-images-idx3-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
        "train-labels-idx1-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
        "t10k-images-idx3-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
        "t10k-labels-idx1-ubyte.gz",
    ),
]

folder_name = "tmp/mnist"
folder_path = os.path.join(os.getcwd(), folder_name)

os.makedirs(folder_path, exist_ok=True)  # Create folder if it doesn't exist

# Download and extract each file
for url, file_name in file_info:
    path_to_save = os.path.join(folder_path, file_name)
    download_and_extract_dataset(url, path_to_save, folder_path)

# %%
import numpy as np


def read_idx3_ubyte_image_file(filename):
    """Read IDX3-ubyte formatted image data."""
    with open(filename, "rb") as f:
        magic_num = int.from_bytes(f.read(4), byteorder="big")
        num_images = int.from_bytes(f.read(4), byteorder="big")
        num_rows = int.from_bytes(f.read(4), byteorder="big")
        num_cols = int.from_bytes(f.read(4), byteorder="big")

        if magic_num != 2051:
            raise ValueError(f"Invalid magic number: {magic_num}")

        images = np.zeros((num_images, num_rows, num_cols), dtype=np.uint8)

        for i in range(num_images):
            for r in range(num_rows):
                for c in range(num_cols):
                    pixel = int.from_bytes(f.read(1), byteorder="big")
                    images[i, r, c] = pixel

    return images


def read_idx1_ubyte_label_file(filename):
    """Read IDX1-ubyte formatted label data."""
    with open(filename, "rb") as f:
        magic_num = int.from_bytes(f.read(4), byteorder="big")
        num_labels = int.from_bytes(f.read(4), byteorder="big")

        if magic_num != 2049:
            raise ValueError(f"Invalid magic number: {magic_num}")

        labels = np.zeros(num_labels, dtype=np.uint8)

        for i in range(num_labels):
            labels[i] = int.from_bytes(f.read(1), byteorder="big")

    return labels


# Example usage
folder_path = os.path.join(
    os.getcwd(), folder_name
)  # Adjust this path to where you stored the files

train_images = read_idx3_ubyte_image_file(
    os.path.join(folder_path, "train-images-idx3-ubyte")
)
train_labels = read_idx1_ubyte_label_file(
    os.path.join(folder_path, "train-labels-idx1-ubyte")
)
test_images = read_idx3_ubyte_image_file(
    os.path.join(folder_path, "t10k-images-idx3-ubyte")
)
test_labels = read_idx1_ubyte_label_file(
    os.path.join(folder_path, "t10k-labels-idx1-ubyte")
)

print(
    f"Shape of train_images: {train_images.shape}"
)  # Should output "Shape of train_images: (60000, 28, 28)"
print(
    f"Shape of train_labels: {train_labels.shape}"
)  # Should output "Shape of train_labels: (60000,)"
print(
    f"Shape of test_images: {test_images.shape}"
)  # Should output "Shape of test_images: (10000, 28, 28)"
print(
    f"Shape of test_labels: {test_labels.shape}"
)  # Should output "Shape of test_labels: (10000,)"

# %%
# Reshape the datasets from 3D to 2D
train_images_2d = train_images.reshape(
    train_images.shape[0], -1
)  # -1 infers the size from the remaining dimensions
test_images_2d = test_images.reshape(test_images.shape[0], -1)

train-images-idx3-ubyte.gz already exists.
train-labels-idx1-ubyte.gz already exists.
t10k-images-idx3-ubyte.gz already exists.
t10k-labels-idx1-ubyte.gz already exists.
Shape of train_images: (60000, 28, 28)
Shape of train_labels: (60000,)
Shape of test_images: (10000, 28, 28)
Shape of test_labels: (10000,)


## Transofrming image to binary

In [2]:
# Thresholding
train_images_binary = np.where(train_images_2d > 127, 1, 0)
test_images_binary = np.where(test_images_2d > 127, 1, 0)

# Check the shape and unique values to confirm the conversion
print(train_images_binary.shape)
print(np.unique(train_images_binary))
print(test_images_binary.shape)
print(np.unique(test_images_binary))

(60000, 784)
[0 1]
(10000, 784)
[0 1]


## Continue as normal

In [3]:
from sklearn.tree import DecisionTreeClassifier

# Create and train a decision tree classifier
clf = DecisionTreeClassifier(max_depth=10, random_state=0)
clf.fit(train_images_binary, train_labels)

In [4]:
num_test_samples = len(test_images_2d)
python_predictions = clf.predict(test_images_2d)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

Python accuracy: 81.41000000000001 %


## Try out an SVM

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_images_binary)
test_features_normalized = scaler.transform(test_images_binary)

In [6]:
from sklearn.svm import SVC

In [7]:
clf = SVC(kernel="rbf", random_state=0, C=1000)
clf.fit(train_features_normalized, train_labels)

In [8]:
num_test_samples = len(test_features_normalized)
python_predictions = clf.predict(test_features_normalized)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

print("Number of support vectors", clf.n_support_)
print("Total number of support vectors", sum(clf.n_support_))

Python accuracy: 97.08 %
Number of support vectors [1098  705 2301 2108 1699 2059 1580 2049 2123 2129]
Total number of support vectors 17851


## Try out an MLP neural network

In [9]:
from sklearn.neural_network import MLPClassifier

In [10]:
# Define and initialize the MLP classifier
mlp_large = MLPClassifier(
    hidden_layer_sizes=(397,), max_iter=30, alpha=0.0001, solver="adam", random_state=0
)
mlp_large.fit(train_features_normalized, train_labels)



In [11]:
# Evaluate the classifier
accuracy = mlp_large.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [mlp_large.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in mlp_large.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.975
Number of neurons per layer: [784, 397, 10]


## Try out a smaller network

In [12]:
# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(24,), max_iter=30, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)



In [13]:
# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [clf.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf.coefs_]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9508
Number of neurons per layer: [784, 24, 10]


## Try out pruning

In [14]:
import copy

from helper import prune_neural_network

pruned_network = copy.deepcopy(clf)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 19056
Number of changed weight parameters: 11483
Number of bias parameters: 34
Number of changed bias parameters: 8
Percentage of weights pruned: 60.26%
Percentage of biases pruned: 23.53%
Remaining number of non-zero weights: 7573
Remaining number of non-zero biases: 26


In [15]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [pruned_network.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in pruned_network.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.943
Number of neurons per layer: [784, 24, 10]


## Prune the larger network

In [16]:
pruned_network = copy.deepcopy(mlp_large)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 315218
Number of changed weight parameters: 265370
Number of bias parameters: 407
Number of changed bias parameters: 188
Percentage of weights pruned: 84.19%
Percentage of biases pruned: 46.19%
Remaining number of non-zero weights: 49848
Remaining number of non-zero biases: 219


In [17]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [pruned_network.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in pruned_network.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9653
Number of neurons per layer: [784, 397, 10]


## Let's try haar features on the binary image

In [18]:
def integral_image(img):
    """
    Compute the integral image of the input img.
    """
    # Convert the image to int32 type
    img = img.astype(np.int32)
    int_img = np.zeros_like(img)

    for x in range(img.shape[1]):
        for y in range(img.shape[0]):
            int_img[y, x] = (
                img[y, x]
                + (int_img[y - 1, x] if y - 1 >= 0 else 0)
                + (int_img[y, x - 1] if x - 1 >= 0 else 0)
                - (int_img[y - 1, x - 1] if x - 1 >= 0 and y - 1 >= 0 else 0)
            )
    return int_img


def compact_haar_features4(img, h=5, w=5):
    int_img = integral_image(img)
    features = []

    for y in range(0, img.shape[0] - h + 1, h):
        for x in range(0, img.shape[1] - w + 1, w):
            # Horizontal edge feature
            white = (
                int_img[y + h // 2 - 1, x + w - 1]
                + int_img[y, x]
                - int_img[y, x + w - 1]
                - int_img[y + h // 2 - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y + h // 2, x]
                - int_img[y + h // 2, x + w - 1]
                - int_img[y + h - 1, x]
            )
            features.append(black - white)

            # Vertical edge feature
            white = (
                int_img[y + h - 1, x + w // 2 - 1]
                + int_img[y, x]
                - int_img[y, x + w // 2 - 1]
                - int_img[y + h - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y, x + w // 2]
                - int_img[y, x + w - 1]
                - int_img[y + h - 1, x + w // 2]
            )
            features.append(black - white)

    return features

In [19]:
haar_1 = compact_haar_features4(train_images_binary[0].reshape(28, 28))
print(haar_1)
len_haar_features = len(haar_1)
print("length of a haar feature", len_haar_features)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 3, -1, -1, -2, -1, 0, 0, 0, 0, 0, 4, 1, -1, 0, 0, 0, 0, 0, 0, 2, 2, 5, 6, 0, 0, 1, 1, 8, 4, -2, 0, 0, 0, 0, 0]
length of a haar feature 50


In [20]:
# compute datasets

num_train = len(train_images)
num_test = len(test_images)

haar_train = np.zeros((num_train, len_haar_features))
haar_test = np.zeros((num_test, len_haar_features))

for i in range(num_train):
    haar_train[i] = compact_haar_features4(train_images_binary[i].reshape(28, 28))

for i in range(num_test):
    haar_test[i] = compact_haar_features4(test_images_binary[i].reshape(28, 28))

In [21]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_haar_features_normalized = scaler.fit_transform(haar_train)
test_haar_features_normalized = scaler.transform(haar_test)

In [22]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(30,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_haar_features_normalized, train_labels)


# Evaluate the classifier
accuracy = clf.score(test_haar_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.8955




In [23]:
layers_sizes = [clf.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf.coefs_]

print("Number of neurons per layer:", layers_sizes)

Number of neurons per layer: [50, 30, 10]


## Add classic features

In [24]:
def average_intensity(image):
    return np.mean(image)


def aspect_ratio(image, threshold=0.5):
    # Threshold the image to create a binary representation
    bin_image = image > threshold
    # Find the bounding box
    row_indices, col_indices = np.nonzero(bin_image)
    max_row, min_row = np.max(row_indices), np.min(row_indices)
    max_col, min_col = np.max(col_indices), np.min(col_indices)

    # Calculate the aspect ratio of the bounding box
    width = max_col - min_col + 1
    height = max_row - min_row + 1

    if height == 0:  # To avoid division by zero
        return 1.0

    return width / height


from scipy.optimize import curve_fit


def linear_func(x, a, b):
    return a * x + b


def avg_dist_to_fitted_line(image, threshold=0.5):
    # Threshold the image
    bin_image = (image > threshold).astype(int)

    # Identify connected components
    labeled_array, num_features = label(bin_image)

    # Find the largest connected component
    largest_component = None
    max_count = 0
    for i in range(1, num_features + 1):
        component = np.where(labeled_array == i)
        count = len(component[0])
        if count > max_count:
            max_count = count
            largest_component = component

    if largest_component is None:
        return 0.0

    x, y = largest_component
    if len(x) <= 1:  # Can't fit a line to a single point or empty set
        return 0.0

    # Fit a line to the largest component
    popt, _ = curve_fit(linear_func, x, y)

    # Calculate the average distance from each point to the line
    distances = np.abs(y - linear_func(x, *popt))
    avg_distance = np.mean(distances)
    return avg_distance


from scipy.ndimage import label


def num_regions_below_threshold(image, threshold=0.5):
    # Threshold the image so that pixels below the threshold are set to 1
    # and those above the threshold are set to 0.
    bin_image = image < threshold

    # Use connected components labeling
    labeled_array, num_features = label(bin_image)

    # Return the number of unique regions
    # (subtracting 1 as one of the labels will be the background)
    return num_features

In [25]:
# compute datasets
num_train = len(train_images_binary)
num_test = len(test_images_binary)

aspect_ratio_train = np.zeros(num_train)
aspect_ratio_test = np.zeros(num_test)

num_white_regions_train = np.zeros(num_train)
num_white_regions_test = np.zeros(num_test)

avg_dist_to_fitted_line_train = np.zeros(num_train)
avg_dist_to_fitted_line_test = np.zeros(num_test)

for i in range(num_train):
    aspect_ratio_train[i] = aspect_ratio(train_images_binary[i].reshape(28, 28))
    num_white_regions_train[i] = num_regions_below_threshold(
        train_images_binary[i].reshape(28, 28)
    )
    avg_dist_to_fitted_line_train[i] = avg_dist_to_fitted_line(
        train_images_binary[i].reshape(28, 28)
    )

for i in range(num_test):
    aspect_ratio_test[i] = aspect_ratio(test_images_binary[i].reshape(28, 28))
    num_white_regions_test[i] = num_regions_below_threshold(
        test_images_binary[i].reshape(28, 28)
    )
    avg_dist_to_fitted_line_test[i] = avg_dist_to_fitted_line(
        test_images_binary[i].reshape(28, 28)
    )



In [26]:
train_features = np.zeros((num_train, 52))

train_features[:, 0] = aspect_ratio_train
train_features[:, 1] = num_white_regions_train
train_features[:, 2:] = haar_train

test_features = np.zeros((num_test, 52))

test_features[:, 0] = aspect_ratio_test
test_features[:, 1] = num_white_regions_test
test_features[:, 2:] = haar_test

In [27]:
print(haar_train[0])
print(train_features[0][0])
print(train_features[0][1])
print(train_features[0][2])
print(train_features[0][3])
print(train_features[0])

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  3.  5.  3.  3. -1. -1.
 -2. -1.  0.  0.  0.  0.  0.  4.  1. -1.  0.  0.  0.  0.  0.  0.  2.  2.
  5.  6.  0.  0.  1.  1.  8.  4. -2.  0.  0.  0.  0.  0.]
0.95
1.0
0.0
0.0
[ 0.95  1.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    3.    5.    3.    3.   -1.   -1.   -2.   -1.    0.    0.
  0.    0.    0.    4.    1.   -1.    0.    0.    0.    0.    0.    0.
  2.    2.    5.    6.    0.    0.    1.    1.    8.    4.   -2.    0.
  0.    0.    0.    0.  ]


In [28]:
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features)

In [29]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf_a = MLPClassifier(
    hidden_layer_sizes=(31,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf_a.fit(train_features_normalized, train_labels)


# Evaluate the classifier
accuracy = clf_a.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [clf_a.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf_a.coefs_]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9179
Number of neurons per layer: [52, 31, 10]




## Let's prune this network

In [30]:
import copy

from helper import prune_neural_network

pruned_network = copy.deepcopy(clf_a)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 1922
Number of changed weight parameters: 700
Number of bias parameters: 41
Number of changed bias parameters: 8
Percentage of weights pruned: 36.42%
Percentage of biases pruned: 19.51%
Remaining number of non-zero weights: 1222
Remaining number of non-zero biases: 33


In [31]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [pruned_network.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in pruned_network.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9149
Number of neurons per layer: [52, 31, 10]


## Try out haar features with step size 3, window 6x6

In [32]:
def compute_haar_features(image):
    if image.shape != (28, 28):
        raise ValueError("Input image must be of shape 28x28.")

    features = []

    # Sliding window
    for i in range(0, 28, 3):  # Slide vertically with a step of 3
        for j in range(0, 28, 3):  # Slide horizontally with a step of 3

            if i + 6 > 28 or j + 6 > 28:
                continue

            # Extract 6x6 window
            window = image[i : i + 6, j : j + 6]

            # Horizontal feature
            horizontal_feature_value = np.sum(window[0:3, :]) - np.sum(window[3:6, :])

            # Vertical feature
            vertical_feature_value = np.sum(window[:, 0:3]) - np.sum(window[:, 3:6])

            features.append(horizontal_feature_value)
            features.append(vertical_feature_value)

    return features

In [33]:
haar_1 = compute_haar_features(train_images_binary[0].reshape(28, 28))
print(haar_1)
len_haar_features = len(haar_1)
print("length of a haar feature", len_haar_features)

[0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -3, -1, -4, 0, -2, 2, 0, 0, -2, -2, -9, -5, -16, -2, -17, -1, -9, 5, -1, 1, 0, 4, 0, 0, 2, -2, 5, -9, 7, -3, 12, 4, 11, 7, 5, 1, 2, 2, 0, 0, 0, 0, 4, -4, 4, -6, -2, 6, -2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, -5, 3, -3, -7, 3, -5, 5, 0, 0, 0, 0, 0, 0, 0, 0, -5, -5, -9, -9, -3, 5, 1, 9, 0, 0, -1, -1, -6, -4, -13, -3, -9, -3, 7, 1, 12, 6, 4, 4, 0, 0, -1, -3, 1, -5, 8, -2, 12, 4, 7, 5, 1, 1, 0, 0, 0, 0]
length of a haar feature 128


In [34]:
# compute datasets

num_train = len(train_images)
num_test = len(test_images)

haar_train = np.zeros((num_train, len_haar_features))
haar_test = np.zeros((num_test, len_haar_features))

for i in range(num_train):
    haar_train[i] = compute_haar_features(train_images_binary[i].reshape(28, 28))

for i in range(num_test):
    haar_test[i] = compute_haar_features(test_images_binary[i].reshape(28, 28))

In [35]:
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(haar_train)
test_features_normalized = scaler.transform(haar_test)

In [36]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(69,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)

# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9679




In [37]:
pruned_network = copy.deepcopy(clf)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 9522
Number of changed weight parameters: 4080
Number of bias parameters: 79
Number of changed bias parameters: 26
Percentage of weights pruned: 42.85%
Percentage of biases pruned: 32.91%
Remaining number of non-zero weights: 5442
Remaining number of non-zero biases: 53


In [38]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.964


## Add other features to the Haar features

In [39]:
train_features = np.zeros((num_train, 130))

train_features[:, 0] = aspect_ratio_train
train_features[:, 1] = num_white_regions_train
train_features[:, 2:] = haar_train

test_features = np.zeros((num_test, 130))

test_features[:, 0] = aspect_ratio_test
test_features[:, 1] = num_white_regions_test
test_features[:, 2:] = haar_test

In [40]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(haar_train)
test_features_normalized = scaler.transform(haar_test)

In [41]:
# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(70,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)

# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9698




In [42]:
pruned_network = copy.deepcopy(clf)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 9660
Number of changed weight parameters: 4206
Number of bias parameters: 80
Number of changed bias parameters: 35
Percentage of weights pruned: 43.54%
Percentage of biases pruned: 43.75%
Remaining number of non-zero weights: 5454
Remaining number of non-zero biases: 45


In [43]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9667


## Let's try to train the network with L1 regularization to make it more sparse and prune it further, using PyTorch

In [55]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

# Convert your dataset to PyTorch tensors
train_features_tensor = torch.tensor(train_features_normalized, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_features_tensor = torch.tensor(test_features_normalized, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Define the PyTorch neural network
class SimpleNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Hyperparameters
input_dim = train_features_normalized.shape[1]
hidden_dim = 70
output_dim = len(set(train_labels))  # Assuming train_labels are class indices

# Instantiate the model
model = SimpleNN(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop with L1 regularization
lambda_l1 = 0.0001  # L1 regularization coefficient
num_epochs = 1000

for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(train_features_tensor)

    loss = criterion(outputs, train_labels_tensor)

    # Add L1 regularization
    l1_reg = torch.tensor(0.0, requires_grad=True)
    for param in model.parameters():
        l1_reg = l1_reg + torch.norm(param, 1)
    loss += lambda_l1 * l1_reg

    loss.backward()
    optimizer.step()

    # Print progress
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    test_outputs = model(test_features_tensor)
    _, predicted = torch.max(test_outputs.data, 1)
    accuracy = accuracy_score(test_labels, predicted.numpy())
    print("Accuracy:", accuracy)

Epoch [100/1000], Loss: 0.4775
Epoch [200/1000], Loss: 0.3366
Epoch [300/1000], Loss: 0.2825
Epoch [400/1000], Loss: 0.2497
Epoch [500/1000], Loss: 0.2267
Epoch [600/1000], Loss: 0.2100
Epoch [700/1000], Loss: 0.1970
Epoch [800/1000], Loss: 0.1869
Epoch [900/1000], Loss: 0.1786
Epoch [1000/1000], Loss: 0.1716
Accuracy: 0.967


In [56]:
import copy

from helper import prune_pytorch_network

pruned_network = copy.deepcopy(model)
pruned_network = prune_pytorch_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 9660
Number of changed weight parameters: 6922
Number of bias parameters: 80
Number of changed bias parameters: 45
Percentage of weights pruned: 71.66%
Percentage of biases pruned: 56.25%
Remaining number of non-zero weights: 2738
Remaining number of non-zero biases: 35


In [57]:
# Evaluation
pruned_network.eval()  # Set the model to evaluation mode
with torch.no_grad():
    test_outputs = pruned_network(test_features_tensor)
    _, predicted = torch.max(test_outputs.data, 1)
    accuracy = accuracy_score(test_labels, predicted.numpy())
    print("Accuracy:", accuracy)

Accuracy: 0.9608


## Try out an SVM with fewer support vectors

In [69]:
from sklearn.svm import NuSVC

clf = NuSVC(nu=0.0001, kernel="rbf", random_state=0)
clf.fit(train_features_normalized, train_labels)

In [70]:
num_test_samples = len(test_features_normalized)
python_predictions = clf.predict(test_features_normalized)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

print("Number of support vectors", clf.n_support_)
print("Total number of support vectors", sum(clf.n_support_))

Python accuracy: 95.83 %
Number of support vectors [568 408 893 834 684 801 597 727 913 714]
Total number of support vectors 7139
