In [1]:
import gzip
import os
import shutil

import requests


def download_and_extract_dataset(url, save_path, folder_path):
    """Download and extract dataset if it doesn't exist."""
    if not os.path.exists(save_path):
        print(f"Downloading {os.path.basename(save_path)}...")
        response = requests.get(url)
        with open(save_path, "wb") as file:
            file.write(response.content)

        decompressed_file_name = os.path.splitext(os.path.basename(save_path))[0]
        decompressed_file_path = os.path.join(folder_path, decompressed_file_name)

        with gzip.open(save_path, "rb") as f_in:
            with open(decompressed_file_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)

        print(f"{decompressed_file_name} downloaded and extracted.")
    else:
        print(f"{os.path.basename(save_path)} already exists.")


file_info = [
    (
        "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
        "train-images-idx3-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
        "train-labels-idx1-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
        "t10k-images-idx3-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
        "t10k-labels-idx1-ubyte.gz",
    ),
]

folder_name = "tmp/mnist"
folder_path = os.path.join(os.getcwd(), folder_name)

os.makedirs(folder_path, exist_ok=True)  # Create folder if it doesn't exist

# Download and extract each file
for url, file_name in file_info:
    path_to_save = os.path.join(folder_path, file_name)
    download_and_extract_dataset(url, path_to_save, folder_path)

# %%
import numpy as np


def read_idx3_ubyte_image_file(filename):
    """Read IDX3-ubyte formatted image data."""
    with open(filename, "rb") as f:
        magic_num = int.from_bytes(f.read(4), byteorder="big")
        num_images = int.from_bytes(f.read(4), byteorder="big")
        num_rows = int.from_bytes(f.read(4), byteorder="big")
        num_cols = int.from_bytes(f.read(4), byteorder="big")

        if magic_num != 2051:
            raise ValueError(f"Invalid magic number: {magic_num}")

        images = np.zeros((num_images, num_rows, num_cols), dtype=np.uint8)

        for i in range(num_images):
            for r in range(num_rows):
                for c in range(num_cols):
                    pixel = int.from_bytes(f.read(1), byteorder="big")
                    images[i, r, c] = pixel

    return images


def read_idx1_ubyte_label_file(filename):
    """Read IDX1-ubyte formatted label data."""
    with open(filename, "rb") as f:
        magic_num = int.from_bytes(f.read(4), byteorder="big")
        num_labels = int.from_bytes(f.read(4), byteorder="big")

        if magic_num != 2049:
            raise ValueError(f"Invalid magic number: {magic_num}")

        labels = np.zeros(num_labels, dtype=np.uint8)

        for i in range(num_labels):
            labels[i] = int.from_bytes(f.read(1), byteorder="big")

    return labels


# Example usage
folder_path = os.path.join(
    os.getcwd(), folder_name
)  # Adjust this path to where you stored the files

train_images = read_idx3_ubyte_image_file(
    os.path.join(folder_path, "train-images-idx3-ubyte")
)
train_labels = read_idx1_ubyte_label_file(
    os.path.join(folder_path, "train-labels-idx1-ubyte")
)
test_images = read_idx3_ubyte_image_file(
    os.path.join(folder_path, "t10k-images-idx3-ubyte")
)
test_labels = read_idx1_ubyte_label_file(
    os.path.join(folder_path, "t10k-labels-idx1-ubyte")
)

print(
    f"Shape of train_images: {train_images.shape}"
)  # Should output "Shape of train_images: (60000, 28, 28)"
print(
    f"Shape of train_labels: {train_labels.shape}"
)  # Should output "Shape of train_labels: (60000,)"
print(
    f"Shape of test_images: {test_images.shape}"
)  # Should output "Shape of test_images: (10000, 28, 28)"
print(
    f"Shape of test_labels: {test_labels.shape}"
)  # Should output "Shape of test_labels: (10000,)"

# %%
# Reshape the datasets from 3D to 2D
train_images_2d = train_images.reshape(
    train_images.shape[0], -1
)  # -1 infers the size from the remaining dimensions
test_images_2d = test_images.reshape(test_images.shape[0], -1)

train-images-idx3-ubyte.gz already exists.
train-labels-idx1-ubyte.gz already exists.
t10k-images-idx3-ubyte.gz already exists.
t10k-labels-idx1-ubyte.gz already exists.
Shape of train_images: (60000, 28, 28)
Shape of train_labels: (60000,)
Shape of test_images: (10000, 28, 28)
Shape of test_labels: (10000,)


## Transofrming image to binary

In [2]:
# Thresholding
train_images_binary = np.where(train_images_2d > 127, 1, 0)
test_images_binary = np.where(test_images_2d > 127, 1, 0)

# Check the shape and unique values to confirm the conversion
print(train_images_binary.shape)
print(np.unique(train_images_binary))
print(test_images_binary.shape)
print(np.unique(test_images_binary))

(60000, 784)
[0 1]
(10000, 784)
[0 1]


## Continue as normal

In [4]:
from sklearn.tree import DecisionTreeClassifier

# Create and train a decision tree classifier
clf = DecisionTreeClassifier(max_depth=10, random_state=0)
clf.fit(train_images_binary, train_labels)

In [5]:
num_test_samples = len(test_images_2d)
python_predictions = clf.predict(test_images_2d)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

Python accuracy: 81.41000000000001 %


## Try out an SVM

In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_images_binary)
test_features_normalized = scaler.transform(test_images_binary)

In [15]:
from sklearn.svm import SVC

clf = SVC(kernel="rbf", random_state=0, C=1000)
clf.fit(train_features_normalized, train_labels)

In [16]:
num_test_samples = len(test_features_normalized)
python_predictions = clf.predict(test_features_normalized)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

print("Number of support vectors", clf.n_support_)
print("Total number of support vectors", sum(clf.n_support_))

Python accuracy: 97.08 %
Number of support vectors [1098  705 2301 2108 1699 2059 1580 2049 2123 2129]
Total number of support vectors 17851


## Try out an MLP neural network

In [12]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
mlp_large = MLPClassifier(
    hidden_layer_sizes=(397,), max_iter=30, alpha=0.0001, solver="adam", random_state=0
)
mlp_large.fit(train_features_normalized, train_labels)



In [13]:
# Evaluate the classifier
accuracy = mlp_large.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [mlp_large.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in mlp_large.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.975
Number of neurons per layer: [784, 397, 10]


## Try out a smaller network

In [17]:
# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(24,), max_iter=30, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)



In [18]:
# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [clf.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf.coefs_]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9508
Number of neurons per layer: [784, 24, 10]


## Try out pruning

In [19]:
import copy

pruned_network = copy.deepcopy(clf)

In [20]:
num_zeros_before = sum(np.sum(layer == 0) for layer in pruned_network.coefs_)
print("Zeros before pruning:", num_zeros_before)

Zeros before pruning: 0


In [21]:
threshold = 1e-1

num_weights = 0
num_changed_weights = 0

for i in range(len(pruned_network.coefs_)):
    flattened_weights = pruned_network.coefs_[i].ravel()
    for j, weight in enumerate(flattened_weights):
        if abs(weight) < threshold:
            flattened_weights[j] = 0
            num_changed_weights += 1
        num_weights += 1

    # Reshape back to the original shape
    pruned_network.coefs_[i] = flattened_weights.reshape(pruned_network.coefs_[i].shape)

print(f"Number of parameters: {num_weights}")
print(f"Number of changed parameters: {num_changed_weights}")

Number of parameters: 19056
Number of changed parameters: 11483


In [22]:
num_zeros_after = sum(np.sum(layer == 0) for layer in pruned_network.coefs_)
print("Zeros after pruning:", num_zeros_after)

Zeros after pruning: 11483


In [23]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [pruned_network.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in pruned_network.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9429
Number of neurons per layer: [784, 24, 10]


## Prune the larger network

In [24]:
pruned_network = copy.deepcopy(mlp_large)

In [25]:
num_zeros_before = sum(np.sum(layer == 0) for layer in pruned_network.coefs_)
print("Zeros before pruning:", num_zeros_before)

Zeros before pruning: 0


In [26]:
threshold = 1e-1

num_weights = 0
num_changed_weights = 0

for i in range(len(pruned_network.coefs_)):
    flattened_weights = pruned_network.coefs_[i].ravel()
    for j, weight in enumerate(flattened_weights):
        if abs(weight) < threshold:
            flattened_weights[j] = 0
            num_changed_weights += 1
        num_weights += 1

    # Reshape back to the original shape
    pruned_network.coefs_[i] = flattened_weights.reshape(pruned_network.coefs_[i].shape)

print(f"Number of parameters: {num_weights}")
print(f"Number of changed parameters: {num_changed_weights}")

Number of parameters: 315218
Number of changed parameters: 265370


In [27]:
num_zeros_after = sum(np.sum(layer == 0) for layer in pruned_network.coefs_)
print("Zeros after pruning:", num_zeros_after)

Zeros after pruning: 265370


In [28]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [pruned_network.coefs_[0].shape[0]] + [
    coef.shape[1] for coef in pruned_network.coefs_
]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9646
Number of neurons per layer: [784, 397, 10]


## Let's try haar features on the binary image

In [29]:
def integral_image(img):
    """
    Compute the integral image of the input img.
    """
    # Convert the image to int32 type
    img = img.astype(np.int32)
    int_img = np.zeros_like(img)

    for x in range(img.shape[1]):
        for y in range(img.shape[0]):
            int_img[y, x] = (
                img[y, x]
                + (int_img[y - 1, x] if y - 1 >= 0 else 0)
                + (int_img[y, x - 1] if x - 1 >= 0 else 0)
                - (int_img[y - 1, x - 1] if x - 1 >= 0 and y - 1 >= 0 else 0)
            )
    return int_img


def compact_haar_features4(img, h=5, w=5):
    int_img = integral_image(img)
    features = []

    for y in range(0, img.shape[0] - h + 1, h):
        for x in range(0, img.shape[1] - w + 1, w):
            # Horizontal edge feature
            white = (
                int_img[y + h // 2 - 1, x + w - 1]
                + int_img[y, x]
                - int_img[y, x + w - 1]
                - int_img[y + h // 2 - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y + h // 2, x]
                - int_img[y + h // 2, x + w - 1]
                - int_img[y + h - 1, x]
            )
            features.append(black - white)

            # Vertical edge feature
            white = (
                int_img[y + h - 1, x + w // 2 - 1]
                + int_img[y, x]
                - int_img[y, x + w // 2 - 1]
                - int_img[y + h - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y, x + w // 2]
                - int_img[y, x + w - 1]
                - int_img[y + h - 1, x + w // 2]
            )
            features.append(black - white)

    return features

In [30]:
haar_1 = compact_haar_features4(train_images_binary[0].reshape(28, 28))
print(haar_1)
len_haar_features = len(haar_1)
print("length of a haar feature", len_haar_features)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 3, -1, -1, -2, -1, 0, 0, 0, 0, 0, 4, 1, -1, 0, 0, 0, 0, 0, 0, 2, 2, 5, 6, 0, 0, 1, 1, 8, 4, -2, 0, 0, 0, 0, 0]
length of a haar feature 50


In [31]:
# compute datasets

num_train = len(train_images)
num_test = len(test_images)

haar_train = np.zeros((num_train, len_haar_features))
haar_test = np.zeros((num_test, len_haar_features))

for i in range(num_train):
    haar_train[i] = compact_haar_features4(train_images_binary[i].reshape(28, 28))

for i in range(num_test):
    haar_test[i] = compact_haar_features4(test_images_binary[i].reshape(28, 28))

In [44]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_haar_features_normalized = scaler.fit_transform(haar_train)
test_haar_features_normalized = scaler.transform(haar_test)

In [46]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(30,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_haar_features_normalized, train_labels)


# Evaluate the classifier
accuracy = clf.score(test_haar_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.8955




In [33]:
layers_sizes = [clf.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf.coefs_]

print("Number of neurons per layer:", layers_sizes)

Number of neurons per layer: [50, 30, 10]


## Add classic features

In [41]:
def average_intensity(image):
    return np.mean(image)


def aspect_ratio(image, threshold=0.5):
    # Threshold the image to create a binary representation
    bin_image = image > threshold
    # Find the bounding box
    row_indices, col_indices = np.nonzero(bin_image)
    max_row, min_row = np.max(row_indices), np.min(row_indices)
    max_col, min_col = np.max(col_indices), np.min(col_indices)

    # Calculate the aspect ratio of the bounding box
    width = max_col - min_col + 1
    height = max_row - min_row + 1

    if height == 0:  # To avoid division by zero
        return 1.0

    return width / height


from scipy.optimize import curve_fit


def linear_func(x, a, b):
    return a * x + b


def avg_dist_to_fitted_line(image, threshold=0.5):
    # Threshold the image
    bin_image = (image > threshold).astype(int)

    # Identify connected components
    labeled_array, num_features = label(bin_image)

    # Find the largest connected component
    largest_component = None
    max_count = 0
    for i in range(1, num_features + 1):
        component = np.where(labeled_array == i)
        count = len(component[0])
        if count > max_count:
            max_count = count
            largest_component = component

    if largest_component is None:
        return 0.0

    x, y = largest_component
    if len(x) <= 1:  # Can't fit a line to a single point or empty set
        return 0.0

    # Fit a line to the largest component
    popt, _ = curve_fit(linear_func, x, y)

    # Calculate the average distance from each point to the line
    distances = np.abs(y - linear_func(x, *popt))
    avg_distance = np.mean(distances)
    return avg_distance


from scipy.ndimage import label


def num_regions_below_threshold(image, threshold=0.5):
    # Threshold the image so that pixels below the threshold are set to 1
    # and those above the threshold are set to 0.
    bin_image = image < threshold

    # Use connected components labeling
    labeled_array, num_features = label(bin_image)

    # Return the number of unique regions
    # (subtracting 1 as one of the labels will be the background)
    return num_features

In [42]:
# compute datasets
num_train = len(train_images_binary)
num_test = len(test_images_binary)

aspect_ratio_train = np.zeros(num_train)
aspect_ratio_test = np.zeros(num_test)

num_white_regions_train = np.zeros(num_train)
num_white_regions_test = np.zeros(num_test)

avg_dist_to_fitted_line_train = np.zeros(num_train)
avg_dist_to_fitted_line_test = np.zeros(num_test)

for i in range(num_train):
    aspect_ratio_train[i] = aspect_ratio(train_images_binary[i].reshape(28, 28))
    num_white_regions_train[i] = num_regions_below_threshold(
        train_images_binary[i].reshape(28, 28)
    )
    avg_dist_to_fitted_line_train[i] = avg_dist_to_fitted_line(
        train_images_binary[i].reshape(28, 28)
    )

for i in range(num_test):
    aspect_ratio_test[i] = aspect_ratio(test_images_binary[i].reshape(28, 28))
    num_white_regions_test[i] = num_regions_below_threshold(
        test_images_binary[i].reshape(28, 28)
    )
    avg_dist_to_fitted_line_test[i] = avg_dist_to_fitted_line(
        test_images_binary[i].reshape(28, 28)
    )



In [113]:
train_features = np.zeros((num_train, 52))

train_features[:, 0] = aspect_ratio_train
train_features[:, 1] = num_white_regions_train
train_features[:, 2:] = haar_train

test_features = np.zeros((num_test, 52))

test_features[:, 0] = aspect_ratio_test
test_features[:, 1] = num_white_regions_test
test_features[:, 2:] = haar_test

In [114]:
print(haar_train[0])
print(train_features[0][0])
print(train_features[0][1])
print(train_features[0][2])
print(train_features[0][3])
print(train_features[0])

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  3.  5.  3.  3. -1. -1.
 -2. -1.  0.  0.  0.  0.  0.  4.  1. -1.  0.  0.  0.  0.  0.  0.  2.  2.
  5.  6.  0.  0.  1.  1.  8.  4. -2.  0.  0.  0.  0.  0.]
0.95
1.0
0.0
0.0
[ 0.95  1.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    3.    5.    3.    3.   -1.   -1.   -2.   -1.    0.    0.
  0.    0.    0.    4.    1.   -1.    0.    0.    0.    0.    0.    0.
  2.    2.    5.    6.    0.    0.    1.    1.    8.    4.   -2.    0.
  0.    0.    0.    0.  ]


In [115]:
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features)

In [119]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf_a = MLPClassifier(
    hidden_layer_sizes=(31,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf_a.fit(train_features_normalized, train_labels)


# Evaluate the classifier
accuracy = clf_a.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

layers_sizes = [clf_a.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf_a.coefs_]

print("Number of neurons per layer:", layers_sizes)

Accuracy: 0.9179
Number of neurons per layer: [52, 31, 10]


