In [4]:
import gzip
import os
import shutil

import requests


def download_and_extract_dataset(url, save_path, folder_path):
    """Download and extract dataset if it doesn't exist."""
    if not os.path.exists(save_path):
        print(f"Downloading {os.path.basename(save_path)}...")
        response = requests.get(url)
        with open(save_path, "wb") as file:
            file.write(response.content)

        decompressed_file_name = os.path.splitext(os.path.basename(save_path))[0]
        decompressed_file_path = os.path.join(folder_path, decompressed_file_name)

        with gzip.open(save_path, "rb") as f_in:
            with open(decompressed_file_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)

        print(f"{decompressed_file_name} downloaded and extracted.")
    else:
        print(f"{os.path.basename(save_path)} already exists.")


file_info = [
    (
        "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
        "train-images-idx3-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
        "train-labels-idx1-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
        "t10k-images-idx3-ubyte.gz",
    ),
    (
        "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
        "t10k-labels-idx1-ubyte.gz",
    ),
]

folder_name = "tmp/mnist"
folder_path = os.path.join(os.getcwd(), folder_name)

os.makedirs(folder_path, exist_ok=True)  # Create folder if it doesn't exist

# Download and extract each file
for url, file_name in file_info:
    path_to_save = os.path.join(folder_path, file_name)
    download_and_extract_dataset(url, path_to_save, folder_path)

# %%
import numpy as np


def read_idx3_ubyte_image_file(filename):
    """Read IDX3-ubyte formatted image data."""
    with open(filename, "rb") as f:
        magic_num = int.from_bytes(f.read(4), byteorder="big")
        num_images = int.from_bytes(f.read(4), byteorder="big")
        num_rows = int.from_bytes(f.read(4), byteorder="big")
        num_cols = int.from_bytes(f.read(4), byteorder="big")

        if magic_num != 2051:
            raise ValueError(f"Invalid magic number: {magic_num}")

        images = np.zeros((num_images, num_rows, num_cols), dtype=np.uint8)

        for i in range(num_images):
            for r in range(num_rows):
                for c in range(num_cols):
                    pixel = int.from_bytes(f.read(1), byteorder="big")
                    images[i, r, c] = pixel

    return images


def read_idx1_ubyte_label_file(filename):
    """Read IDX1-ubyte formatted label data."""
    with open(filename, "rb") as f:
        magic_num = int.from_bytes(f.read(4), byteorder="big")
        num_labels = int.from_bytes(f.read(4), byteorder="big")

        if magic_num != 2049:
            raise ValueError(f"Invalid magic number: {magic_num}")

        labels = np.zeros(num_labels, dtype=np.uint8)

        for i in range(num_labels):
            labels[i] = int.from_bytes(f.read(1), byteorder="big")

    return labels


# Example usage
folder_path = os.path.join(
    os.getcwd(), folder_name
)  # Adjust this path to where you stored the files

train_images = read_idx3_ubyte_image_file(
    os.path.join(folder_path, "train-images-idx3-ubyte")
)
train_labels = read_idx1_ubyte_label_file(
    os.path.join(folder_path, "train-labels-idx1-ubyte")
)
test_images = read_idx3_ubyte_image_file(
    os.path.join(folder_path, "t10k-images-idx3-ubyte")
)
test_labels = read_idx1_ubyte_label_file(
    os.path.join(folder_path, "t10k-labels-idx1-ubyte")
)

print(
    f"Shape of train_images: {train_images.shape}"
)  # Should output "Shape of train_images: (60000, 28, 28)"
print(
    f"Shape of train_labels: {train_labels.shape}"
)  # Should output "Shape of train_labels: (60000,)"
print(
    f"Shape of test_images: {test_images.shape}"
)  # Should output "Shape of test_images: (10000, 28, 28)"
print(
    f"Shape of test_labels: {test_labels.shape}"
)  # Should output "Shape of test_labels: (10000,)"

# %%
# Reshape the datasets from 3D to 2D
train_images_2d = train_images.reshape(
    train_images.shape[0], -1
)  # -1 infers the size from the remaining dimensions
test_images_2d = test_images.reshape(test_images.shape[0], -1)

train-images-idx3-ubyte.gz already exists.
train-labels-idx1-ubyte.gz already exists.
t10k-images-idx3-ubyte.gz already exists.
t10k-labels-idx1-ubyte.gz already exists.
Shape of train_images: (60000, 28, 28)
Shape of train_labels: (60000,)
Shape of test_images: (10000, 28, 28)
Shape of test_labels: (10000,)


In [5]:
def integral_image(img):
    """
    Compute the integral image of the input img.
    """
    # Convert the image to int32 type
    img = img.astype(np.int32)
    int_img = np.zeros_like(img)

    for x in range(img.shape[1]):
        for y in range(img.shape[0]):
            int_img[y, x] = (
                img[y, x]
                + (int_img[y - 1, x] if y - 1 >= 0 else 0)
                + (int_img[y, x - 1] if x - 1 >= 0 else 0)
                - (int_img[y - 1, x - 1] if x - 1 >= 0 and y - 1 >= 0 else 0)
            )
    return int_img


def haar_feature(int_img, type, x, y, w, h):
    """
    Compute the Haar feature value for a given type, position and size.
    """
    if type == "2h":
        white = (
            int_img[y, x + w]
            - int_img[y, x]
            + int_img[y + h, x + w]
            - int_img[y + h, x]
        )
        black = (
            int_img[y, x + 2 * w]
            - int_img[y, x + w]
            + int_img[y + h, x + 2 * w]
            - int_img[y + h, x + w]
        )
        return black - white

    elif type == "2v":
        white = (
            int_img[y, x]
            - int_img[y, x + w]
            + int_img[y + h // 2, x]
            - int_img[y + h // 2, x + w]
        )
        black = (
            int_img[y + h // 2, x]
            - int_img[y + h // 2, x + w]
            + int_img[y + h, x]
            - int_img[y + h, x + w]
        )
        return black - white

    elif type == "3h":
        white1 = (
            int_img[y, x + w]
            - int_img[y, x]
            + int_img[y + h, x + w]
            - int_img[y + h, x]
        )
        black = (
            int_img[y, x + 2 * w]
            - int_img[y, x + w]
            + int_img[y + h, x + 2 * w]
            - int_img[y + h, x + w]
        )
        white2 = (
            int_img[y, x + 3 * w]
            - int_img[y, x + 2 * w]
            + int_img[y + h, x + 3 * w]
            - int_img[y + h, x + 2 * w]
        )
        return black - (white1 + white2)

    elif type == "3v":
        white1 = (
            int_img[y, x]
            - int_img[y, x + w]
            + int_img[y + h // 3, x]
            - int_img[y + h // 3, x + w]
        )
        black = (
            int_img[y + h // 3, x]
            - int_img[y + h // 3, x + w]
            + int_img[y + 2 * h // 3, x]
            - int_img[y + 2 * h // 3, x + w]
        )
        white2 = (
            int_img[y + 2 * h // 3, x]
            - int_img[y + 2 * h // 3, x + w]
            + int_img[y + h, x]
            - int_img[y + h, x + w]
        )
        return black - (white1 + white2)

    else:
        return 0


def extract_haar_features(img):
    """
    Extracts Haar features from an MNIST image.
    """
    int_img = integral_image(img)
    features = []
    for type in ["2h", "2v", "3h", "3v"]:
        for x in range(0, 28):
            for y in range(0, 28):
                for w in range(1, 14):  # limiting width to avoid out-of-bounds
                    if type == "2h" and x + 2 * w <= 27 and y + 1 <= 27:
                        features.append(haar_feature(int_img, type, x, y, w, 1))
                    elif type == "2v" and y + 2 * w <= 27 and x + w <= 27:
                        features.append(haar_feature(int_img, type, x, y, w, 1))
                    elif type == "3h" and x + 3 * w <= 27 and y + 1 <= 27:
                        features.append(haar_feature(int_img, type, x, y, w, 1))
                    elif type == "3v" and y + 3 * w <= 27 and x + w <= 27:
                        features.append(haar_feature(int_img, type, x, y, w, 1))
    return features


def compact_haar_features(img):
    """Extract Haar features using a 7x7 window for horizontal and vertical edges."""
    int_img = integral_image(img)

    w, h = 7, 7  # Window size
    features = []

    for y in range(0, img.shape[0] - h):
        for x in range(0, img.shape[1] - w):
            # Horizontal edge feature
            white = (
                int_img[y + h // 2, x]
                - int_img[y + h // 2, x + w]
                + int_img[y, x + w]
                - int_img[y, x]
            )
            black = (
                int_img[y + h, x + w]
                - int_img[y + h, x]
                - int_img[y + h // 2, x + w]
                + int_img[y + h // 2, x]
            )
            features.append(black - white)

            # Vertical edge feature
            white = (
                int_img[y, x + w // 2]
                - int_img[y, x]
                + int_img[y + h, x + w // 2]
                - int_img[y + h, x]
            )
            black = (
                int_img[y + h, x + w]
                - int_img[y + h, x + w // 2]
                - int_img[y, x + w]
                + int_img[y, x + w // 2]
            )
            features.append(black - white)

    return features


def compact_haar_features2(img):
    int_img = integral_image(img)
    h, w = 7, 7  # Size of the grid cell
    features = []

    for y in range(0, img.shape[0] - h, h):
        for x in range(0, img.shape[1] - w, w):
            # Horizontal edge feature
            white = (
                int_img[y, x]
                + int_img[y + h // 2, x + w - 1]
                - int_img[y + h // 2, x]
                - int_img[y, x + w - 1]
            )
            black = (
                int_img[y + h // 2, x]
                + int_img[y + h, x + w - 1]
                - int_img[y + h, x]
                - int_img[y + h // 2, x + w - 1]
            )
            features.append(black - white)

            # Vertical edge feature
            white = (
                int_img[y, x]
                + int_img[y + h, x + w // 2 - 1]
                - int_img[y, x + w // 2 - 1]
                - int_img[y + h, x]
            )
            if x + w == img.shape[1]:  # Check if we're at the boundary
                black = (
                    int_img[y, x + w // 2]
                    + int_img[y + h, x + w - 1]
                    - int_img[y + h, x + w - 1]
                    - int_img[y + h, x + w // 2 - 1]
                )
            else:
                black = (
                    int_img[y, x + w // 2]
                    + int_img[y + h, x + w - 1]
                    - int_img[y, x + w]
                    - int_img[y + h, x + w // 2 - 1]
                )
            features.append(black - white)

    return features


def compact_haar_features3(img, h=7, w=7):
    int_img = integral_image(img)
    # h, w = 7, 7  # Size of the grid cell
    features = []

    for y in range(0, img.shape[0], h):
        for x in range(0, img.shape[1], w):
            # Horizontal edge feature
            white = (
                int_img[y + h // 2 - 1, x + w - 1]
                + int_img[y, x]
                - int_img[y, x + w - 1]
                - int_img[y + h // 2 - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y + h // 2, x]
                - int_img[y + h // 2, x + w - 1]
                - int_img[y + h - 1, x]
            )
            features.append(black - white)

            # Vertical edge feature
            white = (
                int_img[y + h - 1, x + w // 2 - 1]
                + int_img[y, x]
                - int_img[y, x + w // 2 - 1]
                - int_img[y + h - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y, x + w // 2]
                - int_img[y, x + w - 1]
                - int_img[y + h - 1, x + w // 2]
            )
            features.append(black - white)

    return features


def compact_haar_features4(img, h=5, w=5):
    int_img = integral_image(img)
    features = []

    for y in range(0, img.shape[0] - h + 1, h):
        for x in range(0, img.shape[1] - w + 1, w):
            # Horizontal edge feature
            white = (
                int_img[y + h // 2 - 1, x + w - 1]
                + int_img[y, x]
                - int_img[y, x + w - 1]
                - int_img[y + h // 2 - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y + h // 2, x]
                - int_img[y + h // 2, x + w - 1]
                - int_img[y + h - 1, x]
            )
            features.append(black - white)

            # Vertical edge feature
            white = (
                int_img[y + h - 1, x + w // 2 - 1]
                + int_img[y, x]
                - int_img[y, x + w // 2 - 1]
                - int_img[y + h - 1, x]
            )
            black = (
                int_img[y + h - 1, x + w - 1]
                + int_img[y, x + w // 2]
                - int_img[y, x + w - 1]
                - int_img[y + h - 1, x + w // 2]
            )
            features.append(black - white)

    return features

In [62]:
haar_1 = compact_haar_features4(train_images_2d[0].reshape(28, 28))
print(haar_1)
len_haar_features = len(haar_1)
print("length of a haar feature", len_haar_features)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 660, 1265, 849, 766, -218, -224, -501, -234, 0, 0, 0, 0, 238, 1152, 506, -336, 0, 0, 0, 0, 0, 0, 462, 423, 1049, 1305, 0, 0, 191, 191, 1749, 917, -608, 26, -83, -81, 0, 0]
length of a haar feature 50


In [64]:
# compute datasets

num_train = len(train_images)
num_test = len(test_images)

haar_train = np.zeros((num_train, len_haar_features))
haar_test = np.zeros((num_test, len_haar_features))

for i in range(num_train):
    haar_train[i] = compact_haar_features4(train_images_2d[i].reshape(28, 28))

for i in range(num_test):
    haar_test[i] = compact_haar_features4(test_images_2d[i].reshape(28, 28))

In [65]:
train_features = haar_train
test_features = haar_test

In [66]:
from sklearn.tree import DecisionTreeClassifier

# Create and train a decision tree classifier
clf = DecisionTreeClassifier(max_depth=10, random_state=0)
clf.fit(train_features, train_labels)

In [67]:
import numpy as np

num_test_samples = len(test_features)
python_predictions = clf.predict(test_features)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

Python accuracy: 82.26 %


In [68]:
print("classifier depth", clf.get_depth())

classifier depth 10


## Try out an SVM

In [72]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features)

In [33]:
from sklearn.svm import SVC

In [81]:
clf = SVC(kernel="rbf", random_state=0, C=1000)
clf.fit(train_features_normalized, train_labels)

In [82]:
num_test_samples = len(test_features)
python_predictions = clf.predict(test_features_normalized)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

print("Number of support vectors", clf.n_support_)
print("Total number of support vectors", sum(clf.n_support_))

Python accuracy: 94.34 %
Number of support vectors [1039  542 1985 1904 1445 1742 1167 1311 1962 1562]
Total number of support vectors 14659


## Try out an MLP neural network

In [79]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(30,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)


# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9104




In [80]:
layers_sizes = [clf.coefs_[0].shape[0]] + [coef.shape[1] for coef in clf.coefs_]

print("Number of neurons per layer:", layers_sizes)

Number of neurons per layer: [50, 30, 10]


## Experiments with finer haar features, at fixed positions

In [15]:
def compute_haar_features(image):
    if image.shape != (28, 28):
        raise ValueError("Input image must be of shape 28x28.")

    features = []

    # Sliding window
    for i in range(0, 28):  # Slide vertically
        for j in range(0, 28):  # Slide horizontally

            if i % 2 != 0 or j % 2 != 0:
                continue

            if i + 4 > 28 or j + 4 > 28:
                continue

            # Extract 4x4 window
            window = image[i : i + 4, j : j + 4]

            # Horizontal feature
            horizontal_feature_value = np.sum(window[0:2, :]) - np.sum(window[2:4, :])

            # Vertical feature
            vertical_feature_value = np.sum(window[:, 0:2]) - np.sum(window[:, 2:4])

            features.append(horizontal_feature_value)
            features.append(vertical_feature_value)

    return features

In [17]:
haar_1 = compute_haar_features(train_images_2d[0].reshape(28, 28))
print(haar_1)
len_haar_features = len(haar_1)
print("length of a haar feature", len_haar_features)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18446744073709551595, 18446744073709551595, 18446744073709551559, 18446744073709551601, 18446744073709551318, 18446744073709551390, 18446744073709551153, 61, 18446744073709550994, 18446744073709551396, 18446744073709550821, 47, 18446744073709551242, 374, 0, 0, 0, 0, 0, 0, 18446744073709551567, 18446744073709551567, 18446744073709551010, 18446744073709551108, 18446744073709550305, 18446744073709551419, 18446744073709549954, 18446744073709551420, 18446744073709549732, 18446744073709551518, 18446744073709549892, 18446744073709551392, 18446744073709550497, 499, 18446744073709551033, 18446744073709551335, 18446744073709551480, 382, 76, 672, 0, 0, 0, 0, 0, 0, 31, 18446744073709551549, 18446744073709551496, 18446744073709550418, 18446744073709551353, 18446744073709551261, 18446744073709551469, 18446744073709551343, 586, 490, 946, 18446744073709551324, 897, 1123, 1205, 18446744073709551

  horizontal_feature_value = np.sum(window[0:2, :]) - np.sum(window[2:4, :])
  vertical_feature_value = np.sum(window[:, 0:2]) - np.sum(window[:, 2:4])


In [18]:
# compute datasets

num_train = len(train_images)
num_test = len(test_images)

haar_train = np.zeros((num_train, len_haar_features))
haar_test = np.zeros((num_test, len_haar_features))

for i in range(num_train):
    haar_train[i] = compute_haar_features(train_images_2d[i].reshape(28, 28))

for i in range(num_test):
    haar_test[i] = compute_haar_features(test_images_2d[i].reshape(28, 28))

  horizontal_feature_value = np.sum(window[0:2, :]) - np.sum(window[2:4, :])
  vertical_feature_value = np.sum(window[:, 0:2]) - np.sum(window[:, 2:4])


In [19]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(haar_train)
test_features_normalized = scaler.transform(haar_test)

In [21]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(174,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)

# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9642




In [22]:
import copy

from helper import prune_neural_network

pruned_network = copy.deepcopy(clf)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 60552
Number of changed weight parameters: 38650
Number of bias parameters: 184
Number of changed bias parameters: 119
Percentage of weights pruned: 63.83%
Percentage of biases pruned: 64.67%
Remaining number of non-zero weights: 21902
Remaining number of non-zero biases: 65


In [23]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9609


## Step size 3, window size 6x6

In [24]:
def compute_haar_features(image):
    if image.shape != (28, 28):
        raise ValueError("Input image must be of shape 28x28.")

    features = []

    # Sliding window
    for i in range(0, 28, 3):  # Slide vertically with a step of 3
        for j in range(0, 28, 3):  # Slide horizontally with a step of 3

            if i + 6 > 28 or j + 6 > 28:
                continue

            # Extract 6x6 window
            window = image[i : i + 6, j : j + 6]

            # Horizontal feature
            horizontal_feature_value = np.sum(window[0:3, :]) - np.sum(window[3:6, :])

            # Vertical feature
            vertical_feature_value = np.sum(window[:, 0:3]) - np.sum(window[:, 3:6])

            features.append(horizontal_feature_value)
            features.append(vertical_feature_value)

    return features

In [25]:
haar_1 = compute_haar_features(train_images_2d[0].reshape(28, 28))
print(haar_1)
len_haar_features = len(haar_1)
print("length of a haar feature", len_haar_features)

[0, 0, 0, 0, 0, 0, 18446744073709551577, 18446744073709551577, 18446744073709551297, 18446744073709551375, 18446744073709550969, 18446744073709551529, 18446744073709550620, 18446744073709551354, 18446744073709550987, 629, 0, 0, 18446744073709551062, 18446744073709551062, 18446744073709549260, 18446744073709550368, 18446744073709547714, 18446744073709551240, 18446744073709547610, 18446744073709551328, 18446744073709549170, 1192, 18446744073709551109, 49, 33, 1225, 0, 0, 474, 18446744073709550982, 1452, 18446744073709549624, 1860, 18446744073709550846, 2871, 1013, 2896, 1476, 1503, 311, 596, 596, 0, 0, 80, 18446744073709551536, 893, 18446744073709550861, 735, 18446744073709549859, 18446744073709550841, 1501, 18446744073709550894, 1066, 18446744073709551591, 25, 0, 0, 0, 0, 0, 0, 11, 18446744073709551605, 1301, 18446744073709550247, 882, 18446744073709550800, 18446744073709550050, 988, 18446744073709550458, 1208, 0, 0, 0, 0, 0, 0, 18446744073709551478, 18446744073709551478, 18446744073709

  horizontal_feature_value = np.sum(window[0:3, :]) - np.sum(window[3:6, :])
  vertical_feature_value = np.sum(window[:, 0:3]) - np.sum(window[:, 3:6])


In [26]:
# compute datasets

num_train = len(train_images)
num_test = len(test_images)

haar_train = np.zeros((num_train, len_haar_features))
haar_test = np.zeros((num_test, len_haar_features))

for i in range(num_train):
    haar_train[i] = compute_haar_features(train_images_2d[i].reshape(28, 28))

for i in range(num_test):
    haar_test[i] = compute_haar_features(test_images_2d[i].reshape(28, 28))

  horizontal_feature_value = np.sum(window[0:3, :]) - np.sum(window[3:6, :])
  vertical_feature_value = np.sum(window[:, 0:3]) - np.sum(window[:, 3:6])


In [27]:
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(haar_train)
test_features_normalized = scaler.transform(haar_test)

In [29]:
from sklearn.neural_network import MLPClassifier

# Define and initialize the MLP classifier
clf = MLPClassifier(
    hidden_layer_sizes=(69,), max_iter=20, alpha=0.0001, solver="adam", random_state=0
)
clf.fit(train_features_normalized, train_labels)

# Evaluate the classifier
accuracy = clf.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9529




In [30]:
pruned_network = copy.deepcopy(clf)
pruned_network = prune_neural_network(pruned_network, 1e-1, 1e-1)

Number of weight parameters: 9522
Number of changed weight parameters: 4241
Number of bias parameters: 79
Number of changed bias parameters: 30
Percentage of weights pruned: 44.54%
Percentage of biases pruned: 37.97%
Remaining number of non-zero weights: 5281
Remaining number of non-zero biases: 49


In [31]:
# Evaluate the classifier
accuracy = pruned_network.score(test_features_normalized, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.9494


## Try out an SVM

In [34]:
clf = SVC(kernel="rbf", random_state=0, C=1000)
clf.fit(train_features_normalized, train_labels)

In [36]:
num_test_samples = len(test_features_normalized)
python_predictions = clf.predict(test_features_normalized)
python_accuracy = np.sum(python_predictions == test_labels) / num_test_samples
print(f"Python accuracy: {100*python_accuracy} %")

print("Number of support vectors", clf.n_support_)
print("Total number of support vectors", sum(clf.n_support_))

Python accuracy: 96.15 %
Number of support vectors [1025  698 2016 1830 1573 1990 1107 1616 2259 1972]
Total number of support vectors 16086
