Testing 

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.dpi"] = 72
from IPython.display import display
import numpy as np
import pandas as pd

import torch
import torch.nn as nn

print(f'PyTorch version= {torch.__version__}')


PyTorch version= 2.7.1


In [4]:
DATA_DIR = '/Users/samihamer/Desktop/Advanced Applied AI Summer 2025/Module 2/MNIST/raw'

In [5]:
def load_mnist(path, kind='train'):
    import os
    import struct
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
        with open(images_path, 'rb') as imgpath:
            magic, num, rows, cols = struct.unpack(">IIII",imgpath.read(16))
            images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 28, 28, 1)
            images = ((images / 255.) - .5) * 2
    return images, labels

In [7]:
X_tr, y_tr = load_mnist(DATA_DIR, kind='train')
print(f'N= {X_tr.shape[0]}, HxW= {X_tr.shape[1]}x{X_tr.shape[2]}')

N= 60000, HxW= 28x28


In [8]:
X_tr, y_tr = load_mnist(DATA_DIR, kind='train')
print(f'N= {X_tr.shape[0]}, HxW= {X_tr.shape[1]}x{X_tr.shape[2]}')

X_ts, y_ts = load_mnist(DATA_DIR, kind='t10k')
print(f'N= {X_ts.shape[0]}, HxW= {X_ts.shape[1]}x{X_ts.shape[2]}')


N= 60000, HxW= 28x28
N= 10000, HxW= 28x28


In [9]:
# reshape to work with Conv2D - Batch, Chn, H, W
X_tr = X_tr.reshape(X_tr.shape[0], 1, X_tr.shape[1], X_tr.shape[2])
X_ts = X_ts.reshape(X_ts.shape[0], 1, X_ts.shape[1], X_ts.shape[2])

IMG_CHANNEL= 1  # color channel
MLP_HIDDEN= 64  # Hidden layer size

In [11]:
class PyTorchMLP(nn.Module):
    def __init__(self, epochs=10, eta=0.001, minibatch_size=5000, seed=0):
        super().__init__()
        self.random = np.random.RandomState(seed)  # shuffle mini batches
        self.epochs = epochs  # number of iterations
        self.eta = eta  # learning rate
        self.minibatch_size = minibatch_size  # size of training batch - 1 would not work
        self.optimizer = None
        self.loss_func = nn.CrossEntropyLoss()
        self.model = None

    def init_layers(self, _M:int, _K:int) -> None:
        # data structure
        self.model = nn.Sequential(
            nn.Conv2d(IMG_CHANNEL, MLP_HIDDEN, 5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(MLP_HIDDEN),

            nn.Conv2d(MLP_HIDDEN, MLP_HIDDEN*2, 5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Flatten(start_dim=1),

            # 4*4 is computed in the above cell
            nn.Linear(MLP_HIDDEN*2 * 4*4, 1024),  # 1024 arbitrary
            nn.BatchNorm1d(1024),

            nn.Linear(1024, _K),
        ).to(device)

    def predict(self, _X):
        _X = torch.FloatTensor(_X).to(device, non_blocking=True)
        assert self.model is not None
        self.model.eval()
        with torch.no_grad():
            logits = self.model(_X)
        self.model.train()
        # probs = nn.functional.softmax(logits, dim=1)
        preds = torch.argmax(logits, dim=1)
        return preds.cpu().numpy()

    def predict_proba(self, _X):
        _X = torch.FloatTensor(_X).to(device, non_blocking=True)
        assert self.model is not None
        self.model.eval()
        with torch.no_grad():
            logits = self.model(_X)
        self.model.train()
        probs = nn.functional.softmax(logits, dim=1)
        # preds = torch.argmax(logits, dim=1)
        return probs.cpu().numpy()

    def fit(self, _X_train, _y_train, info=False):
        import sys
        n_features= _X_train.shape[1]
        n_output= np.unique(_y_train).shape[0]  # number of class labels

        _X_train = torch.FloatTensor(_X_train).to(device, non_blocking=True)
        _y_train = torch.LongTensor(_y_train).to(device, non_blocking=True).long()

        self.init_layers(n_features, n_output)
        self.optimizer = torch.optim.Rprop(self.model.parameters(), lr=self.eta)  # connect model to optimizer

        for e in range(self.epochs):
            indices = np.arange(_X_train.shape[0])
            self.random.shuffle(indices)  # shuffle the data each epoch

            for start_idx in range(0, indices.shape[0] - self.minibatch_size + 1, self.minibatch_size):
                batch_idx = indices[start_idx:start_idx + self.minibatch_size]
                self.optimizer.zero_grad()

                net_out = self.model(_X_train[batch_idx])

                loss = self.loss_func(net_out, _y_train[batch_idx])
                loss.backward()
                self.optimizer.step()

                if info:
                    sys.stderr.write(f"\r{e+1:03d} Loss: {loss.item():6.5f}")
                    sys.stderr.flush()
        return self


In [12]:
%%time

mlp = PyTorchMLP(epochs=13).to(device)
mlp.fit(X_tr, y_tr, info=True)

# Testing dataset
y_pred = mlp.predict(X_ts)

print(f'Accuracy= {np.sum(y_pred==y_ts)/len(X_ts):.3f}')

013 Loss: 0.01740

Accuracy= 0.991
CPU times: user 15min 55s, sys: 7min 14s, total: 23min 10s
Wall time: 15min 55s


In [16]:
from lime import lime_image
from lime.wrappers.scikit_image import SegmentationAlgorithm
import matplotlib.pyplot as plt

# Create LIME explainer
explainer = lime_image.LimeImageExplainer()

# Define segmentation algorithm
segmenter = SegmentationAlgorithm('quickshift', kernel_size=4, max_dist=200, ratio=0.2)

# Function to preprocess images for LIME
def preprocess_for_lime(images):
    return images.reshape(-1, 28, 28)

# Function to get model predictions for LIME
def predict_fn(images):
    # Reshape images to match model input
    images = images.reshape(-1, 1, 28, 28)
    # Get probabilities for all classes
    probs = mlp.predict_proba(images)
    # Return probabilities for the predicted class only
    return probs[:, np.argmax(probs[0])]

# Get explanations for each digit class
plt.figure(figsize=(15, 10))
for digit in range(10):
    # Find an example of the current digit
    digit_indices = np.where(y_ts == digit)[0]
    if len(digit_indices) > 0:
        example_idx = digit_indices[0]
        example = X_ts[example_idx].reshape(28, 28)
        
        # Get explanation
        explanation = explainer.explain_instance(
            example,
            predict_fn,
            segmentation_fn=segmenter,
            top_labels=1,
            hide_color=0,
            num_samples=1000
        )
        
        # Plot original image and explanation
        plt.subplot(2, 5, digit + 1)
        temp, mask = explanation.get_image_and_mask(
            explanation.top_labels[0],
            positive_only=True,
            num_features=5,
            hide_rest=True
        )
        plt.imshow(mask, cmap='RdBu', alpha=0.7)
        plt.imshow(example, cmap='gray', alpha=0.5)
        plt.title(f'Digit {digit}')
        plt.axis('off')

plt.tight_layout()
plt.show()


  0%|          | 0/1000 [00:00<?, ?it/s]

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

<Figure size 1080x720 with 0 Axes>