Scratch Jupyter Notebook

In [None]:
import numpy as np
from numpy.typing import NDArray
import matplotlib.pyplot as plt
import nonlinear_approximator as na
from importlib import reload
import torch
import torchvision
import PIL
%matplotlib inline
from dask.distributed import Client, LocalCluster
import dask
import dask.array as da

Dask Client for Distributed Computation 

# Load and Plot MNIST Dataset 

In [None]:
def one_hot_ten(int_label: torch.Tensor) -> NDArray[np.floating]:
    """Encode a class index as a length-10 one-hot vector.

    Args:
        int_label: Class index; it is used directly to index the output
            vector, so it must be a valid index into 10 elements.

    Returns:
        Float array of shape ``(10,)`` holding 1 at ``int_label`` and 0
        everywhere else.
    """
    encoded = np.zeros(10)
    encoded[int_label] = 1
    return encoded

def to_numpy_arr(img: "PIL.Image.Image") -> NDArray[np.floating]:
    """Flatten an 8-bit image into a 1-D float vector scaled to [-1, 1].

    Args:
        img: Image (or any object ``np.asarray`` accepts) whose pixel
            values lie in ``[0, 255]``.

    Returns:
        1-D float array in which pixel value 0 maps to -1 and 255 maps to +1.
    """
    pixels = np.asarray(img).flatten()
    # [0, 255] -> [0, 1] -> [-0.5, 0.5] -> [-1, 1]
    return 2 * (pixels / 255 - .5)
    
    


In [None]:
# Both splits share the download location and the same image/label
# transforms; only the `train` flag differs between them.
_mnist_kwargs = dict(
    root='./data',
    download=True,
    transform=to_numpy_arr,
    target_transform=one_hot_ten,
)
train_data = torchvision.datasets.MNIST(train=True, **_mnist_kwargs)
test_data = torchvision.datasets.MNIST(train=False, **_mnist_kwargs)

In [None]:
# Preview the first num_rows * num_cols training digits in a grid.
num_rows = 2
num_cols = 5

# squeeze=False keeps `axes` two-dimensional even when the grid has a single
# row or column, so iterating over `axes.flat` works for any grid size.
fig, axes = plt.subplots(
    num_rows,
    num_cols,
    figsize=(1.5 * num_cols, 2 * num_rows),
    squeeze=False,
)
# axes.flat walks the grid row by row, i.e. sample idx lands at
# (idx // num_cols, idx % num_cols).
for idx, ax in enumerate(axes.flat):
    image, label = train_data[idx]
    # Images are stored flattened; restore the 28x28 layout for display.
    ax.imshow(image.reshape((28, 28)), cmap='gray')
    # Labels are one-hot vectors; argmax recovers the digit.
    ax.set_title('Label: {}'.format(np.argmax(label)))
plt.tight_layout()
plt.show()

# Parameters

In [None]:
# Reload the package first and then each submodule, so that source edits made
# on disk are picked up without restarting the kernel.
reload(na)
for submodule_name in ("model", "activations", "params", "training"):
    reload(getattr(na, submodule_name))

# Input/output sizes are taken from the first training sample
# (flattened image vector and one-hot label vector).
sample_image, sample_label = train_data[0]
tent_params = na.params.TentParams(mu=1.99)

config = na.params.RegressionParams(
    width=1000,
    depth=50,
    input_dimension=len(sample_image),
    transform_type=na.activations.TransformType.TENT,
    transform_params=tent_params,
    output_dimension=len(sample_label),
    batch_size=1000,
)


# Instantiate Model

In [None]:
model = na.model.NonlinearRegressorModel(config)

# Each dataset yields (image, label) pairs. zip(*dataset) transposes them into
# an images column and a labels column; each column is wrapped in a dask array
# and .persist() is called on it.
imgs_train, labels_train = (
    da.array(column).persist() for column in zip(*train_data)
)
imgs_test, labels_test = (
    da.array(column).persist() for column in zip(*test_data)
)

print(f"Loaded training data images with shape {imgs_train.shape}, and labels with shape {labels_train.shape}")
print(f"Loaded test data images with shape {imgs_test.shape}, and labels with shape {labels_test.shape}")


# Train MNIST Classifier

In [None]:
# Train on the whole training set: the full-range slices select every row
# and every column of the image and label arrays.
fit_inputs, fit_targets = imgs_train[:, :], labels_train[:, :]
model.fit(fit_inputs, fit_targets)

In [None]:
# Model outputs for every training sample.
probs_train = model.predict(imgs_train[:, :])
# NOTE(review): argmax over axis 0 presumes predict() returns classes along
# the first axis (one column per sample) -- confirm against the model.
preds_train = probs_train.argmax(axis=0)

# Accuracy = fraction of samples whose predicted digit equals the digit encoded
# by the one-hot label (argmax over the class axis of the labels).
# A vectorised mean is used instead of the builtin sum(): iterating a dask
# array in Python creates one graph node per element, which is extremely slow
# for a dataset of this size.
acc_train = (preds_train == labels_train.argmax(axis=1)).mean().compute()

In [None]:
# One bin per digit, centred on the integers 0..9. Sharing explicit edges keeps
# the two histograms aligned even if some digit never occurs in one of them
# (bins=10 would let each call choose its own range).
digit_bins = np.arange(11) - 0.5

# Predicted digits for the training set.
plt.hist(np.asarray(preds_train), bins=digit_bins)
# Overlay the true digits for the same samples (all of them, so the counts are
# comparable with the predictions and with the N reported on the y-axis).
plt.hist(labels_train.argmax(axis=1).compute(), bins=digit_bins, alpha=0.3)
plt.title(f"Histogram of model predicted classifications on training data. Accuracy = {100 *acc_train}%")
plt.xlabel("Digit")
plt.ylabel(f"Number of classifciations (N={len(preds_train)})")
plt.show()

In [None]:
# Evaluate on held-out data: the first `num_eval` test images. Samples are
# rows of imgs_test / labels_test, so the subset is taken along axis 0.
num_eval = 1000
probs_test = model.predict(imgs_test[:num_eval, :])
# NOTE(review): the class axis is taken as 0 to agree with how the training
# predictions are derived (probs_train.argmax(axis=0)) -- confirm against the
# output shape of model.predict().
preds_test = np.asarray(probs_test.argmax(axis=0))
# One-hot labels -> digits (argmax over the class axis), same samples as above.
true_test = labels_test.argmax(axis=1)[:num_eval].compute()
# Fraction of evaluated samples that were classified correctly.
acc_test = (preds_test == true_test).mean()

# One bin per digit, centred on 0..9, shared by both histograms so they align.
digit_bins = np.arange(11) - 0.5
plt.hist(preds_test, bins=digit_bins)
plt.hist(true_test, bins=digit_bins, alpha=0.3)
plt.title(f"Histogram of model predicted classifications on test data. Accuracy = {100 *acc_test}%")
plt.xlabel("Digit")
plt.ylabel(f"Number of classifciations (N={len(preds_test)})")
plt.show()