In [1]:
import torch
import torchvision

# Prefer the first CUDA GPU and fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Asking for a GPU name on a CUDA-less machine raises, so only query it when
# a GPU was actually selected; otherwise show "cpu" as the cell output.
torch.cuda.get_device_name(device) if device.type == "cuda" else "cpu"

'NVIDIA GeForce RTX 3090'

In [2]:
# Fetch the MNIST training archive (60,000 labelled digits) into ./data/.
mnist = torchvision.datasets.MNIST('./data/', download=True)

# The first 50,000 examples become the training pool; the remaining 10,000
# examples of the same archive are held out as the test split.
mnist_train = [mnist[i] for i in range(0, 50000)]
mnist_test = [mnist[i] for i in range(50000, 60000)]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST\raw\train-images-idx3-ubyte.gz to ./data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST\raw\train-labels-idx1-ubyte.gz to ./data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST\raw\t10k-images-idx3-ubyte.gz to ./data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data/MNIST\raw



In [3]:
import numpy as np
# Look at one raw example: a (PIL image, integer label) pair.
first_example = mnist[0]
print(first_example)
# Convert the image to an array to check its pixel range and dimensions.
img = np.array(first_example[0])
print(img.max(), img.min(), img.shape)

(<PIL.Image.Image image mode=L size=28x28 at 0x2053664C250>, 5)
255 0 (28, 28)


In [4]:
# Print the per-digit example count, first for the training pool and then for
# the held-out split, to check both are roughly class-balanced.
for split in (mnist_train, mnist_test):
    y_counts = np.zeros((10,))
    for _, label in split:
        y_counts[label] += 1

    print(y_counts)

[4932. 5678. 4968. 5101. 4859. 4506. 4951. 5175. 4842. 4988.]
[ 991. 1064.  990. 1030.  983.  915.  967. 1090. 1009.  961.]


In [5]:
import json

# y_idx_list[d] collects the positions in mnist_train whose label is digit d.
y_idx_list = [[] for _ in range(10)]
for idx, (_, label) in enumerate(mnist_train):
    y_idx_list[label].append(idx)

# Persist the index lists as JSON so sample_mnist can reload them later.
with open('./data/mnist_class_idx.txt', 'w+') as jfile:
    json.dump(y_idx_list, jfile)

In [6]:
def sample_mnist(num_per_class=100):
    """Draw a class-balanced, shuffled random subset of ``mnist_train``.

    The per-class index lists are read from ``./data/mnist_class_idx.txt``.
    For every class, ``num_per_class`` distinct indices are drawn without
    replacement through NumPy's global random state.

    Args:
        num_per_class: number of examples to draw from each class.

    Returns:
        A pair ``(X, Y)`` of float64 arrays: ``X`` has shape
        ``(num_classes * num_per_class, 28, 28)`` and ``Y`` has shape
        ``(num_classes * num_per_class,)``, both in the same random order.

    Raises:
        ValueError: raised by ``np.random.choice`` when a class holds fewer
            than ``num_per_class`` examples.
    """
    with open('./data/mnist_class_idx.txt') as index_file:
        per_class_indices = json.load(index_file)

    image_blocks, label_blocks = [], []
    for candidates in per_class_indices:
        chosen = np.random.choice(candidates, size=num_per_class, replace=False)
        images = np.zeros((num_per_class, 28, 28))
        labels = np.zeros((num_per_class,))
        for row, train_idx in enumerate(chosen):
            image, label = mnist_train[train_idx]
            images[row] = np.array(image)
            labels[row] = label
        image_blocks.append(images)
        label_blocks.append(labels)

    all_images = np.concatenate(image_blocks, axis=0)
    all_labels = np.concatenate(label_blocks, axis=0)

    # One shared permutation keeps every image aligned with its label.
    order = np.arange(all_images.shape[0])
    np.random.shuffle(order)
    return all_images[order], all_labels[order]

In [7]:
# Smoke test: draw 10 examples per class, then show the first image and all labels.
X, Y = sample_mnist(num_per_class=10)
print(X[0], Y)

[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 214.
  225.  44.   0.  24. 219.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 108. 249.
  232.  16.   0. 158. 253.   0.   0.   0.   0.   0.   0.  

In [8]:
# Draw 100 examples per class and store the subset under keys 'X' and 'Y'.
X_subset, Y_subset = sample_mnist(num_per_class=100)
with open('./data/mnist_train_100.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_subset, Y=Y_subset)

In [9]:
# Draw 300 examples per class and store the subset under keys 'X' and 'Y'.
X_subset, Y_subset = sample_mnist(num_per_class=300)
with open('./data/mnist_train_300.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_subset, Y=Y_subset)

In [10]:
# Draw 500 examples per class and store the subset under keys 'X' and 'Y'.
X_subset, Y_subset = sample_mnist(num_per_class=500)
with open('./data/mnist_train_500.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_subset, Y=Y_subset)

In [11]:
# Draw 1000 examples per class and store the subset under keys 'X' and 'Y'.
X_subset, Y_subset = sample_mnist(num_per_class=1000)
with open('./data/mnist_train_1000.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_subset, Y=Y_subset)

In [12]:
# Draw 2500 examples per class and store the subset under keys 'X' and 'Y'.
X_subset, Y_subset = sample_mnist(num_per_class=2500)
with open('./data/mnist_train_2500.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_subset, Y=Y_subset)

In [13]:
# Materialise the held-out split as dense float arrays (one row per example).
X = np.zeros((10000, 28, 28))
Y = np.zeros((10000))

for idx, (x_raw, y_raw) in enumerate(mnist_test):
    X[idx] = np.array(x_raw)
    Y[idx] = y_raw

# Shuffle images and labels with one shared permutation so pairs stay aligned.
idx_arr = np.arange(X.shape[0])
np.random.shuffle(idx_arr)
X_shuffle, Y_shuffle = X[idx_arr], Y[idx_arr]

with open('./data/mnist_test.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_shuffle, Y=Y_shuffle)

In [14]:
# Materialise the whole training pool as dense float arrays (one row per example).
X = np.zeros((50000, 28, 28))
Y = np.zeros((50000))

for idx, (x_raw, y_raw) in enumerate(mnist_train):
    X[idx] = np.array(x_raw)
    Y[idx] = y_raw

# Shuffle images and labels with one shared permutation so pairs stay aligned.
idx_arr = np.arange(X.shape[0])
np.random.shuffle(idx_arr)
X_shuffle, Y_shuffle = X[idx_arr], Y[idx_arr]

with open('./data/mnist_train_full.npz', 'wb+') as data_file:
    np.savez(data_file, X=X_shuffle, Y=Y_shuffle)

In [8]:
from data import load_mnist

# Sanity-check the loader (defined in data.py, not shown here) by printing the
# shapes it returns. Presumably 100 is the per-class sample count, given the
# recorded 1000-row output; confirm against load_mnist's signature.
X, Y = load_mnist(100)
print(X.shape, Y.shape)

torch.Size([1000, 28, 28]) torch.Size([1000])


In [4]:
# Same check with the second positional argument set to True. The recorded
# output has 784-wide rows, so this flag presumably flattens each 28x28 image;
# verify the parameter's name and meaning in data.py.
X, Y = load_mnist(100, True)
print(X.shape, Y.shape)

torch.Size([1000, 784]) torch.Size([1000])


In [7]:
# A first argument of 0 yields 50000 rows in the recorded output, so it looks
# like 0 selects the full training file; confirm in data.py.
X, Y = load_mnist(0, False)
print(X.shape, Y.shape)

torch.Size([50000, 28, 28]) torch.Size([50000])


## Generate Data for Regression Task

In [2]:
import data, models, train, measurements
import optimizer as opt
import numpy as np

# Training data, requested with num_per_class=500.
X, Y = data.load_mnist(num_per_class=500)
Y_array = Y.numpy()
# Binary target: 1.0 where the label is >= 5, 0.0 otherwise, stored as float32
# and reshaped into a single column.
binary_Y = (Y_array >= 5).astype(np.float32)
binary_Y_tensor = torch.from_numpy(binary_Y.reshape((-1, 1)))

# Test data, binarised with the same rule.
test_X, test_Y = data.load_mnist(test=True)
test_Y_array = test_Y.numpy()
binary_test_Y = (test_Y_array >= 5).astype(np.float32)
binary_test_Y_tensor = torch.from_numpy(binary_test_Y.reshape((-1, 1)))



In [3]:
import os

# Fully connected net built by models.fully_connected_net with arguments
# (28 * 28, 1, [400, 400], 'relu'), moved to the GPU.
model = models.fully_connected_net(28 * 28, 1, [400, 400], 'relu').cuda()

# The network's output is treated as a raw logit. BCEWithLogitsLoss fuses the
# sigmoid into the loss, which is numerically more stable than applying
# Sigmoid and then BCELoss when logits saturate. bce_loss and sigmoid_layer
# stay defined for any code that still wants to work on probabilities.
bce_loss = torch.nn.BCELoss()
sigmoid_layer = torch.nn.Sigmoid()
loss_fn = torch.nn.BCEWithLogitsLoss()

# Snapshot the initial weights so training can be restarted from the same
# point; create the target directory first so a fresh checkout does not fail.
os.makedirs('./temp', exist_ok=True)
torch.save({'model_state_dict': model.state_dict()}, './temp/model_init')

# Sanity check: outputs and loss of the untrained model on the training set.
output = model(X.cuda())
print(output)
print(loss_fn(output, binary_Y_tensor.cuda()))

tensor([[-0.0330],
        [-0.0483],
        [-0.0422],
        ...,
        [-0.0481],
        [-0.0203],
        [-0.0411]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor(0.6930, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward0>)


In [4]:
# Optimisation settings.
lr, mom = 0.5, 0.
batch_size, phys_bs = 500, 5000
num_epochs = 100

# Record the same two metrics on the training data and on the test data.
# The 'Cross Entropy Sharpness' and 'Cross Entropy Sharpness V2' train
# recorders are currently not registered.
msrs = measurements.Measurement(verbose=True)
tracked_metrics = ('Binary Cross Entropy', 'Binary Accuracy')
for metric_name in tracked_metrics:
    msrs.add_train_recorder(metric_name, phys_bs, verbose=True)
for metric_name in tracked_metrics:
    msrs.add_test_recorder(metric_name, phys_bs, verbose=True)

optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom)

In [6]:
# Restore the saved initial weights before training, so re-running this cell
# always starts from the same parameters.
init_checkpoint = torch.load('./temp/model_init')
model.load_state_dict(init_checkpoint['model_state_dict'])
train.train(
    (X, binary_Y_tensor),
    (test_X, binary_test_Y_tensor),
    model, loss_fn, optimizer, batch_size, num_epochs, msrs,
)

Epoch #0
  Metrics on training data:
    Binary Cross Entropy: 0.595177412033081.
    Binary Accuracy: 0.7586.
  Metrics on testing data:
    Binary Cross Entropy: 0.5912918448448181.
    Binary Accuracy: 0.764.
Epoch #1
  Metrics on training data:
    Binary Cross Entropy: 0.5124824047088623.
    Binary Accuracy: 0.7436.
  Metrics on testing data:
    Binary Cross Entropy: 0.5009844899177551.
    Binary Accuracy: 0.7535.
Epoch #2
  Metrics on training data:
    Binary Cross Entropy: 0.4326765537261963.
    Binary Accuracy: 0.806.
  Metrics on testing data:
    Binary Cross Entropy: 0.4238400161266327.
    Binary Accuracy: 0.8098.
Epoch #3
  Metrics on training data:
    Binary Cross Entropy: 0.4090377688407898.
    Binary Accuracy: 0.819.
  Metrics on testing data:
    Binary Cross Entropy: 0.4000132083892822.
    Binary Accuracy: 0.8301.
Epoch #4
  Metrics on training data:
    Binary Cross Entropy: 0.42782557010650635.
    Binary Accuracy: 0.7902.
  Metrics on testing data:
    Bina

    Binary Accuracy: 0.998.
  Metrics on testing data:
    Binary Cross Entropy: 0.10385051369667053.
    Binary Accuracy: 0.9664.
Epoch #40
  Metrics on training data:
    Binary Cross Entropy: 0.012210759334266186.
    Binary Accuracy: 0.999.
  Metrics on testing data:
    Binary Cross Entropy: 0.10418964922428131.
    Binary Accuracy: 0.9665.
Epoch #41
  Metrics on training data:
    Binary Cross Entropy: 0.01089436560869217.
    Binary Accuracy: 0.999.
  Metrics on testing data:
    Binary Cross Entropy: 0.10462489724159241.
    Binary Accuracy: 0.967.
Epoch #42
  Metrics on training data:
    Binary Cross Entropy: 0.00970342569053173.
    Binary Accuracy: 0.9994.
  Metrics on testing data:
    Binary Cross Entropy: 0.10542047023773193.
    Binary Accuracy: 0.9671.
Epoch #43
  Metrics on training data:
    Binary Cross Entropy: 0.00874212197959423.
    Binary Accuracy: 0.9998.
  Metrics on testing data:
    Binary Cross Entropy: 0.10616222769021988.
    Binary Accuracy: 0.9675.
Epo

Epoch #79
  Metrics on training data:
    Binary Cross Entropy: 0.0012899483554065228.
    Binary Accuracy: 1.0.
  Metrics on testing data:
    Binary Cross Entropy: 0.12688615918159485.
    Binary Accuracy: 0.968.
Epoch #80
  Metrics on training data:
    Binary Cross Entropy: 0.0012536980211734772.
    Binary Accuracy: 1.0.
  Metrics on testing data:
    Binary Cross Entropy: 0.12725742161273956.
    Binary Accuracy: 0.968.
Epoch #81
  Metrics on training data:
    Binary Cross Entropy: 0.0012188756372779608.
    Binary Accuracy: 1.0.
  Metrics on testing data:
    Binary Cross Entropy: 0.12761792540550232.
    Binary Accuracy: 0.968.
Epoch #82
  Metrics on training data:
    Binary Cross Entropy: 0.0011850454611703753.
    Binary Accuracy: 1.0.
  Metrics on testing data:
    Binary Cross Entropy: 0.12800219655036926.
    Binary Accuracy: 0.968.
Epoch #83
  Metrics on training data:
    Binary Cross Entropy: 0.0011534688528627157.
    Binary Accuracy: 1.0.
  Metrics on testing data:


In [14]:
def _rms_normalize(values):
    """Rescale a 1-D array so its Euclidean norm equals sqrt(len(values))."""
    return values * np.sqrt(values.shape[0]) / np.linalg.norm(values)


# Use the trained network's raw outputs as regression targets. These are pure
# inference passes, so autograd is disabled to avoid building and holding a
# graph over the whole train and test batches.
with torch.no_grad():
    train_reg_labels = model(X.cuda()).cpu().numpy().flatten()
    test_reg_labels = model(test_X.cuda()).cpu().numpy().flatten()

# Each split is normalised independently, using its own norm.
normalized_train_labels = _rms_normalize(train_reg_labels)
normalized_test_labels = _rms_normalize(test_reg_labels)

print(normalized_train_labels, normalized_test_labels)

[ 0.450537   -1.6227163  -0.97107434 ... -1.5593404  -1.0308453
 -1.1431814 ] [-0.37283143 -1.322201   -0.717003   ...  0.90055007 -0.9361966
 -0.8873947 ]


In [15]:
# Write each normalised label vector to its own .npz file under the key 'Y'.
for labels_path, labels in (
    ('./data/mnist_train_500_reg_labels.npz', normalized_train_labels),
    ('./data/mnist_test_reg_labels.npz', normalized_test_labels),
):
    with open(labels_path, 'wb+') as data_file:
        np.savez(data_file, Y=labels)