In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import warnings
import os.path
import random

In [2]:
EPOCHS = 50

# For simple regression problem
TRAINING_POINTS = 1000

# For fashion-MNIST and similar problems
DATA_ROOT = '/data/cs3450/data/'
FASHION_MNIST_TRAINING = '/data/cs3450/data/fashion_mnist_flattened_training.npz'
FASHION_MNIST_TESTING = '/data/cs3450/data/fashion_mnist_flattened_testing.npz'
CIFAR10_TRAINING = '/data/cs3450/data/cifar10_flattened_training.npz'
CIFAR10_TESTING = '/data/cs3450/data/cifar10_flattened_testing.npz'
CIFAR100_TRAINING = '/data/cs3450/data/cifar100_flattened_training.npz'
CIFAR100_TESTING = '/data/cs3450/data/cifar100_flattened_testing.npz'


In [3]:
def try_gpu(i=0):  #@save
    """Return gpu(i) if exists, otherwise return cpu().
       https://d2l.ai/chapter_deep-learning-computation/use-gpu.html
    """
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
DEVICE=try_gpu()
DEVICE

device(type='cpu')

In [4]:
def create_linear_training_data():
    """
    This method simply rotates points in a 2D space.
    Be sure to use L2 regression in the place of the final softmax layer before testing on this
    data!
    :return: (x,y) the dataset. x is a numpy array where columns are training samples and
             y is a numpy array where columns are one-hot labels for the training sample.
    """
    x = torch.randn((2, TRAINING_POINTS))
    x1 = x[0:1, :].clone()
    x2 = x[1:2, :]
    y = torch.cat((-x2, x1), axis=0)
    return x, y


def create_folded_training_data():
    """
    This method introduces a single non-linear fold into the sort of data created by create_linear_training_data. Be sure to REMOVE the final softmax layer before testing on this data!
    Be sure to use L2 regression in the place of the final softmax layer before testing on this
    data!
    :return: (x,y) the dataset. x is a numpy array where columns are training samples and
             y is a numpy array where columns are one-hot labels for the training sample.
    """
    x = torch.randn((2, TRAINING_POINTS))
    x1 = x[0:1, :].clone()
    x2 = x[1:2, :]
    x2 *= 2 * ((x2 > 0).float() - 0.5)
    y = torch.cat((-x2, x1), axis=0)
    return x, y


def create_square():
    """
    This is the square example that we looked at in class.
    insideness is true if the points are inside the square.
    :return: (points, insideness) the dataset. points is a 2xN array of points and insideness is true if the point is inside the square.
    """
    win_x = [2,2,3,3]
    win_y = [1,2,2,1]
    win = torch.tensor([win_x,win_y],dtype=torch.float32)
    win_rot = torch.cat((win[:,1:],win[:,0:1]),axis=1)
    t = win_rot - win # edges tangent along side of poly
    rotation = torch.tensor([[0, 1],[-1,0]],dtype=torch.float32)
    normal = rotation @ t # normal vectors to each side of poly
        # torch.matmul(rotation,t) # Same thing

    points = torch.rand((2,2000),dtype = torch.float32)
    points = 4*points

    vectors = points[:,np.newaxis,:] - win[:,:,np.newaxis] # reshape to fill origin
    insideness = (normal[:,:,np.newaxis] * vectors).sum(axis=0)
    insideness = insideness.T
    insideness = insideness > 0
    insideness = insideness.all(axis=1)
    return points, insideness

In [5]:
def load_dataset_flattened(train=True,dataset='Fashion-MNIST',download=False):
    """
    :param train: True for training, False for testing
    :param dataset: 'Fashion-MNIST', 'CIFAR-10', or 'CIFAR-100'
    :param download: True to download. Keep to false afterwords to avoid unneeded downloads.
    :return: (x,y) the dataset. x is a numpy array where columns are training samples and
             y is a numpy array where columns are one-hot labels for the training sample.
    """
    if dataset == 'Fashion-MNIST':
        if train:
            path = FASHION_MNIST_TRAINING
        else:
            path = FASHION_MNIST_TESTING
        num_labels = 10
    elif dataset == 'CIFAR-10':
        if train:
            path = CIFAR10_TRAINING
        else:
            path = CIFAR10_TESTING
        num_labels = 10
    elif dataset == 'CIFAR-100':
        if train:
            path = CIFAR100_TRAINING
        else:
            path = CIFAR100_TESTING
        num_labels = 100
    else:
        raise ValueError('Unknown dataset: '+str(dataset))

    if os.path.isfile(path):
        print('Loading cached flattened data for',dataset,'training' if train else 'testing')
        data = np.load(path)
        x = torch.tensor(data['x'],dtype=torch.float32)
        y = torch.tensor(data['y'],dtype=torch.float32)
        pass
    else:
        class ToTorch(object):
            """Like ToTensor, only to a numpy array"""

            def __call__(self, pic):
                return torchvision.transforms.functional.to_tensor(pic)

        if dataset == 'Fashion-MNIST':
            data = torchvision.datasets.FashionMNIST(
                root=DATA_ROOT, train=train, transform=ToTorch(), download=download)
        elif dataset == 'CIFAR-10':
            data = torchvision.datasets.CIFAR10(
                root=DATA_ROOT, train=train, transform=ToTorch(), download=download)
        elif dataset == 'CIFAR-100':
            data = torchvision.datasets.CIFAR100(
                root=DATA_ROOT, train=train, transform=ToTorch(), download=download)
        else:
            raise ValueError('This code should be unreachable because of a previous check.')
        x = torch.zeros((len(data[0][0].flatten()), len(data)),dtype=torch.float32)
        for index, image in enumerate(data):
            x[:, index] = data[index][0].flatten()
        labels = torch.tensor([sample[1] for sample in data])
        y = torch.zeros((num_labels, len(labels)), dtype=torch.float32)
        y[labels, torch.arange(len(labels))] = 1
        np.savez(path, x=x.detach().numpy(), y=y.detach().numpy())
    return x, y

In [28]:
# TODO: Select your datasource.
dataset = 'Fashion-MNIST'
# dataset = 'CIFAR-10'
# dataset = 'CIFAR-100'

#x_train, y_train = create_linear_training_data()
#x_train, y_train = create_folded_training_data()
#points_train, insideness_train = create_square()
x_train, y_train = load_dataset_flattened(train=True, dataset=dataset, download=False)
# Move selected datasets to GPU
x_train = x_train.to(DEVICE)
y_train = y_train.to(DEVICE)

Loading cached flattened data for Fashion-MNIST training


In [29]:
x_train

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [30]:
y_train

tensor([[0., 1., 1.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])

In [31]:
# TODO: Test the accuracy of your network
x_test, y_test = create_linear_training_data()
# x_test, y_test = load_dataset_flattened(train=False, dataset=dataset, download=False)

# Move the selected datasets to the GPU
x_test = x_test.to(DEVICE)
y_test = y_test.to(DEVICE)

In [32]:
x_test

tensor([[-1.0848,  1.6209,  1.7899,  ..., -0.3476,  0.6443,  1.8259],
        [-1.6644, -1.0515,  1.5957,  ..., -0.5752, -1.0943, -0.3921]])

In [33]:
y_test

tensor([[ 1.6644,  1.0515, -1.5957,  ...,  0.5752,  1.0943,  0.3921],
        [-1.0848,  1.6209,  1.7899,  ..., -0.3476,  0.6443,  1.8259]])

## Lab 3 Training a Neural Network using Autograd

## Computing Forward Propagation Equations

In [34]:
def relu(x):
    return x * (x > 0)

### Test Case #1 (In PDF)

In [35]:
#input
x = torch.tensor([6,7])
#weights
w = torch.tensor(np.array([[0,1,2], [3,4,5]]))

#### Using regularization on the weights

In [36]:
#hidden layer result
h = torch.matmul(x,w)
h = relu(h)
print(h)

tensor([21, 34, 47])


In [37]:
#Output layer result
o = torch.matmul(w,h)
o = relu(o)
print(o)

tensor([128, 434])


#### Regularization

In [38]:
learningrate = 0.1
s = (learningrate/2)*((torch.pow(torch.norm(w.float(),p='fro'),2) + torch.pow(torch.norm(w.float(), p='fro'),2)))
s

tensor(5.5000)

### Test Case #2 (In PDF)

In [39]:
xx = torch.tensor(np.array([20,44]))
ww = torch.tensor(np.array([[-1,2,3], [-5,3,7]]).T)

In [40]:
hh = torch.matmul(ww,xx)
hh = relu(hh)
print(hh)

tensor([  0, 172, 368])


In [41]:
ww2 = torch.tensor(np.array([[-1,2,3], [-5,3,7]]))
oo = torch.matmul(ww2,hh)
oo

tensor([1448, 3092])

#### Regularization

In [42]:
learningrate = 0.1
s = (learningrate/2)*((torch.pow(torch.norm(ww.float(),p='fro'),2) + torch.pow(torch.norm(ww2.float(), p='fro'),2)))
print(s)

tensor(9.7000)


## Backpropagation

### Getting the batch sizes (Implemention of MiniBatches)

In [43]:
def data_iter(batch_size, features, labels):
    n = len(features)
    iss = list(range(n))
    random.shuffle(iss)
    for i in range(0, n, batch_size):
        b = torch.tensor(iss[i:min(i + batch_size, n)])
        yield features[b], labels[b]

### Calculating the Sqaure Loss

In [44]:
def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape))**2 /2

### Performing Linear regression

In [45]:
def linreg(X,w,b):
    return relu(torch.matmul(X,w) + b)

### Performing Stochastic Gradient Descent

In [46]:
def sgd(params, lr, batch_size, weight_decay):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param *= weight_decay
            param.grad.zero_()

In [47]:
def softmax(x):
    m = torch.max(x,0)
    v = torch.exp(x - m.values)
    b = torch.sum(v,0)
    t = v / b
    return t

In [48]:
def cross_entropy(X,y):
    x = -torch.sum((X * torch.log(y + 1e-7)))
    return x

### Training Loop For Backprop Using Autograd

In [51]:
#First pair of weights
www = np.random.rand(784,100) * 0.1
w = torch.tensor(www, requires_grad=True)
print("Weight Tensor #1: ", w)
#w = torch.tensor(www, requires_grad=True) 
#First pair of biases
b = torch.zeros(100, requires_grad=True)
#printing out the weight and bias

#Second pair of weights
mm = np.random.rand(100,10) * 0.1
m = torch.tensor(mm, requires_grad=True)
print("Weight Tensor #2: ", m)
#Second pair of bias
b2 = torch.zeros(10, requires_grad=True)
#printing out the second weight and bias

#
print("Original Matrix: ", torch.matmul(w,m))

#learning rate
lr = 0.05
#Number of epoch
num_epoch = 30
#Performing linear regression
net = linreg
#Performing linear regression
net2 = linreg
#Performing sqaured loss
loss = squared_loss
#specified a number for the batch size
batch_size = 30
#using the weight decay within our sgd
weight_decay = 1 - 1e-5

for epoch in range(num_epoch):
    for X,y in data_iter(batch_size, x_train.T, y_train.T):
        l = cross_entropy(net2(net(X,w.float(),b.float()),m.float(),b2.float()),y)
        l.sum().backward()
        sgd([w,b], lr, batch_size, weight_decay)
        sgd([m, b2], lr, batch_size, weight_decay)
    with torch.no_grad():
        train_l = cross_entropy(net2(net(x_test.T,w.float(),b.float()),m.float(),b2.float()),y_test.T)
        print("Epoch:", epoch+1, "\nLoss:", float(train_l.mean()))
#print("Final Matrix:", torch.matmul(w,m))

Weight Tensor #1:  tensor([[0.0402, 0.0170, 0.0461,  ..., 0.0668, 0.0202, 0.0208],
        [0.0933, 0.0818, 0.0280,  ..., 0.0799, 0.0311, 0.0696],
        [0.0522, 0.0434, 0.0980,  ..., 0.0619, 0.0890, 0.0988],
        ...,
        [0.0707, 0.0941, 0.0571,  ..., 0.0335, 0.0206, 0.0898],
        [0.0014, 0.0337, 0.0062,  ..., 0.0462, 0.0870, 0.0963],
        [0.0155, 0.0062, 0.0817,  ..., 0.0679, 0.0252, 0.0240]],
       dtype=torch.float64, requires_grad=True)
Weight Tensor #2:  tensor([[8.9243e-02, 8.5325e-02, 2.8366e-02, 1.9904e-02, 4.3965e-02, 6.0767e-02,
         2.4935e-02, 5.6028e-02, 2.7772e-02, 4.9869e-02],
        [2.4205e-02, 3.9914e-02, 1.9123e-02, 4.5479e-02, 6.1670e-02, 4.7722e-02,
         6.1965e-02, 2.1921e-02, 9.1088e-02, 9.7733e-02],
        [6.9839e-02, 3.5937e-02, 6.3078e-02, 8.1281e-02, 1.8291e-02, 7.6320e-02,
         9.1307e-02, 7.4318e-02, 9.9393e-03, 4.1007e-02],
        [8.4259e-05, 5.1603e-02, 7.9588e-02, 1.7429e-03, 2.0042e-02, 6.8784e-03,
         4.6070e-0

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1000x2 and 784x100)