In [1]:
import os
import sys
import pickle
import numpy as np

In [2]:
a = [[1, 2, -3, 0], [4, -7, -9, 5]]

In [3]:
array = np.array(a)

In [8]:
np.positive(array)

array([[ 1,  2, -3,  0],
       [ 4, -7, -9,  5]])

In [9]:
np.where(array > 0, array, 0)

array([[1, 2, 0, 0],
       [4, 0, 0, 5]])

In [2]:
def load_batch(fpath, label_key='labels'):
    """Internal utility for parsing CIFAR data.
    # Arguments
        fpath: path the file to parse.
        label_key: key for label data in the retrieve
            dictionary.
    # Returns
        A tuple `(data, labels)`.
    """
    with open(fpath, 'rb') as f:
        if sys.version_info < (3,):
            d = pickle.load(f)
        else:
            d = pickle.load(f, encoding='bytes')
            # decode utf8
            d_decoded = {}
            for k, v in d.items():
                d_decoded[k.decode('utf8')] = v
            d = d_decoded
    data = d['data']
    labels = d[label_key]

    #data = data.reshape(data.shape[0], 3, 32, 32)
    data = data.reshape(data.shape[0], 3072)
    return np.array(data), np.array(labels)

In [3]:
path = 'cifar-10-batches-py'
train_fpath = os.path.join('../', path, 'data_batch_1')
val_fpath = os.path.join('../', path, 'data_batch_2')
test_fpath = os.path.join('../', path, 'test_batch')

x_train, y_train = load_batch(train_fpath)
x_val, y_val = load_batch(val_fpath)
x_test, y_test = load_batch(test_fpath)

print('x_train shape:\t', x_train.shape, '\t y_train shape:\t', y_train.shape)
print('x_val shape:\t', x_val.shape, '\t y_val shape:\t', y_val.shape)
print('x_test shape:\t', x_test.shape, '\t y_test shape:\t', y_test.shape)

x_train shape:	 (10000, 3072) 	 y_train shape:	 (10000,)
x_val shape:	 (10000, 3072) 	 y_val shape:	 (10000,)
x_test shape:	 (10000, 3072) 	 y_test shape:	 (10000,)


In [4]:
mean, std = x_train.mean(axis=0), x_train.std(axis=0)
print('mean shape:\t', mean.shape, '\nstd shape:\t', std.shape)

x_train = ( x_train - mean ) / std
x_val = ( x_val- mean ) / std
x_test = ( x_test - mean ) / std

print(x_train.mean(), x_train.std())
print(x_val.mean(), x_val.std())
print(x_test.mean(), x_test.std())

mean shape:	 (3072,) 
std shape:	 (3072,)
2.2611542268199023e-18 0.9999999999999997
-0.002367710988733489 0.9987174761512392
0.007943731845370715 0.995771074950348


In [5]:
K = 10  # number of classes

W = np.random.normal(0, 0.01, (K, 3072))
b = np.random.normal(0, 0.01, K)

In [6]:
def softmax(x):
    """ Standard definition of the softmax function """
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [11]:
def evaluate_classifier(X, W, b):
    P = np.zeros((X.shape[0], K))
    
    for i in range(X.shape[0]):
        P[i] = np.dot(W, X[i]) + b
        
    return np.array([softmax(x) for x in P])

In [12]:
P = evaluate_classifier(x_train[:5], W, b)
P

array([[0.08790861, 0.02824124, 0.04564345, 0.07911984, 0.10143588,
        0.11025037, 0.06123634, 0.05845446, 0.24772125, 0.17998855],
       [0.03082081, 0.04021581, 0.16655865, 0.06627206, 0.08107283,
        0.07951193, 0.12884459, 0.2054497 , 0.04390658, 0.15734703],
       [0.03352015, 0.23399536, 0.08618753, 0.11818484, 0.06456317,
        0.09271226, 0.11940725, 0.09990882, 0.10790776, 0.04361286],
       [0.07572491, 0.0465474 , 0.08270794, 0.10335497, 0.11270224,
        0.223768  , 0.10216423, 0.06196893, 0.08431465, 0.10674673],
       [0.03268444, 0.02391992, 0.07574657, 0.06818139, 0.21457276,
        0.19544687, 0.05187682, 0.06499631, 0.10796978, 0.16460514]])

In [13]:
def compute_cost(X, Y, W, b, lamb):
    
    cost = 0

    P = evaluate_classifier(X, W, b)
    
    for i in range(X.shape[0]):
        cost -= np.log(P[i][Y[i]] + sys.float_info.epsilon)
    
    cost /=  X.shape[0]
    
    cost += lamb * np.sum(W**2)
    
    return cost

In [14]:
lamb = 0.3
compute_cost(x_val, y_val, W, b, lamb)

3.398557739821925

In [56]:
def compute_accuracy(X, Y, W, b):
    
    acc = 0
    
    P = evaluate_classifier(X, W, b)
    
    for i in range(X.shape[0]):
        if Y[i] == np.argmax(P[i]):
            acc += 1
            
    return acc / X.shape[0]

In [90]:
compute_accuracy(x_train, y_train, W, b)

0.106

In [47]:
def one_hot(Y):
    shape = (Y.size, Y.max()+1)
    one_hot = np.zeros(shape)
    rows = np.arange(Y.size)
    one_hot[rows, Y] = 1
    
    return one_hot

In [58]:
def compute_gradients(X, Y, P, W, b, lamda):
    """
    each column of X corresponds to an image and it has size d×n.
    each column of Y (K×n) is the one-hot ground truth label for the corresponding column of X.
    each column of P contains the probability for each label 
        for the image in the corresponding column of X. P has size K×n.
    @return [grad_W, grad_b]
        grad_W is the gradient matrix of the cost J relative to W and has size K×d.
        grad_b is the gradient vector of the cost J relative to b and has size K×1.
    """
    
    batch_size = X.shape[0]
    
    g_batch = - (one_hot(Y) - P)
    grad_loss_w = 1/batch_size * np.dot(g_batch, X.T)
    
    grad_w = grad_loss_w + 2*lamda*W
    grad_b = 1/batch_size * np.sum((g_batch), axis=1).reshape(-1,1)
    
    return grad_b, grad_w

In [59]:
P = evaluate_classifier(x_train[:100], W, b)
lamda = 0.1
[grad_w_num, grad_b_num] = compute_gradients(x_train[:100], y_train[:100], P, W, b, lamda)

ValueError: shapes (100,10) and (3072,100) not aligned: 10 (dim 1) != 3072 (dim 0)

In [23]:
W.shape

(10, 3072)

In [19]:
def ComputeGradsNum(X, Y, P, W, b, lamda, h):
	""" Converted from matlab code """
	no 	= 	W.shape[0]
	d 	= 	X.shape[0]

	grad_W = np.zeros(W.shape);
	grad_b = np.zeros((no, 1));

	c = compute_cost(X, Y, W, b, lamda);
	
	for i in range(len(b)):
		b_try = np.array(b)
		b_try[i] += h
		c2 = compute_cost(X, Y, W, b_try, lamda)
		grad_b[i] = (c2-c) / h

	for i in range(W.shape[0]):
		for j in range(W.shape[1]):
			W_try = np.array(W)
			W_try[i,j] += h
			c2 = compute_cost(X, Y, W_try, b, lamda)
			grad_W[i,j] = (c2-c) / h

	return [grad_W, grad_b]

In [21]:
P = evaluate_classifier(x_train[:5], W, b)
lamda = 0.1
[grad_w_num, grad_b_num] = ComputeGradsNum(x_train[:100], y_train[:100], P, W, b, lamda, 1e-6)

In [25]:
y_train[:100].shape

(100,)

In [26]:
P.shape

(5, 10)