In [2]:
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt

from __future__ import print_function

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
def get_CIFAR10_data(num_training=49000, num_val=1000, num_test=1000, num_dev=500):
    """
    Load the CIFAR-10 arrays via load_CIFAR10 and prepare them for a linear
    classifier.

    The loaded training set is split into a training part and a validation
    part, the loaded test set is truncated, and a small random "dev" subset is
    drawn from the training part. Every split is then flattened to one row per
    example, shifted by the per-column mean of the training part, and extended
    with a trailing column of ones.

    Inputs:
    - num_training: number of leading training examples kept for training.
    - num_val: number of examples immediately after those, used for validation.
    - num_test: number of leading test examples kept.
    - num_dev: number of training examples sampled without replacement for the
      dev split.

    Returns:
    A tuple (X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev).
    Each X_* has one row per example and one more column than the flattened
    input (the appended ones); each y_* holds the matching labels.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Validation examples are the num_val rows that follow the training rows.
    mask = np.arange(num_training, num_training + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]

    # Training examples are the first num_training rows.
    mask = np.arange(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    # Test examples are the first num_test rows of the test set.
    mask = np.arange(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # The dev split is a random subset of the (already truncated) training split.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Flatten every example into a single row.
    X_train, X_val, X_test, X_dev = [
        X.reshape((X.shape[0], -1)) for X in (X_train, X_val, X_test, X_dev)
    ]

    # Centre every split with the mean computed on the training split only.
    # NOTE(review): the in-place subtraction assumes load_CIFAR10 returns
    # floating-point arrays -- confirm against cs231n.data_utils.
    mean_img = np.mean(X_train, axis=0)
    for X in (X_train, X_val, X_test, X_dev):
        X -= mean_img

    # Append a column of ones so a bias can be folded into the weight matrix.
    X_train, X_val, X_test, X_dev = [
        np.hstack((X, np.ones((X.shape[0], 1))))
        for X in (X_train, X_val, X_test, X_dev)
    ]

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev


# Drop arrays left over from a previous run of this cell before loading again,
# so two full copies of the data are not kept alive at the same time. This has
# to run at cell level: placed after the function's return it was unreachable.
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # Nothing has been loaded yet (fresh kernel); there is nothing to clear.
    pass

# Build every split once, then print the shape of each resulting array.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()

for _caption, _array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
    ('dev data shape: ', X_dev),
    ('dev labels shape: ', y_dev),
):
    print(_caption, _array.shape)

Train data shape:  (49000, 3073)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3073)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3073)
Test labels shape:  (1000,)
dev data shape:  (500, 3073)
dev labels shape:  (500,)


In [37]:
from cs231n.classifiers.softmax import softmax_loss_naive
from cs231n.gradient_check import grad_check_sparse
import time

# Start from small random weights and evaluate the naive implementation on the
# dev split. The printed log(10) is the reference value the loss is compared to.
# NOTE(review): the last argument (0 here) is presumably the regularization
# strength -- confirm against cs231n.classifiers.softmax.
W = 0.0001 * np.random.randn(3073, 10)
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0)
print('loss: %f' % loss)
print('sanity check: %f' % np.log(10))


def f(w):
    # Loss only (first element of the returned pair) as a function of the weights.
    return softmax_loss_naive(w, X_dev, y_dev, 0)[0]


# Compare the analytic gradient against a numerical one; the comparison lines
# are printed by grad_check_sparse itself.
grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.323544
sanity check: 2.302585
numerical: -0.515340 analytic: -0.515340, relative error: 5.895610e-08
numerical: -2.069148 analytic: -2.069148, relative error: 1.066375e-08
numerical: 3.801051 analytic: 3.801051, relative error: 1.682355e-08
numerical: 3.215129 analytic: 3.215129, relative error: 3.635512e-09
numerical: 0.582542 analytic: 0.582542, relative error: 2.032481e-08
numerical: 4.386342 analytic: 4.386342, relative error: 1.856252e-08
numerical: -1.473942 analytic: -1.473942, relative error: 1.736991e-08
numerical: 0.881228 analytic: 0.881228, relative error: 3.219992e-08
numerical: 1.000707 analytic: 1.000707, relative error: 1.288456e-07
numerical: 1.229176 analytic: 1.229176, relative error: 1.656035e-08


In [38]:
# Same check as for the unregularized case, but with a fresh random W and 5e1
# passed as the last argument of the naive implementation.
# NOTE(review): that argument is presumably the regularization strength -- confirm.
W = 0.0001 * np.random.randn(3073, 10)
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1)
print('loss: %f' % loss)
print('sanity check: %f' % np.log(10))


def f(w):
    # Loss only (first element of the returned pair) as a function of the weights.
    return softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]


# Analytic versus numerical gradient; grad_check_sparse prints the comparison.
grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.436847
sanity check: 2.302585
numerical: 2.727743 analytic: 2.727743, relative error: 8.389053e-09
numerical: 1.096912 analytic: 1.096912, relative error: 9.185693e-08
numerical: -0.536266 analytic: -0.536266, relative error: 2.531658e-08
numerical: -0.170921 analytic: -0.170921, relative error: 4.777911e-08
numerical: -2.014265 analytic: -2.014265, relative error: 2.506138e-08
numerical: -5.213677 analytic: -5.213678, relative error: 1.385523e-08
numerical: 1.382830 analytic: 1.382830, relative error: 2.348121e-08
numerical: 4.870171 analytic: 4.870171, relative error: 1.561781e-08
numerical: 0.031351 analytic: 0.031351, relative error: 3.894605e-06
numerical: 1.954365 analytic: 1.954365, relative error: 1.620158e-08


In [42]:
from cs231n.classifiers.softmax import softmax_loss_vectorized

# Evaluate the vectorized implementation on the dev split. W is not created
# here: it is whatever an earlier cell left in the namespace.
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0)
print('loss: %f' % loss)
print('sanity check: %f' % np.log(10))


def f(w):
    # Loss only (first element of the returned pair) as a function of the weights.
    return softmax_loss_vectorized(w, X_dev, y_dev, 0)[0]


# Analytic versus numerical gradient; grad_check_sparse prints the comparison.
grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.421490
sanity check: 2.302585
numerical: 1.815581 analytic: 1.815581, relative error: 3.226324e-09
numerical: 2.897426 analytic: 2.897426, relative error: 1.554385e-10
numerical: -0.447616 analytic: -0.447616, relative error: 1.514487e-07
numerical: 2.643426 analytic: 2.643426, relative error: 8.689624e-09
numerical: 0.415965 analytic: 0.415964, relative error: 2.456939e-07
numerical: -5.543487 analytic: -5.543487, relative error: 5.325049e-09
numerical: -0.985358 analytic: -0.985358, relative error: 4.578919e-08
numerical: 1.374049 analytic: 1.374049, relative error: 1.579397e-08
numerical: -0.193109 analytic: -0.193110, relative error: 2.334576e-07
numerical: -0.688246 analytic: -0.688246, relative error: 6.917326e-08


In [43]:
# Vectorized implementation again, this time with 5e1 as the last argument.
# W is reused from an earlier cell.
# NOTE(review): that argument is presumably the regularization strength -- confirm.
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 5e1)
print('loss: %f' % loss)
print('sanity check: %f' % np.log(10))


def f(w):
    # Loss only (first element of the returned pair) as a function of the weights.
    return softmax_loss_vectorized(w, X_dev, y_dev, 5e1)[0]


# Analytic versus numerical gradient; grad_check_sparse prints the comparison.
grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.436847
sanity check: 2.302585
numerical: 0.520001 analytic: 0.520001, relative error: 3.123269e-08
numerical: -2.537698 analytic: -2.537698, relative error: 9.631761e-09
numerical: -0.250981 analytic: -0.250981, relative error: 4.390918e-07
numerical: -1.549709 analytic: -1.549709, relative error: 4.200822e-09
numerical: 1.483730 analytic: 1.483730, relative error: 3.311022e-08
numerical: 3.311037 analytic: 3.311036, relative error: 1.147055e-08
numerical: 3.049473 analytic: 3.049472, relative error: 1.131710e-08
numerical: -5.869287 analytic: -5.869287, relative error: 9.595099e-09
numerical: -1.341475 analytic: -1.341475, relative error: 2.387533e-08
numerical: -2.456425 analytic: -2.456425, relative error: 6.199383e-09


In [44]:
# Wall-clock timing of the naive implementation on the dev split.
tic = time.time()
loss_naive, grads_naive = softmax_loss_naive(W, X_dev, y_dev, 5e-6)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

# Same inputs through the vectorized implementation.
tic = time.time()
loss_vec, grads_vec = softmax_loss_vectorized(W, X_dev, y_dev, 5e-6)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vec, toc - tic))

# The two implementations are expected to agree: report the absolute gap in
# the loss and the Frobenius norm of the gap between the gradients.
print('Loss difference: %f' % np.abs(loss_vec - loss_naive))

difference = np.linalg.norm(grads_naive - grads_vec, ord='fro')
print('difference: %f' % difference)

naive loss: 2.421490e+00 computed in 0.019969s
Vectorized loss: 2.421490e+00 computed in 0.006658s
Loss difference: 0.000000
