In [2]:
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt

from __future__ import print_function

%matplotlib inline
# Notebook-wide plotting defaults, applied in one call.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),     # default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
def get_CIFAR10_data(num_training=49000, num_val=1000, num_test=1000, num_dev=500,
                     cifar10_dir='cs231n/datasets/cifar-10-batches-py'):
    """
    Load CIFAR-10 from disk and prepare it for a linear classifier.

    The raw data is split into train / validation / test / dev subsets, each
    image is flattened to a row vector, the per-feature mean of the training
    split is subtracted from every split, and a constant column of ones is
    appended so a bias can be folded into the weight matrix.

    Inputs:
    - num_training: number of leading training examples kept as the train split.
    - num_val: number of examples immediately after the train split used for
      validation.
    - num_test: number of leading test examples kept.
    - num_dev: size of the dev split, sampled without replacement from the
      train split.
    - cifar10_dir: directory passed to load_CIFAR10.

    Returns a tuple
      (X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev)
    where every X has shape (N, D + 1), D being the flattened image size, and
    every y has shape (N,).
    """
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # The validation rows are taken first, while X_train still refers to the
    # full training array; the train split is cut down afterwards.
    mask = np.arange(num_training, num_training + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = np.arange(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = np.arange(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    # Small random subset of the train split for quick experiments.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Flatten every image into a single row.
    X_train = X_train.reshape((X_train.shape[0], -1))
    X_val = X_val.reshape((X_val.shape[0], -1))
    X_test = X_test.reshape((X_test.shape[0], -1))
    X_dev = X_dev.reshape((X_dev.shape[0], -1))

    # Centre all splits with the mean image of the *training* split only.
    # NOTE(review): the in-place subtraction assumes the loader returns
    # floating-point arrays -- confirm against load_CIFAR10.
    mean_img = np.mean(X_train, axis=0)
    X_train -= mean_img
    X_val -= mean_img
    X_test -= mean_img
    X_dev -= mean_img

    # Append a column of ones (bias trick).
    X_train = np.hstack((X_train, np.ones((X_train.shape[0], 1))))
    X_val = np.hstack((X_val, np.ones((X_val.shape[0], 1))))
    X_test = np.hstack((X_test, np.ones((X_test.shape[0], 1))))
    X_dev = np.hstack((X_dev, np.ones((X_dev.shape[0], 1))))

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev


# Drop arrays left over from an earlier run of this cell before loading the
# data again, so two copies are not held at once. This has to live at cell
# level: indented under the function after its `return` it is unreachable.
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # Nothing was loaded yet (fresh kernel), so there is nothing to clear.
    pass

# Build all eight arrays, then report their shapes as a quick sanity check.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
for label, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
    ('dev data shape: ', X_dev),
    ('dev labels shape: ', y_dev),
):
    print(label, array.shape)

Train data shape:  (49000, 3073)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3073)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3073)
Test labels shape:  (1000,)
dev data shape:  (500, 3073)
dev labels shape:  (500,)


In [46]:
from cs231n.classifiers.softmax import softmax_loss_naive
from cs231n.gradient_check import grad_check_sparse
import time

# Small random weights: the softmax output should then be close to uniform
# over the 10 columns of W, so the loss is expected to be near log(10).
W = 0.0001 * np.random.randn(3073, 10)
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0)
print('loss: %f' % (loss,))
print('sanity check: %f' % np.log(10))

# Compare the analytic gradient with a numerical estimate. The fourth
# argument is 0 here (presumably the regularization strength -- confirm
# against softmax_loss_naive).
def f(w):
    return softmax_loss_naive(w, X_dev, y_dev, 0)[0]

grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.392790
sanity check: 2.302585
numerical: 2.326685 analytic: 2.326685, relative error: 1.920158e-08
numerical: -4.766981 analytic: -4.766981, relative error: 1.239652e-09
numerical: -0.679583 analytic: -0.679583, relative error: 2.591898e-08
numerical: -4.126045 analytic: -4.126045, relative error: 5.349668e-10
numerical: -0.912941 analytic: -0.912941, relative error: 4.218948e-08
numerical: 1.123925 analytic: 1.123925, relative error: 3.160244e-08
numerical: -3.896705 analytic: -3.896705, relative error: 3.350579e-09
numerical: -0.072943 analytic: -0.072944, relative error: 1.315906e-06
numerical: 3.216281 analytic: 3.216281, relative error: 7.318327e-09
numerical: -0.268001 analytic: -0.268001, relative error: 1.675658e-08


In [47]:
# Fresh small random weights, then the same check as before but with the
# fourth argument set to 5e1 (presumably the regularization strength --
# confirm against softmax_loss_naive).
W = 0.0001 * np.random.randn(3073, 10)
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1)
print('loss: %f' % (loss,))
print('sanity check: %f' % np.log(10))

def f(w):
    return softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]

grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.379224
sanity check: 2.302585
numerical: -2.936351 analytic: -2.936351, relative error: 1.354822e-08
numerical: -1.495440 analytic: -1.495440, relative error: 4.551448e-09
numerical: 2.255896 analytic: 2.255896, relative error: 1.372079e-08
numerical: -0.337261 analytic: -0.337261, relative error: 2.366239e-07
numerical: -0.680771 analytic: -0.680771, relative error: 1.147085e-08
numerical: -1.026642 analytic: -1.026642, relative error: 2.000904e-08
numerical: 2.712696 analytic: 2.712696, relative error: 8.578982e-09
numerical: 0.404190 analytic: 0.404190, relative error: 1.030118e-07
numerical: -2.789124 analytic: -2.789124, relative error: 2.029008e-08
numerical: -1.050986 analytic: -1.050986, relative error: 6.133700e-08


In [48]:
from cs231n.classifiers.softmax import softmax_loss_vectorized

# Vectorized implementation, fourth argument 0. W is NOT re-drawn here: this
# cell reuses the weights created by the preceding cell.
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0)
print('loss: %f' % (loss,))
print('sanity check: %f' % np.log(10))

def f(w):
    return softmax_loss_vectorized(w, X_dev, y_dev, 0)[0]

grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.364067
sanity check: 2.302585
numerical: -0.095231 analytic: -0.095231, relative error: 2.609341e-07
numerical: -2.304765 analytic: -2.304765, relative error: 6.181373e-08
numerical: 0.219032 analytic: 0.219032, relative error: 1.058366e-07
numerical: 0.099840 analytic: 0.099839, relative error: 1.148820e-06
numerical: 0.179334 analytic: 0.179334, relative error: 2.485430e-07
numerical: 0.286631 analytic: 0.286631, relative error: 4.662907e-08
numerical: 3.339376 analytic: 3.339375, relative error: 2.841446e-08
numerical: 1.412158 analytic: 1.412158, relative error: 1.261559e-08
numerical: -0.818849 analytic: -0.818850, relative error: 1.231463e-07
numerical: -0.251875 analytic: -0.251875, relative error: 3.645802e-08


In [49]:
# Vectorized implementation again, now with the fourth argument set to 5e1;
# still using the W left in the namespace by an earlier cell.
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 5e1)
print('loss: %f' % (loss,))
print('sanity check: %f' % np.log(10))

def f(w):
    return softmax_loss_vectorized(w, X_dev, y_dev, 5e1)[0]

grad_numerical = grad_check_sparse(f, W, grad)

loss: 2.379224
sanity check: 2.302585
numerical: 1.488291 analytic: 1.488291, relative error: 1.101785e-08
numerical: 0.091320 analytic: 0.091320, relative error: 1.048846e-06
numerical: 1.329217 analytic: 1.329217, relative error: 4.549728e-08
numerical: 2.513550 analytic: 2.513550, relative error: 5.727604e-08
numerical: 0.832895 analytic: 0.832895, relative error: 1.887034e-08
numerical: 3.792607 analytic: 3.792607, relative error: 1.121168e-08
numerical: -1.131150 analytic: -1.131150, relative error: 9.047976e-09
numerical: 1.550063 analytic: 1.550063, relative error: 5.412928e-08
numerical: 0.950381 analytic: 0.950381, relative error: 1.194170e-07
numerical: 3.453530 analytic: 3.453530, relative error: 1.993230e-08


In [50]:
# Time the naive implementation on the dev split.
tic = time.time()
loss_naive, grads_naive = softmax_loss_naive(W, X_dev, y_dev, 5e-6)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

# Time the vectorized implementation on the same inputs.
tic = time.time()
loss_vec, grads_vec = softmax_loss_vectorized(W, X_dev, y_dev, 5e-6)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vec, toc - tic))

# Both implementations should agree: compare the scalar losses directly and
# the gradient matrices through the Frobenius norm of their difference.
print('Loss difference: %f' % abs(loss_naive - loss_vec))

difference = np.linalg.norm(grads_naive - grads_vec, ord='fro')
print('difference: %f' % (difference,))

naive loss: 2.364067e+00 computed in 0.019523s
Vectorized loss: 2.364067e+00 computed in 0.006603s
Loss difference: 0.000000
difference: 0.000000
