In [None]:
from __future__ import print_function
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
import numpy as np
from random import shuffle
from random import randrange

%matplotlib inline
# Notebook-wide matplotlib defaults; every figure created later inherits them.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
# Draw image pixels as-is instead of smoothing between neighbouring pixels.
plt.rcParams['image.interpolation'] = 'nearest'
# Default colormap used when displaying images.
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

In [None]:
### Load the raw CIFAR-10 data.
# NOTE: machine-specific absolute path -- point it at the local directory
# that holds the CIFAR-10 batch files before running this cell.
cifar10_dir = '/Users/xiaoxiaoma/cifar-10-batches-py'

# Drop arrays left over from a previous run of this cell before loading
# again, so that two copies of the dataset are never held in memory at once.
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # Fresh kernel: nothing has been loaded yet, so there is nothing to free.
    # Only NameError is expected here; anything else (e.g. KeyboardInterrupt)
    # must propagate instead of being silently swallowed.
    pass
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)


### Subsample the data for more efficient code execution in this exercise
# Number of samples kept for the training, validation, test and dev splits.
num_train, num_val, num_test, num_dev = 500, 100, 100, 50

# Validation split: the num_val samples that directly follow the first
# num_train samples. Must be taken before X_train is truncated below.
mask = range(num_train, num_train + num_val)
X_val, y_val = X_train[mask], y_train[mask]

# Training split: the first num_train samples.
mask = range(num_train)
X_train, y_train = X_train[mask], y_train[mask]

# Development split: num_dev distinct samples drawn at random (without
# replacement) from the already-truncated training split.
mask = np.random.choice(num_train, num_dev, replace=False)
X_dev, y_dev = X_train[mask], y_train[mask]

# Test split: the first num_test samples of the test set.
mask = range(num_test)
X_test, y_test = X_test[mask], y_test[mask]


### Reshape the image data into rows
# Keep the leading (sample) axis and flatten everything else into one row
# per sample; -1 lets NumPy infer the flattened length.
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)


### Preprocessing: subtract the mean image
# first: compute the mean row from the training split only
mean_image = X_train.mean(axis=0)

# second: centre every split, in place, with that same training mean
X_train -= mean_image
X_val -= mean_image
X_dev -= mean_image
X_test -= mean_image

# third: append a constant column of ones (the bias trick) so that the
# classifier only has to optimise a single weight matrix W.
X_train = np.concatenate((X_train, np.ones((X_train.shape[0], 1))), axis=1)
X_val = np.concatenate((X_val, np.ones((X_val.shape[0], 1))), axis=1)
X_test = np.concatenate((X_test, np.ones((X_test.shape[0], 1))), axis=1)
X_dev = np.concatenate((X_dev, np.ones((X_dev.shape[0], 1))), axis=1)

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

In [None]:
def softmax_loss_naive(X, y, W, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs (shapes as implied by the indexing below):
    - X: array of shape (N, D); row i is one sample.
    - y: sequence of N integer class labels, with 0 <= y[i] < C.
    - W: weight array of shape (D, C).
    - reg: (float) L2 regularization strength.

    Returns a tuple of:
    - loss: cross-entropy loss averaged over the N samples, plus the
      regularization term reg * sum(W * W).
    - dW: gradient of that loss with respect to W; same shape as W.
    """
    loss = 0.0
    dW = np.zeros_like(W)
    num_classes = W.shape[1]
    num_train = X.shape[0]

    # Per-sample loss:
    #   L_i = -log(e^{f_{y_i}} / sum_j e^{f_j}) = log(sum_j e^{f_j}) - f_{y_i}
    for i in range(num_train):
        scores = X[i].dot(W)  # shape (C,)
        # Shift so the largest score is 0. This leaves the softmax output
        # unchanged but keeps np.exp from overflowing.
        scores = scores - np.max(scores)

        summ = 0.0
        for j in range(num_classes):
            summ += np.exp(scores[j])

        # log(summ) - score form avoids log(0) when exp(score) underflows.
        loss += np.log(summ) - scores[y[i]]

        # dL_i/dW[:, j] = (p_j - 1[j == y_i]) * x_i
        for j in range(num_classes):
            prob = np.exp(scores[j]) / summ
            if j == y[i]:
                prob -= 1.0
            dW[:, j] += prob * X[i]

    # Average the data term over the batch.
    loss /= num_train
    dW /= num_train

    # L2 regularization: reg * ||W||^2, whose gradient is 2 * reg * W.
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    return loss, dW

In [None]:
def softmax_loss_vectorized(X, y, W, reg):
    """
    Softmax loss function, vectorized version (no explicit loops).

    Inputs (shapes as implied by the indexing below):
    - X: array of shape (N, D); row i is one sample.
    - y: sequence of N integer class labels, with 0 <= y[i] < C.
    - W: weight array of shape (D, C).
    - reg: (float) L2 regularization strength.

    Returns a tuple of:
    - loss: cross-entropy loss averaged over the N samples, plus the
      regularization term reg * sum(W * W).
    - dW: gradient of that loss with respect to W; same shape as W.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)  # shape (N, C)
    # Shift each row so its largest score is 0. This leaves the softmax
    # output unchanged but keeps np.exp from overflowing.
    scores = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    sum_exp = np.sum(exp_scores, axis=1, keepdims=True)  # shape (N, 1)

    # L_i = log(sum_j e^{f_j}) - f_{y_i}; this form avoids log(0) when the
    # exponential of the correct-class score underflows.
    loss = np.sum(np.log(sum_exp[:, 0]) - scores[rows, y]) / num_train
    # L2 regularization: reg * ||W||^2.
    loss += reg * np.sum(W * W)

    # dL/dscores = softmax probabilities, minus 1 at the correct class.
    dscores = exp_scores / sum_exp
    dscores[rows, y] -= 1.0
    # Chain rule through scores = X.dot(W), averaged over the batch, plus
    # the regularization gradient 2 * reg * W.
    dW = X.T.dot(dscores) / num_train + 2 * reg * W

    return loss, dW