In [1]:
import random
import numpy as np
from ift725.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide plotting defaults: default figure size, nearest-neighbour
# image interpolation and a gray colormap.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500,
                     cifar10_dir='ift725/datasets/cifar-10-batches-py', seed=None):
    """
    Load the CIFAR-10 dataset from disk and preprocess it for a linear
    classifier.

    The first `num_training` training images form the training split, the next
    `num_validation` training images form the validation split, the first
    `num_test` test images form the test split, and `num_dev` images drawn
    without replacement from the training split form a small development split.
    Every split is flattened to one row per image, centered with the mean
    training image, and extended with a trailing column of ones (bias trick).

    Inputs:
    - num_training, num_validation, num_test, num_dev: split sizes.
    - cifar10_dir: directory passed to load_CIFAR10.
    - seed: optional seed for drawing the dev split. With None (the default)
      the draw uses NumPy's global random state, exactly as before.

    Returns a tuple (X_train, y_train, X_val, y_val, X_test, y_test, X_dev,
    y_dev). Each X_* is 2-D with shape (N, D + 1); each y_* holds the labels
    selected with the same indices as the matching X_*.
    """
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data. The validation split must be taken before X_train
    # is overwritten by its own subsample.
    val_idx = np.arange(num_training, num_training + num_validation)
    train_idx = np.arange(num_training)
    test_idx = np.arange(num_test)
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Dev split: a random subset of the training split. A dedicated
    # RandomState makes the draw reproducible when a seed is supplied.
    rng = np.random if seed is None else np.random.RandomState(seed)
    dev_idx = rng.choice(num_training, num_dev, replace=False)
    X_dev, y_dev = X_train[dev_idx], y_train[dev_idx]

    # Preprocessing: reshape the image data into rows
    splits = [np.reshape(X, (X.shape[0], -1)) for X in (X_train, X_val, X_test, X_dev)]

    # Normalize the data: subtract the mean *training* image from every split.
    # The subtraction is done out of place so it also works when the loader
    # returns an integer dtype (an in-place `-=` with a float mean raises a
    # casting error there).
    mean_image = np.mean(splits[0], axis=0)

    # Center each split and append the bias column of ones.
    X_train, X_val, X_test, X_dev = [
        np.hstack([X - mean_image, np.ones((X.shape[0], 1))]) for X in splits
    ]

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev


# Load every split once, then print the shape of each array as a sanity check.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
for caption, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
    ('dev data shape: ', X_dev),
    ('dev labels shape: ', y_dev),
):
    print(caption, array.shape)

Train data shape:  (49000, 3073)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3073)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3073)
Test labels shape:  (1000,)
dev data shape:  (500, 3073)
dev labels shape:  (500,)


In [3]:
from ift725.classifiers.softmax import softmax_naive_loss_function
import time

# Small random initial softmax weights: 3073 rows (matching the width of the
# preprocessed data, bias column included) by 10 columns (one per class).
W = 0.0001 * np.random.randn(3073, 10)

In [4]:
# Prototype on the small dev split; these are aliases, not copies.
X, y = X_dev, y_dev


In [5]:
# Start from a zero loss and a zero gradient shaped like W. (The former
# `loss*0` / `dW*0` re-zeroing was a no-op on values that are already zero.)
loss = 0.0
dW = np.zeros_like(W)

# Problem dimensions: number of rows in X and number of columns in W.
num_train = X.shape[0]
num_class = W.shape[1]

In [6]:
# Raw class scores, then shift every row so that its largest entry is 0.
# The shift leaves the softmax unchanged and keeps exp() from overflowing.
scores = np.dot(X, W)
row_max = scores.max(axis=1, keepdims=True)
f = scores - row_max
f

array([[-0.18104247,  0.        , -0.85221824, ..., -0.10439289,
        -0.6412275 , -0.8101488 ],
       [-0.63658807, -0.35433154,  0.        , ..., -0.33561831,
        -0.37079301, -0.42414129],
       [-0.68235898, -0.60299003, -0.85404082, ..., -0.18372438,
        -0.02639542, -0.57437789],
       ...,
       [-0.7324268 , -0.74505883, -0.29137426, ..., -0.7874497 ,
        -0.60610546,  0.        ],
       [-0.707259  , -1.83676208, -0.53651233, ..., -1.162221  ,
        -0.93154186,  0.        ],
       [-0.38328642, -0.16830871, -0.27042709, ..., -0.0149319 ,
         0.        , -0.8712581 ]])

In [7]:
# Per-row sum of the exponentiated shifted scores (the softmax denominator).
suma = np.exp(f).sum(axis=1)
suma.shape

(500,)

In [8]:
# Softmax: exponentiate the shifted scores once, then normalise every row by
# its own sum (keepdims=True keeps the divisor broadcastable across columns).
exp_f = np.exp(f)
probability = exp_f / exp_f.sum(axis=1, keepdims=True)

In [9]:
# Sanity check: expect (num_train, num_class), i.e. the same shape as the scores.
probability.shape

(500, 10)

In [10]:
# Summed (not averaged) negative log-probability of each sample's true class.
sample_idx = np.arange(num_train)
true_class_prob = probability[sample_idx, y]
loss = np.sum(-np.log(true_class_prob))

# NOTE: this edits `probability` in place, subtracting 1 at each true-class
# entry, so after this cell it no longer holds probabilities. Re-running the
# cell subtracts 1 again; re-run the cell that builds `probability` first.
probability[sample_idx, y] -= 1
probability

array([[ 0.1205019 ,  0.14441743,  0.0615894 , ...,  0.13010152,
         0.0760568 ,  0.0642357 ],
       [ 0.0759809 ,  0.1007597 ,  0.14360551, ...,  0.10266299,
        -0.90088537,  0.0939656 ],
       [ 0.07742282,  0.08381823,  0.06520914, ...,  0.12747447,
        -0.85080636,  0.08625108],
       ...,
       [ 0.07483243,  0.07389309,  0.11631522, ...,  0.07082616,
        -0.91509163,  0.15566063],
       [ 0.10966505, -0.96455693,  0.13008361, ...,  0.06957941,
         0.08763214,  0.22244718],
       [ 0.1051128 ,  0.13032245,  0.11767109, ...,  0.15192524,
         0.1542108 ,  0.06452562]])

In [11]:
# Weight gradient of the summed loss: X transposed times `probability`, which
# at this point holds the values left by the in-place "-= 1" update above.
dW = np.dot(X.T, probability)
dW

array([[-1.23932229e+03, -4.73376822e+02, -2.89847538e+01, ...,
        -3.25967613e+02, -8.48569558e+02, -1.56251204e+03],
       [-1.89332930e+03, -7.42702137e+01, -2.12503295e+02, ...,
        -4.15074029e+02, -1.40835042e+03, -1.65596326e+03],
       [-3.19773672e+03, -2.89085761e+01, -1.13885033e+01, ...,
        -5.89641607e+02, -2.32529070e+03, -1.93775167e+03],
       ...,
       [-1.42685204e+03,  5.76930100e+01, -2.63813149e+02, ...,
         3.28336801e+02,  7.12363273e+02, -2.79464728e+02],
       [-2.34049750e+03, -5.70825336e+01, -2.50880078e+02, ...,
         1.02041285e+03,  1.82898658e+02, -3.59818672e+02],
       [ 2.39635273e+00,  3.79009599e+00,  3.83538202e+00, ...,
        -6.07449985e+00,  1.01439689e+01,  8.77245897e+00]])