# TP4 Softmax - Gradient descent - L2 norm regularizer
## OBLIGATORY
### Deadline 29/04/2019 23:59



In this exercise you will:

- implement the **loss function** for the Softmax classifier
- implement the expression for its **analytic gradient**
- implement L2 norm regularizer
- **optimize** the loss function with **SGD**
- use online, full batch and mini batch


In [1]:
from __future__ import print_function

import random
import numpy as np
from data_utils import load_IRIS, load_CIFAR10
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading extenrnal modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
X_train, y_train, X_test, y_test = load_IRIS(test=True)

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Training data shape:  (100, 4)
Training labels shape:  (100,)
Test data shape:  (50, 4)
Test labels shape:  (50,)


## Softmax Classifier

### 1 Fill the missing part of the SoftamaxClassifier() class.
Your code  will  be written inside **Softmax_classifier.py**. 




### 2. Fill the missing part in the cell below 

In [3]:
# Run the softhmax classifier for dirrerent hyperparameters (regularization strength and
# learning rate). 
# Set the number of the iteration equal to 500 iteration and add a stopping criterion.
from collections import defaultdict
from Softmax_classifier import SoftmaxClassifier
softmax_classifier = SoftmaxClassifier()
results = {}
best_train = -1
best_train_softmax = None
best_test = -1
best_test_softmax = None
learning_rates = [0.00001, 0.0001, 0.001, 0.01, 0.1, 0, 1, 10]
regularization_strengths = [0.0001, 0.001, 0.01, 0.1, 0, 1, 10, 100]


################################################################################
# TODO:                                                                        #
# Train the Softmax classifier for 500 iterations (and add a stopping criterion)#
#for different learning rate and 
# regularization strength using online, full batch and mini batch GD                                                                             #
#                                                                              #
# compute the train and test accuracy for each case.                           #                                                     #
# save the best train softmax classifer in best_train_softmax.                 #
# and the best test softmax classifer in best_test_softmax.                    #
#                                                                              #
#                                                                              #
#                                                                              #
################################################################################
# Your code

best_acc_train = np.zeros(3)
best_acc_test = np.zeros(3)

#ONLINE GD TRAINING
train_accuracy_online = np.zeros((len(learning_rates), len(regularization_strengths)))
for j in range(0, len(learning_rates)):
    for i in range(0, len(regularization_strengths)):
        loss_online_train, w_opt_online = softmax_classifier.train(X_train, y_train, learning_rate=learning_rates[j], reg=regularization_strengths[i], num_iters=500, batch_size=1)
        y_pred_online_train = softmax_classifier.predict(X_train, y_train, w_opt_online)
        train_accuracy_online[j, i] = ((y_pred_online_train - y_train) == 0).sum() / len(y_pred_online_train)
        
print("trn_accuracy_online: ",train_accuracy_online)  
opt_reg_online, opt_lr_online = np.unravel_index(np.argmax(train_accuracy_online, axis=None), train_accuracy_online.shape)
print("best_trn_accuracy_online: ",train_accuracy_online[opt_reg_online, opt_lr_online]) 

#ONLINE GD TEST
#Uses the parameters learned from the training set
from Softmax_classifier import SoftmaxClassifier
softmax_classifier = SoftmaxClassifier()

loss_online_test, w_opt_online_test = softmax_classifier.train(X_train, y_train, learning_rate=learning_rates[opt_lr_online], reg=regularization_strengths[opt_reg_online], num_iters=500, batch_size=1)
y_pred_online_test = softmax_classifier.predict(X_test, y_test, w_opt_online_test)
test_accuracy_online = ((y_pred_online_test - y_test) == 0).sum() / len(y_pred_online_test)
print("test_accuracy_online: ", test_accuracy_online)
print("opt_lr:", learning_rates[opt_lr_online], "opt_reg:", regularization_strengths[opt_reg_online])

best_acc_train[0] =  train_accuracy_online[opt_reg_online,opt_lr_online]
best_acc_test[0] = test_accuracy_online




#FULL BATCH GD TRAINING
from Softmax_classifier import SoftmaxClassifier
softmax_classifier = SoftmaxClassifier()

train_accuracy_fullBatch = np.zeros((len(learning_rates), len(regularization_strengths)))
for j in range(0, len(learning_rates)):
    for i in range(0, len(regularization_strengths)):
        loss_full_train, w_opt_full = softmax_classifier.train(X_train, y_train, learning_rate=learning_rates[j], reg=regularization_strengths[i], num_iters=500, batch_size=X_train.shape[0])
        y_pred_full_train = softmax_classifier.predict(X_train, y_train, w_opt_full)
        train_accuracy_fullBatch[j, i] = ((y_pred_full_train - y_train) == 0).sum() / len(y_pred_full_train)
        
print("trn_accuracy_fullBatch: ",train_accuracy_fullBatch)  
opt_reg_full, opt_lr_full = np.unravel_index(np.argmax(train_accuracy_fullBatch, axis=None), train_accuracy_fullBatch.shape)
print("best_trn_accuracy_fullBatch: ",train_accuracy_fullBatch[opt_reg_full, opt_lr_full]) 

#FULL BATCH GD TEST 
#Uses the parameters learned from the training set
from Softmax_classifier import SoftmaxClassifier
softmax_classifier = SoftmaxClassifier()

loss_full_test, w_opt_full_test = softmax_classifier.train(X_train, y_train, learning_rate=learning_rates[opt_lr_full], reg=regularization_strengths[opt_reg_full], num_iters=500, batch_size=X_train.shape[0])
y_pred_full_test = softmax_classifier.predict(X_test, y_test, w_opt_full_test)
test_accuracy_fullBatch = ((y_pred_full_test - y_test) == 0).sum() / len(y_pred_full_test)
print("test_accuracy_fullBatch: ", test_accuracy_fullBatch)
print("opt_lr:", learning_rates[opt_lr_full], "opt_reg:", regularization_strengths[opt_reg_full])

best_acc_train[1] =  train_accuracy_fullBatch[opt_reg_full, opt_lr_full]
best_acc_test[1] = test_accuracy_fullBatch




#MINI-BATCH GD TRAINING
from Softmax_classifier import SoftmaxClassifier
softmax_classifier = SoftmaxClassifier()

train_accuracy_miniBatch = np.zeros((len(learning_rates), len(regularization_strengths)))
for j in range(0, len(learning_rates)):
    for i in range(0, len(regularization_strengths)):
        loss_mini_train, w_opt_mini_train = softmax_classifier.train(X_train, y_train, learning_rate=learning_rates[j], reg=regularization_strengths[i], num_iters=500, batch_size=20)
        y_pred_mini_train = softmax_classifier.predict(X_train, y_train, w_opt_mini_train)
        train_accuracy_miniBatch[j, i] = ((y_pred_mini_train - y_train) == 0).sum() / len(y_pred_mini_train)
    
print("trn_accuracy_miniBatch: ",train_accuracy_miniBatch)  
opt_reg_mini, opt_lr_mini = np.unravel_index(np.argmax(train_accuracy_miniBatch, axis=None), train_accuracy_miniBatch.shape)
print("best_trn_accuracy_miniBatch: ",train_accuracy_miniBatch[opt_reg_mini, opt_lr_mini]) 


#MINI-BATCH GD TEST
#Uses the parameters learned from the training set
from Softmax_classifier import SoftmaxClassifier
softmax_classifier = SoftmaxClassifier()

loss_mini_test, w_opt_mini_test = softmax_classifier.train(X_train, y_train, learning_rate=learning_rates[opt_lr_mini], reg=regularization_strengths[opt_reg_mini], num_iters=500, batch_size=20)
y_pred_mini_test = softmax_classifier.predict(X_test, y_test, w_opt_mini_test)
test_accuracy_miniBatch = ((y_pred_mini_test - y_test) == 0).sum() / len(y_pred_mini_test)
print("test_accuracy_miniBatch: ", test_accuracy_miniBatch)
print("opt_lr:", learning_rates[opt_lr_mini], "opt_reg:", regularization_strengths[opt_reg_mini])

best_acc_train[2] =  train_accuracy_miniBatch[opt_reg_mini, opt_lr_mini]
best_acc_test[2] = test_accuracy_miniBatch


best_train = best_acc_train[np.argmax(best_acc_train)]
best_test = best_acc_test[np.argmax(best_acc_test)]



#
###############################################################################
#                              END OF YOUR CODE                                #
################################################################################
    
# Print out results.
for batch_size, lr, reg in sorted(results):
    train_accuracy, test_accuracy = results[(batch_size, lr, reg)]
    print('batch_size %e lr %e reg %e train accuracy: %f test accuracy: %f' % (
                batch_size, lr, reg, train_accuracy, test_accuracy))
    
print('best training accuracy achieved : %f' % best_train)
print('best test accuracy achieved : %f' % best_test)


  calc_loss = (-1/X.shape[0])*np.sum(indicator*np.log(nom/denom), axis=1).sum()
  calc_loss = (-1/X.shape[0])*np.sum(indicator*np.log(nom/denom), axis=1).sum()
  calc_loss = (-1/X.shape[0])*np.sum(indicator*np.log(nom/denom), axis=1).sum()
  grad[i] = const*np.sum(np.repeat(X[:,i], num_classes).reshape((X.shape[0], num_classes))*(indicator - nom/denom), axis=0)


trn_accuracy_online:  [[0.41 0.41 0.41 0.41 0.41 0.41 0.41 0.41]
 [0.41 0.41 0.41 0.46 0.57 0.7  0.95 0.41]
 [0.61 0.73 0.73 0.86 0.73 0.59 0.41 0.41]
 [0.73 0.61 0.75 0.91 0.97 0.73 0.32 0.41]
 [0.91 0.95 0.96 0.64 0.96 0.27 0.41 0.32]
 [0.32 0.32 0.32 0.32 0.32 0.32 0.32 0.32]
 [0.32 0.32 0.32 0.32 0.32 0.32 0.32 0.32]
 [0.32 0.32 0.32 0.32 0.32 0.32 0.32 0.32]]
best_trn_accuracy_online:  0.97
test_accuracy_online:  0.82
opt_lr: 0.1 opt_reg: 0.1
trn_accuracy_fullBatch:  [[0.41 0.41 0.41 0.41 0.41 0.41 0.41 0.41]
 [0.41 0.41 0.41 0.45 0.62 0.71 0.7  0.41]
 [0.73 0.73 0.74 0.74 0.82 0.76 0.41 0.41]
 [0.91 0.95 0.96 0.97 0.96 0.74 0.41 0.41]
 [0.95 0.97 0.97 0.59 0.95 0.59 0.27 0.32]
 [0.32 0.32 0.32 0.32 0.32 0.32 0.32 0.32]
 [0.32 0.32 0.32 0.32 0.32 0.32 0.32 0.32]
 [0.32 0.32 0.32 0.32 0.32 0.32 0.32 0.32]]
best_trn_accuracy_fullBatch:  0.97
test_accuracy_fullBatch:  0.74
opt_lr: 0.01 opt_reg: 0.1
trn_accuracy_miniBatch:  [[0.41 0.41 0.41 0.41 0.41 0.41 0.41 0.41]
 [0.41 0.41 0.41 0

In [4]:
print(np.argmax(best_acc_train))
print(np.argmax(best_acc_test))
print(best_acc_train)
print(best_acc_test)

2
0
[0.97 0.97 0.98]
[0.82 0.74 0.18]
