In [1]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cse493g1/assignments/assignment3/'
FOLDERNAME = 'cse493g1/assignment3/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

# This downloads the COCO dataset to your Drive
# if it doesn't already exist.
%cd /content/drive/My\ Drive/$FOLDERNAME/cse493g1/datasets/
!bash get_datasets.sh
%cd /content/drive/My\ Drive/$FOLDERNAME

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/cse493g1/assignment3/cse493g1/datasets
/content/drive/My Drive/cse493g1/assignment3


# Multi-Layer Fully Connected Network Part 2
In this exercise, you will extend your fully connected network from Assignment 2 with Dropout and Normalization Layers. First, you will copy and paste all the necessary parts from Assignment 2. Then you will re-train your model from A2 as a baseline. Next, you will complete the batchnorm and dropout notebook, and then return to this notebook and create an improved model using dropout and normalization.

In [2]:
# Setup cell.
import time
import numpy as np
import matplotlib.pyplot as plt
from cse493g1.classifiers.fc_net import *
from cse493g1.data_utils import get_CIFAR10_data
from cse493g1.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cse493g1.solver import Solver

%matplotlib inline
plt.rcParams["figure.figsize"] = (10.0, 8.0)  # Set default size of plots.
plt.rcParams["image.interpolation"] = "nearest"
plt.rcParams["image.cmap"] = "gray"

%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """Returns relative error."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

	You will need to compile a Cython extension for a portion of this assignment.
	The instructions to do this will be given in a section of the notebook below.


In [3]:
# Load the (preprocessed) CIFAR-10 data.
data = get_CIFAR10_data()
for k, v in list(data.items()):
    print(f"{k}: {v.shape}")

X_train: (49000, 3, 32, 32)
y_train: (49000,)
X_val: (1000, 3, 32, 32)
y_val: (1000,)
X_test: (1000, 3, 32, 32)
y_test: (1000,)


# Copy necessary parts from A2.
Fill in the following functions by copying and pasting your answers from A2:
`affine_forward` in `cse493g1/layers.py`
`affine_backward` in `cse493g1/layers.py`
`relu_forward` in `cse493g1/layers.py`
`relu_backward` in `cse493g1/layers.py`
`softmax_loss` in `cse493g1/layers.py`
`sgd_momentum` in `cse493g1/optim.py`
`rmsprop` in `cse493g1/optim.py`
`adam` in `cse493g1/optim.py`



# Train baseline model from A2
Copy and Paste your `FullyConnectedNet` model from `cse493g1/classifiers/fc_net.py` in Assignment 2 into `FullyConnectedNetBasic` in the file `cse493g1/classifiers/fc_net.py` in this assignment. Use the best hyperparms that you found from the previous assignment to train this model. Call this model `best_model_basic`

In [4]:
best_model_basic = None

################################################################################
# TODO: Train the best FullyConnectedNetBasic that you can on CIFAR-10. Store your best model in  #
# the best_model_basic variable.                                                     #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

lr = 0.00019810883630280872
reg = 0.0051124710979752646
weight_scale = 0.011387564370987585
H1, H2 = 100, 100
print(f"lr: {lr}, reg: {reg}, weight_scale: {weight_scale}")
net = FullyConnectedNetBasic(hidden_dims=[H1, H2], reg=reg ,weight_scale=weight_scale,
  dtype=np.float64)

solver = Solver(net, data,
                    update_rule='adam',
                    optim_config={
                      'learning_rate': lr,
                    },
                    lr_decay=0.95,
                    num_epochs=15, batch_size=200, print_every=100,
                    verbose = True)
solver.train()
y_val_pred = np.argmax(net.loss(data['X_val']), axis=1)
val_acc = (y_val_pred == data['y_val']).mean()
best_model_basic = net

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

lr: 0.00019810883630280872, reg: 0.0051124710979752646, weight_scale: 0.011387564370987585
(Iteration 1 / 3675) loss: 2.426512
(Epoch 0 / 15) train acc: 0.163000; val_acc: 0.172000
(Iteration 101 / 3675) loss: 1.646942
(Iteration 201 / 3675) loss: 1.622002
(Epoch 1 / 15) train acc: 0.465000; val_acc: 0.475000
(Iteration 301 / 3675) loss: 1.751139
(Iteration 401 / 3675) loss: 1.353533
(Epoch 2 / 15) train acc: 0.509000; val_acc: 0.479000
(Iteration 501 / 3675) loss: 1.481615
(Iteration 601 / 3675) loss: 1.251106
(Iteration 701 / 3675) loss: 1.466859
(Epoch 3 / 15) train acc: 0.560000; val_acc: 0.509000
(Iteration 801 / 3675) loss: 1.455386
(Iteration 901 / 3675) loss: 1.361844
(Epoch 4 / 15) train acc: 0.562000; val_acc: 0.486000
(Iteration 1001 / 3675) loss: 1.276741
(Iteration 1101 / 3675) loss: 1.353588
(Iteration 1201 / 3675) loss: 1.298143
(Epoch 5 / 15) train acc: 0.560000; val_acc: 0.515000
(Iteration 1301 / 3675) loss: 1.248636
(Iteration 1401 / 3675) loss: 1.240691
(Epoch 6 / 1

# Evaluate baseline model from A2
Evaluate above baseline model.

In [5]:
y_test_pred = np.argmax(best_model_basic.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(best_model_basic.loss(data['X_val']), axis=1)
print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())

Validation set accuracy:  0.541
Test set accuracy:  0.524


# Train improved model
Design a new model in `FullyConnectedNetImproved` in the file `cse493g1/classifiers/fc_net.py`. You can start by having `FullyConnectedNetImproved` be the same design as `FullyConnectedNetBasic`. Next, complete the BatchNormoralization.ipynb and Dropout.ipynb notebooks. Then return to this notebook and complete `FullyConnectedNetImproved` by adding in batchnorm and dropout. Try to beat the accuracy of your baseline model! You may have to adjust your hyperparameters.

In [37]:
best_model_improved = None

################################################################################
# TODO: Train the best FullyConnectedNetImproved that you can on CIFAR-10. You might   #
# find batch/layer normalization and dropout useful. Store your best model in  #
# the best_mode_improved variable.                                                     #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

lr = 0.00122810883630280872
reg = 0.0011124710979752646
weight_scale = 0.011387564370987585
dropout_keep_ratio = 0.9
H1, H2, H3 = 100, 50, 100
print(f"lr: {lr}, reg: {reg}, weight_scale: {weight_scale}")
net = FullyConnectedNetImproved(hidden_dims=[H1, H2, H3], dropout_keep_ratio=dropout_keep_ratio,
                                reg=reg ,weight_scale=weight_scale, dtype=np.float64,
                                normalization= 'layernorm')

solver = Solver(net, data,
                    update_rule='adam',
                    optim_config={
                      'learning_rate': lr,
                    },
                    lr_decay=0.95,
                    num_epochs=15, batch_size=200, print_every=100,
                    verbose = True)
solver.train()
y_val_pred = np.argmax(net.loss(data['X_val']), axis=1)
val_acc = (y_val_pred == data['y_val']).mean()
best_model_improved = net

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

lr: 0.0012281088363028088, reg: 0.0011124710979752647, weight_scale: 0.011387564370987585
(Iteration 1 / 3675) loss: 2.331599
(Epoch 0 / 15) train acc: 0.176000; val_acc: 0.172000
(Iteration 101 / 3675) loss: 1.779805
(Iteration 201 / 3675) loss: 1.694969
(Epoch 1 / 15) train acc: 0.440000; val_acc: 0.448000
(Iteration 301 / 3675) loss: 1.696769
(Iteration 401 / 3675) loss: 1.651655
(Epoch 2 / 15) train acc: 0.441000; val_acc: 0.472000
(Iteration 501 / 3675) loss: 1.500860
(Iteration 601 / 3675) loss: 1.732796
(Iteration 701 / 3675) loss: 1.504536
(Epoch 3 / 15) train acc: 0.493000; val_acc: 0.481000
(Iteration 801 / 3675) loss: 1.687051
(Iteration 901 / 3675) loss: 1.443392
(Epoch 4 / 15) train acc: 0.494000; val_acc: 0.496000
(Iteration 1001 / 3675) loss: 1.534725
(Iteration 1101 / 3675) loss: 1.623322
(Iteration 1201 / 3675) loss: 1.501162
(Epoch 5 / 15) train acc: 0.540000; val_acc: 0.500000
(Iteration 1301 / 3675) loss: 1.501353
(Iteration 1401 / 3675) loss: 1.519748
(Epoch 6 / 15

# Test Your Model!
Run your best model on the validation and test sets. Are you able to outperform the baseline model that has no Batchnorm or Dropout?

In [39]:
y_test_pred = np.argmax(best_model_improved.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(best_model_improved.loss(data['X_val']), axis=1)
print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())

Validation set accuracy:  0.539
Test set accuracy:  0.545
