# MNS - Biologically Plausible Deep Learning
## Simple Backprop Baselines (DNNs and CNNs)

#### 1. Data Setup - Download and Loading-In
#### 2. PyTorch DNNs with Bayesian Optimization
#### 3. PyTorch CNNs with Bayesian Optimization
#### 4. Run Example 784/3072 - 500 - 10 Architecture on all Datasets 

In [None]:
!pip install -r requirements.txt --quiet

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# Import Packages
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

# Import tf for tensorboard monitoring of training
import tensorflow as tf

# Import Network Architectures
from models.DNN import DNN, eval_dnn
from models.CNN import CNN, eval_cnn

# Import log-helper/learning plot functions
from utils.helpers import *
from utils.logger import *

# Import Bayesian Optimization Module
from utils.bayesian_opt import BO_NN

In [3]:
# Device configuration: use CUDA when torch reports it as available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Report either the name of GPU 0 or the CPU fallback
print("Torch Device: {}".format(
    torch.cuda.get_device_name(0) if device.type == 'cuda' else "Local CPU"))

Torch Device: Local CPU


In [4]:
# Create all necessary directories if non-existent.
# (No `global` statement is needed: names assigned at the top level of a
# notebook cell are already module-level.)
data_dir = os.path.join(os.getcwd(), "data")

if not os.path.exists(data_dir):
    os.makedirs(data_dir)
    print("Created New Data Directory")

# Create the log directory, or remove stale TensorBoard event files from it
log_dir = os.path.join(os.getcwd(), "logs")
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
    print("Created New Log Directory")
else:
    # Only files whose name starts with "events" are deleted; other logs stay
    filelist = [f for f in os.listdir(log_dir) if f.startswith("events")]
    for f in filelist:
        os.remove(os.path.join(log_dir, f))
    print("Deleted Old TF/TensorBoard Log Files in Existing Log Directory")

# Path of the models directory (only recorded here, not created)
models_dir = os.path.join(os.getcwd(), "models")

Deleted Old TF/TensorBoard Log Files in Existing Log Directory


# Download, Import and Plot Datasets

In [5]:
# Fetch the datasets used below. Per the recorded output this covers MNIST,
# Fashion-MNIST and CIFAR-10, and reports when no download is needed.
download_data()

No download of MNIST needed.
No download of Fashion-MNIST needed.
No download of CIFAR-10 needed.


In [6]:
# MNIST dataset
X_mnist, y_mnist = get_data(num_samples=70000, dataset="mnist")
# Fashion-MNIST dataset
X_fashion, y_fashion = get_data(num_samples=70000, dataset="fashion")
# CIFAR-10 dataset
X_cifar10, y_cifar10 = get_data(num_samples=60000, dataset="cifar10")

# Simple Feedforward Neural Net

### Run a Simple DNN

In [7]:
# Batch size, number of epochs and learning rate for the MNIST DNN baseline
batch_size = 100
num_epochs = 5
learning_rate = 0.001

# Stratified train/test split of MNIST with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, stratify=y_mnist, random_state=0)

# Cross-entropy loss
criterion = nn.CrossEntropyLoss()

# 784 -> 500 -> 10 network and the Adam optimizer over its parameters
dnn_model = DNN(h_sizes=[784, 500], out_size=10)
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

In [10]:
# Train the MNIST DNN with verbose progress output and logging enabled
model = train_model(
    "dnn", dnn_model, num_epochs,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=20000,
    model_fname="models/temp_model_dnn_mnist.ckpt",
    verbose=True,
    logging=True,
)

# Score the trained model on the held-out test split
score = get_test_error("dnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))

train| epoch  1| batch 20000/41996| acc: 0.9894| loss: 0.0358| time: 0.29
valid| epoch  1| batch 20000/41996| acc: 0.9724| loss: 0.0917| time: 0.08
-------------------------------------------------------------------------
train| epoch  1| batch 40000/41996| acc: 0.9916| loss: 0.0301| time: 0.29
valid| epoch  1| batch 40000/41996| acc: 0.9727| loss: 0.0912| time: 0.10
-------------------------------------------------------------------------
train| epoch  2| batch 20000/41996| acc: 0.9921| loss: 0.0277| time: 0.29
valid| epoch  2| batch 20000/41996| acc: 0.9735| loss: 0.0895| time: 0.09
-------------------------------------------------------------------------
train| epoch  2| batch 40000/41996| acc: 0.9940| loss: 0.0235| time: 0.29
valid| epoch  2| batch 40000/41996| acc: 0.9729| loss: 0.0911| time: 0.08
-------------------------------------------------------------------------
train| epoch  3| batch 20000/41996| acc: 0.9943| loss: 0.0203| time: 0.31
valid| epoch  3| batch 20000/41996| ac

### Compute Cross Validation Accuracy for all 3 Datasets

In [11]:
# Run 3-fold cross-validation on specific architecture for MNIST
# (num_layers=1 with h_l_1=500 units, 5 epochs). In the recorded output the
# call prints one score per fold and the displayed value is their mean.
eval_dnn("mnist", batch_size, learning_rate,
         num_layers=1, h_l_1=500,
         num_epochs=5, k_fold=3, verbose=True)

Dataset: mnist
Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 0: 784 Units
	 Layer 1: 500 Units
Cross-Validation Score Fold 1: 0.9694892012341446
Cross-Validation Score Fold 2: 0.9687995542793468
Cross-Validation Score Fold 3: 0.975011786892975


0.9711001808021554

In [10]:
# Run 3-fold cross-validation on specific architecture for Fashion-MNIST
# (num_layers=1 with h_l_1=500 units, 5 epochs). Reuses batch_size and
# learning_rate defined in an earlier cell.
eval_dnn("fashion", batch_size, learning_rate,
         num_layers=1, h_l_1=500,
         num_epochs=5, k_fold=3, verbose=True)

Dataset: fashion
Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 0: 784 Units
	 Layer 1: 500 Units
Cross-Validation Score Fold 1: 0.876092544987
Cross-Validation Score Fold 2: 0.877111015859
Cross-Validation Score Fold 3: 0.758551221603


0.8372515941498805

In [11]:
# Run 3-fold cross-validation on specific architecture for CIFAR-10
# (num_layers=1 with h_l_1=500 units, 5 epochs). The recorded output lists
# 3072 input units for this dataset.
eval_dnn("cifar10", batch_size, learning_rate,
         num_layers=1, h_l_1=500,
         num_epochs=5, k_fold=3, verbose=True)

Dataset: cifar10
Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 0: 3072 Units
	 Layer 1: 500 Units
Cross-Validation Score Fold 1: 0.4278
Cross-Validation Score Fold 2: 0.434
Cross-Validation Score Fold 3: 0.40585


0.42255

### Run Bayesian Optimization on DNN Hyperparameters

In [9]:
# Search space for Bayesian Optimization over DNN architectures:
# each entry maps a hyperparameter name to its (lower, upper) bound.
hyper_space_dnn = {'batch_size': (10, 500),
                   'learning_rate': (0.0001, 0.05),
                   'num_layers': (1, 6)}
# One width entry per possible hidden layer: h_l_1 ... h_l_6
hyper_space_dnn.update(
    {'h_l_{}'.format(layer): (30, 500) for layer in range(1, 7)})

# Number of Bayesian Optimization iterations per run
bo_iters = 50

In [13]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on DNN for MNIST:
# bo_iters iterations over hyper_space_dnn, with num_epochs=10 and k_fold=3.
# Per the recorded output, progress is logged to ./logs/bo_logs_dnn_mnist.json.
opt_log = BO_NN(bo_iters, eval_dnn, "dnn", "mnist", hyper_space_dnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

Start Logging to ./logs/bo_logs_dnn_mnist.json
BO iter  1 | cv-acc: 0.9590 | best-acc: 0.9590 | time: 29.70
BO iter  2 | cv-acc: 0.9629 | best-acc: 0.9629 | time: 38.18
BO iter  3 | cv-acc: 0.9004 | best-acc: 0.9629 | time: 31.98
BO iter  4 | cv-acc: 0.1361 | best-acc: 0.9629 | time: 122.09
BO iter  5 | cv-acc: 0.9315 | best-acc: 0.9629 | time: 39.83
BO iter  6 | cv-acc: 0.5563 | best-acc: 0.9629 | time: 238.18
BO iter  7 | cv-acc: 0.8754 | best-acc: 0.9629 | time: 34.68
BO iter  8 | cv-acc: 0.9472 | best-acc: 0.9629 | time: 37.67
BO iter  9 | cv-acc: 0.8748 | best-acc: 0.9629 | time: 36.25
BO iter 10 | cv-acc: 0.9255 | best-acc: 0.9629 | time: 40.50
BO iter 11 | cv-acc: 0.9371 | best-acc: 0.9629 | time: 33.72
BO iter 12 | cv-acc: 0.6745 | best-acc: 0.9629 | time: 37.56
BO iter 13 | cv-acc: 0.9253 | best-acc: 0.9629 | time: 33.46
BO iter 14 | cv-acc: 0.9496 | best-acc: 0.9629 | time: 44.18
BO iter 15 | cv-acc: 0.9284 | best-acc: 0.9629 | time: 41.98
BO iter 16 | cv-acc: 0.9738 | best-a

In [None]:
# Short follow-up Bayesian Optimization run on DNN for MNIST
# (2 iterations, 2 epochs). Per the recorded output it loads the previously
# existing BO log before logging to the same file.
opt_log = BO_NN(2, eval_dnn, "dnn", "mnist", hyper_space_dnn,
                num_epochs=2, k_fold=3, logging=True, verbose=True)

Loaded previously existing Log with 51 BO iterations.
Start Logging to ./logs/bo_logs_dnn_mnist.json


In [21]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on DNN for Fashion-MNIST:
# bo_iters iterations over hyper_space_dnn, with num_epochs=10 and k_fold=3.
# NOTE: this overwrites opt_log from the previous BO run.
opt_log = BO_NN(bo_iters, eval_dnn, "dnn", "fashion", hyper_space_dnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

Start Logging to ./logs/bo_logs_dnn_fashion.json
BO iter  1 | cv-acc: 0.8530 | best-acc: 0.8530 | time: 30.22
BO iter  2 | cv-acc: 0.8560 | best-acc: 0.8560 | time: 38.44
BO iter  3 | cv-acc: 0.6448 | best-acc: 0.8560 | time: 32.25
BO iter  4 | cv-acc: 0.8574 | best-acc: 0.8574 | time: 545.27
BO iter  5 | cv-acc: 0.2121 | best-acc: 0.8574 | time: 232.47
BO iter  6 | cv-acc: 0.8607 | best-acc: 0.8607 | time: 554.57
BO iter  7 | cv-acc: 0.7732 | best-acc: 0.8607 | time: 48.55
BO iter  8 | cv-acc: 0.8727 | best-acc: 0.8727 | time: 555.34
BO iter  9 | cv-acc: 0.7778 | best-acc: 0.8727 | time: 45.31
BO iter 10 | cv-acc: 0.8319 | best-acc: 0.8727 | time: 35.15
BO iter 11 | cv-acc: 0.8430 | best-acc: 0.8727 | time: 34.44
BO iter 12 | cv-acc: 0.1983 | best-acc: 0.8727 | time: 228.98
BO iter 13 | cv-acc: 0.8796 | best-acc: 0.8796 | time: 552.15
BO iter 14 | cv-acc: 0.8537 | best-acc: 0.8796 | time: 547.69
BO iter 15 | cv-acc: 0.8263 | best-acc: 0.8796 | time: 40.06
BO iter 16 | cv-acc: 0.7475 |

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on DNN for CIFAR-10:
# bo_iters iterations over hyper_space_dnn, with num_epochs=10 and k_fold=3.
# NOTE: this overwrites opt_log from the previous BO run.
opt_log = BO_NN(bo_iters, eval_dnn, "dnn", "cifar10", hyper_space_dnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

Start Logging to ./logs/bo_logs_dnn_cifar10.json
BO iter  1 | cv-acc: 0.1000 | best-acc: 0.1000 | time: 42.11
BO iter  2 | cv-acc: 0.2539 | best-acc: 0.2539 | time: 44.48
BO iter  3 | cv-acc: 0.1000 | best-acc: 0.2539 | time: 32.56
BO iter  4 | cv-acc: 0.4222 | best-acc: 0.4222 | time: 491.41
BO iter  5 | cv-acc: 0.4090 | best-acc: 0.4222 | time: 483.14
BO iter  6 | cv-acc: 0.4132 | best-acc: 0.4222 | time: 491.96
BO iter  7 | cv-acc: 0.1000 | best-acc: 0.4222 | time: 216.67
BO iter  8 | cv-acc: 0.4533 | best-acc: 0.4533 | time: 512.72
BO iter  9 | cv-acc: 0.3673 | best-acc: 0.4533 | time: 56.29
BO iter 10 | cv-acc: 0.3913 | best-acc: 0.4533 | time: 486.15
BO iter 11 | cv-acc: 0.4206 | best-acc: 0.4533 | time: 488.19
BO iter 12 | cv-acc: 0.1000 | best-acc: 0.4533 | time: 288.32
BO iter 13 | cv-acc: 0.3621 | best-acc: 0.4533 | time: 62.50
BO iter 14 | cv-acc: 0.4229 | best-acc: 0.4533 | time: 491.41
BO iter 15 | cv-acc: 0.3767 | best-acc: 0.4533 | time: 56.58
BO iter 16 | cv-acc: 0.3145

# Simple Convolutional Neural Network

In [None]:
# ConvNet hyperparameters: channel sizes, kernel sizes, stride and padding
# for the convolutions, output size, and training settings
ch_sizes = [1, 16, 32]
k_sizes = [5, 5]
stride = 1
padding = 2
out_size = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.001

# Stratified train/test split of MNIST with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, stratify=y_mnist, random_state=0)

# Loss function, then the model and the Adam optimizer over its parameters
criterion = nn.CrossEntropyLoss()
cnn_model = CNN(ch_sizes, k_sizes, stride, padding, out_size)
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

In [None]:
# Train the MNIST CNN quietly (verbose=False) with logging enabled
model = train_model(
    "cnn", cnn_model, num_epochs,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=10000,
    model_fname="models/temp_model_cnn.ckpt",
    verbose=False,
    logging=True,
)

# Score the trained model on the held-out test split
score = get_test_error("cnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))

In [None]:
# Locate the most recent log file in log_dir, extract the iteration counts
# and train/validation loss and accuracy series, and plot them.
log_fname = get_latest_log_fname(log_dir)
its, train_loss, val_loss, train_acc, val_acc = process_logger(log_fname)
plot_learning(its, train_acc, val_acc, train_loss, val_loss, "CNN - Learning Performance")

In [None]:
# Run cross-validation on a specific 2-layer CNN architecture.
# NOTE(review): the heading says 3-fold, but k_fold=2 is passed here.
# NOTE(review): unlike the eval_dnn calls, no dataset name is passed as the
# first argument and num_epochs is omitted -- confirm against the eval_cnn
# signature in models/CNN.py.
eval_cnn(batch_size, learning_rate, num_layers=2,
         ch_1=16, ch_2=32, k_1=5, k_2=5,
         stride=1, padding=2,
         k_fold=2, verbose=True)

In [None]:
# Search space for Bayesian Optimization over CNN architectures:
# each entry maps a hyperparameter name to its (lower, upper) bound.
hyper_space_cnn = {'batch_size': (10, 500),
                   'learning_rate': (0.0001, 0.05),
                   'num_layers': (1, 5)}
# Channel counts ch_1 ... ch_5, one per possible conv layer
hyper_space_cnn.update(
    {'ch_{}'.format(layer): (3, 64) for layer in range(1, 6)})
# Kernel sizes k_1 ... k_5, one per possible conv layer
hyper_space_cnn.update(
    {'k_{}'.format(layer): (2, 10) for layer in range(1, 6)})
# Stride and padding bounds
hyper_space_cnn.update({'stride': (1, 3),
                        'padding': (1, 3)})

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on CNN for MNIST:
# bo_iters iterations over hyper_space_cnn, with num_epochs=10 and k_fold=3.
# bo_iters is defined in the DNN search-space cell above.
opt_log = BO_NN(bo_iters, eval_cnn, "cnn", "mnist", hyper_space_cnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on CNN for Fashion-MNIST:
# bo_iters iterations over hyper_space_cnn, with num_epochs=10 and k_fold=3.
# NOTE: this overwrites opt_log from the previous BO run.
opt_log = BO_NN(bo_iters, eval_cnn, "cnn", "fashion", hyper_space_cnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on CNN for CIFAR-10:
# bo_iters iterations with num_epochs=10 and k_fold=3.
# The CNN evaluator must be searched over the CNN space (ch_*/k_*/stride/
# padding keys), not hyper_space_dnn, whose h_l_* keys describe DNN layers.
opt_log = BO_NN(bo_iters, eval_cnn, "cnn", "cifar10", hyper_space_cnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

### Train Full Models (10 Epochs, Logging every 5000 eps and all Data)

In [None]:
# Batch size, number of epochs and learning rate for the full training runs
batch_size = 100
num_epochs = 10
learning_rate = 0.001

# Stratified train/test split of MNIST with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, stratify=y_mnist, random_state=0)

# Loss function, then the 784 -> 500 -> 10 network and its Adam optimizer
criterion = nn.CrossEntropyLoss()
dnn_model = DNN(h_sizes=[784, 500], out_size=10)
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

# Train for num_epochs (10) epochs with log_freq=5000, without verbose output
model = train_model(
    "dnn", dnn_model, num_epochs,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="models/temp_model_dnn_mnist.ckpt",
    verbose=False,
    logging=True,
)

In [None]:
# Stratified train/test split of Fashion-MNIST with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X_fashion, y_fashion, stratify=y_fashion, random_state=0)

# Loss function, then the 784 -> 500 -> 10 network and its Adam optimizer.
# batch_size and learning_rate are reused from an earlier cell.
criterion = nn.CrossEntropyLoss()
dnn_model = DNN(h_sizes=[784, 500], out_size=10)
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

# Train for 10 epochs with log_freq=5000, without verbose output
model = train_model(
    "dnn", dnn_model, 10,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="models/temp_model_dnn_fashion.ckpt",
    verbose=False,
    logging=True,
)

In [None]:
# Stratified train/test split of CIFAR-10 with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X_cifar10, y_cifar10, stratify=y_cifar10, random_state=0)

# Loss function, then the 3072 -> 500 -> 10 network and its Adam optimizer.
# batch_size and learning_rate are reused from an earlier cell.
criterion = nn.CrossEntropyLoss()
dnn_model = DNN(h_sizes=[3072, 500], out_size=10)
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

# Train for 10 epochs with log_freq=5000, without verbose output
model = train_model(
    "dnn", dnn_model, 10,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="models/temp_model_dnn_cifar.ckpt",
    verbose=False,
    logging=True,
)