# MNS - Biologically Plausible Deep Learning
## Simple Backprop Baselines (DNNs and CNNs)

#### 1. Data Setup - Download and Loading-In
#### 2. PyTorch DNNs with Bayesian Optimization
#### 3. PyTorch CNNs with Bayesian Optimization
#### 4. Run Example 784/3072 - 500 - 10 Architecture on all Datasets 

In [None]:
!pip install -r requirements.txt --quiet

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
# Import Packages
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

# Import tf for tensorboard monitoring of training
import tensorflow as tf

# Import Network Architectures
from models.DNN import DNN, eval_dnn
from models.CNN import CNN, eval_cnn

# Import log-helper/learning plot functions
from utils.helpers import *
from utils.logger import *

# Import Bayesian Optimization Module
from utils.bayesian_opt import BO_NN
from sklearn.model_selection import train_test_split

In [5]:
# Device configuration: use CUDA when torch reports it as available, else the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
    # Report the name of GPU index 0
    print("Torch Device: {}".format(torch.cuda.get_device_name(0)))
else:
    device = torch.device('cpu')
    print("Torch Device: Local CPU")

Torch Device: GeForce GTX 1080


In [6]:
# Data directory: created on first use
data_dir = os.getcwd() + "/data"
if not os.path.exists(data_dir):
    os.makedirs(data_dir)
    print("Created New Data Directory")

# Log directory: if it already exists, remove the files whose names start with
# "events" (TensorBoard event files); otherwise create it
log_dir = os.getcwd() + "/logs"
if os.path.exists(log_dir):
    stale_logs = [name for name in os.listdir(log_dir)
                  if name.startswith("events")]
    for name in stale_logs:
        os.remove(os.path.join(log_dir, name))
    print("Deleted Old TF/TensorBoard Log Files in Existing Log Directory")
else:
    os.makedirs(log_dir)
    print("Created New Log Directory")

# Path of the models folder (only recorded here, never created)
models_dir = os.getcwd() + "/models"

Deleted Old TF/TensorBoard Log Files in Existing Log Directory


# Download, Import and Plot Datasets

In [7]:
# Fetch the MNIST, Fashion-MNIST and CIFAR-10 files; per the recorded output the
# helper skips datasets that need no download.
# NOTE(review): download_data presumably comes from the utils star imports - confirm.
download_data()

No download of MNIST needed.
No download of Fashion-MNIST needed.
No download of CIFAR-10 needed.


In [8]:
# MNIST dataset (70000 samples requested)
X_mnist, y_mnist = get_data(num_samples=70000, dataset="mnist")
# Fashion-MNIST dataset (70000 samples requested)
X_fashion, y_fashion = get_data(num_samples=70000, dataset="fashion")
# CIFAR-10 dataset (60000 samples requested)
X_cifar10, y_cifar10 = get_data(num_samples=60000, dataset="cifar10")

# Simple Feedforward Neural Net

### Run a Simple DNN

In [9]:
# Training hyperparameters: mini-batch size, number of epochs, Adam step size
batch_size, num_epochs, learning_rate = 100, 5, 0.001

# Stratified hold-out split of MNIST with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, stratify=y_mnist, random_state=0)

# Cross-entropy loss for the classification task
criterion = nn.CrossEntropyLoss()

# 784 - 500 - 10 feedforward network
dnn_model = DNN(h_sizes=[784, 500], out_size=10)

# Adam optimizer over the network's parameters
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

In [12]:
# Train the DNN on the MNIST training split for num_epochs epochs.
# NOTE(review): train_model presumably comes from the utils star imports. The
# recorded log ("batch 20000/41996") suggests log_freq counts samples rather
# than mini-batches - confirm against the helper.
model = train_model("dnn", dnn_model, num_epochs,
                    X_train, y_train, batch_size,
                    device, optimizer, criterion,
                    log_freq = 20000,
                    model_fname ="temp_model_dnn_mnist",
                    verbose=True, logging=True)

# Evaluate on the held-out split; the helper is named "test error" but the
# returned score is reported as an accuracy
score = get_test_error("dnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))

train| epoch  1| batch 20000/41996| acc: 0.9572| loss: 0.1477| time: 0.07
valid| epoch  1| batch 20000/41996| acc: 0.9477| loss: 0.1742| time: 0.00
-------------------------------------------------------------------------
train| epoch  1| batch 40000/41996| acc: 0.9629| loss: 0.1270| time: 0.07
valid| epoch  1| batch 40000/41996| acc: 0.9521| loss: 0.1598| time: 0.00
-------------------------------------------------------------------------
train| epoch  2| batch 20000/41996| acc: 0.9722| loss: 0.0955| time: 0.09
valid| epoch  2| batch 20000/41996| acc: 0.9594| loss: 0.1337| time: 0.00
-------------------------------------------------------------------------
train| epoch  2| batch 40000/41996| acc: 0.9769| loss: 0.0801| time: 0.07
valid| epoch  2| batch 40000/41996| acc: 0.9624| loss: 0.1220| time: 0.00
-------------------------------------------------------------------------
train| epoch  3| batch 20000/41996| acc: 0.9808| loss: 0.0655| time: 0.07
valid| epoch  3| batch 20000/41996| ac

### Compute Cross Validation Accuracy for all 3 Datasets

In [13]:
# Run 3-fold cross-validation on specific architecture for MNIST:
# one hidden layer of 500 units, 5 epochs. batch_size and learning_rate are
# not set here; they carry over from the cell that last assigned them.
eval_dnn("mnist", batch_size, learning_rate,
         num_layers=1, h_l_1=500,
         num_epochs=5, k_fold=3, verbose=True)

Dataset: mnist
Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 0: 784 Units
	 Layer 1: 500 Units
Cross-Validation Score Fold 1: 0.9724457147936301
Cross-Validation Score Fold 2: 0.9747138684152821
Cross-Validation Score Fold 3: 0.9758260361101898


0.974328539773034

In [14]:
# Run 3-fold cross-validation on specific architecture for Fashion-MNIST:
# one hidden layer of 500 units, 5 epochs. batch_size and learning_rate are
# not set here; they carry over from the cell that last assigned them.
eval_dnn("fashion", batch_size, learning_rate,
         num_layers=1, h_l_1=500,
         num_epochs=5, k_fold=3, verbose=True)

Dataset: fashion
Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 0: 784 Units
	 Layer 1: 500 Units
Cross-Validation Score Fold 1: 0.8788774635818337
Cross-Validation Score Fold 2: 0.8741963137591086
Cross-Validation Score Fold 3: 0.8605657951135877


0.8712131908181767

In [15]:
# Run 3-fold cross-validation on specific architecture for CIFAR-10:
# one hidden layer of 500 units, 5 epochs. batch_size and learning_rate are
# not set here; they carry over from the cell that last assigned them.
eval_dnn("cifar10", batch_size, learning_rate,
         num_layers=1, h_l_1=500,
         num_epochs=5, k_fold=3, verbose=True)

Dataset: cifar10
Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 0: 3072 Units
	 Layer 1: 500 Units
Cross-Validation Score Fold 1: 0.40925
Cross-Validation Score Fold 2: 0.43979999999999997
Cross-Validation Score Fold 3: 0.41640000000000005


0.4218166666666667

### Run Bayesian Optimization on DNN Hyperparameters

In [None]:
# Search space (lower, upper) handed to the Bayesian optimization of the DNN
hyper_space_dnn = {
    'batch_size': (10, 500),
    'learning_rate': (0.0001, 0.05),
    'num_layers': (1, 6),
    # One width range per possible hidden layer: h_l_1 ... h_l_6
    **{'h_l_{}'.format(layer): (30, 500) for layer in range(1, 7)},
}

# Number of Bayesian optimization iterations for the full runs
bo_iters = 50

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on DNN for MNIST:
# bo_iters iterations, each scored by eval_dnn with k_fold=3 and num_epochs=10.
# The result is stored in opt_log, which every BO cell reassigns.
opt_log = BO_NN(bo_iters, eval_dnn, "dnn", "mnist", hyper_space_dnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

In [None]:
# Short Bayesian Optimization run (UCB-Acquisition Fct) on DNN for MNIST:
# only 2 BO iterations with num_epochs=2, instead of bo_iters iterations with
# num_epochs=10 as in the full run. Reassigns opt_log.
opt_log = BO_NN(2, eval_dnn, "dnn", "mnist", hyper_space_dnn,
                num_epochs=2, k_fold=3, logging=True, verbose=True)

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on DNN for Fashion-MNIST:
# bo_iters iterations, each scored by eval_dnn with k_fold=3 and num_epochs=10.
# The result is stored in opt_log, which every BO cell reassigns.
opt_log = BO_NN(bo_iters, eval_dnn, "dnn", "fashion", hyper_space_dnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on DNN for CIFAR-10:
# bo_iters iterations, each scored by eval_dnn with k_fold=3 and num_epochs=10.
# The result is stored in opt_log, which every BO cell reassigns.
opt_log = BO_NN(bo_iters, eval_dnn, "dnn", "cifar10", hyper_space_dnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

# Simple Convolutional Neural Network

In [16]:
# Training hyperparameters for the ConvNet
batch_size, num_epochs, learning_rate = 100, 2, 0.001

# ConvNet architecture: channels 1 -> 16 -> 32, kernel size 5 in both layers
ch_sizes, k_sizes = [1, 16, 32], [5, 5]
stride, padding = 1, 2
out_size = 10

# Stratified hold-out split of MNIST with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, stratify=y_mnist, random_state=0)

# Loss function, model and Adam optimizer over the model's parameters
criterion = nn.CrossEntropyLoss()
cnn_model = CNN(ch_sizes, k_sizes, stride, padding, out_size)
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

In [19]:
# Train the ConvNet on the MNIST training split for num_epochs epochs, without
# console output (verbose=False).
# NOTE(review): train_model presumably comes from the utils star imports - confirm.
model = train_model("cnn", cnn_model, num_epochs,
                    X_train, y_train, batch_size,
                    device, optimizer, criterion, log_freq=10000,
                    model_fname ="temp_model_cnn",
                    verbose=False, logging=True)

# Evaluate on the held-out split; the helper is named "test error" but the
# returned score is reported as an accuracy
score = get_test_error("cnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))

Test Accuracy: 0.9842857142857143


In [21]:
# Run 2-fold cross-validation (k_fold=2) on specific architecture for MNIST:
# two conv layers with 16 and 32 channels, kernel size 5, stride 1, padding 2.
# NOTE(review): num_epochs is not passed, so eval_cnn's own default applies.
eval_cnn("mnist", batch_size, learning_rate, num_layers=2,
         ch_1=16, ch_2=32, k_1=5, k_2=5,
         stride=1, padding=2,
         k_fold=2, verbose=True)

Batchsize: 100
Learning Rate: 0.001
Architecture of Cross-Validated Network:
	 Layer 1: 16 Channels, 5 Kernel Size
	 Layer 2: 32 Channels, 5 Kernel Size
Cross-Validation Score Fold 1: 0.9785730036316155
Cross-Validation Score Fold 2: 0.9679976156452863


0.9732853096384508

In [22]:
# Search space (lower, upper) handed to the Bayesian optimization of the CNN
hyper_space_cnn = {
    'batch_size': (10, 500),
    'learning_rate': (0.0001, 0.05),
    'num_layers': (1, 5),
    # Channel-count range per possible conv layer: ch_1 ... ch_5
    **{'ch_{}'.format(layer): (3, 64) for layer in range(1, 6)},
    # Kernel-size range per possible conv layer: k_1 ... k_5
    **{'k_{}'.format(layer): (2, 10) for layer in range(1, 6)},
    'stride': (1, 3),
    'padding': (1, 3),
}

# Number of Bayesian optimization iterations for the full runs
bo_iters = 50

In [32]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on CNN for MNIST:
# bo_iters iterations, each scored by eval_cnn with k_fold=3 and num_epochs=10.
# NOTE(review): the recorded output shows an existing BO log being loaded and
# logging to ./logs/bo_logs_cnn_mnist.json, so a rerun appears to resume
# rather than start from scratch - confirm in BO_NN.
opt_log = BO_NN(bo_iters, eval_cnn, "cnn", "mnist", hyper_space_cnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

Loaded previously existing Log with 1 BO iterations.
Start Logging to ./logs/bo_logs_cnn_mnist.json
13.0
4.0
0.0
6.25
6.0
BO iter  2 | cv-acc: 0.9788 | best-acc: 0.9788 | time: 40.63
8.25
15.0
7.0
3.5


KeyboardInterrupt: 

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on CNN for Fashion-MNIST:
# bo_iters iterations, each scored by eval_cnn with k_fold=3 and num_epochs=10.
# The result is stored in opt_log, which every BO cell reassigns.
opt_log = BO_NN(bo_iters, eval_cnn, "cnn", "fashion", hyper_space_cnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

In [None]:
# Run Bayesian Optimization (UCB-Acquisition Fct) on CNN for CIFAR-10.
# The CNN search space is required here: eval_cnn is parameterised by channel
# counts, kernel sizes, stride and padding (ch_*/k_*/stride/padding keys),
# none of which are present in hyper_space_dnn.
opt_log = BO_NN(bo_iters, eval_cnn, "cnn", "cifar10", hyper_space_cnn,
                num_epochs=10, k_fold=3, logging=True, verbose=True)

### Train Full Models (10 Epochs, Logging every 5000 eps and all Data)

In [None]:
# Stratified hold-out split of MNIST with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X_mnist, y_mnist,
                                                    stratify=y_mnist,
                                                    random_state=0)

# Define batchsize for data-loading/Epochs for training
batch_size = 100
num_epochs = 10
learning_rate = 0.001

# Instantiate the 784 - 500 - 10 model
dnn_model = DNN(h_sizes=[784, 500], out_size=10)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

# Train for num_epochs epochs. The epoch count is passed via the variable
# defined above (previously a duplicated literal 10), so it has a single
# source of truth, as in the CNN training cells.
model = train_model("dnn", dnn_model, num_epochs,
                    X_train, y_train, batch_size,
                    device, optimizer, criterion,
                    log_freq = 5000,
                    model_fname ="temp_model_dnn_mnist",
                    verbose=True, logging=True)

In [None]:
# Stratified hold-out split of Fashion-MNIST with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_fashion, y_fashion, stratify=y_fashion, random_state=0)

# Loss, 784 - 500 - 10 network and Adam optimizer over its parameters.
# NOTE: batch_size and learning_rate are not assigned in this cell; they are
# taken from whichever earlier cell set them last.
criterion = nn.CrossEntropyLoss()
dnn_model = DNN(h_sizes=[784, 500], out_size=10)
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

# Train for 10 epochs without console output (verbose=False)
model = train_model(
    "dnn", dnn_model, 10,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="temp_model_dnn_fashion",
    verbose=False, logging=True,
)

In [None]:
# Stratified hold-out split of CIFAR-10 with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_cifar10, y_cifar10, stratify=y_cifar10, random_state=0)

# Loss, 3072 - 500 - 10 network and Adam optimizer over its parameters.
# NOTE: batch_size and learning_rate are not assigned in this cell; they are
# taken from whichever earlier cell set them last.
criterion = nn.CrossEntropyLoss()
dnn_model = DNN(h_sizes=[3072, 500], out_size=10)
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=learning_rate)

# Train for 10 epochs without console output (verbose=False)
model = train_model(
    "dnn", dnn_model, 10,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="temp_model_dnn_cifar",
    verbose=False, logging=True,
)

In [None]:
# ConvNet training settings reused by the CNN training cells that follow
batch_size, num_epochs, learning_rate = 100, 10, 0.001

# Architecture: channels 1 -> 16 -> 32, kernel size 5 in both conv layers
ch_sizes = [1, 16, 32]
k_sizes = [5] * 2
stride, padding = 1, 2

# Size of the output layer
out_size = 10

In [None]:
# Stratified hold-out split of MNIST with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist, y_mnist, stratify=y_mnist, random_state=0)

# Loss, ConvNet and Adam optimizer; the architecture and training settings
# (ch_sizes, k_sizes, stride, padding, out_size, learning_rate, ...) are not
# assigned here and come from the ConvNet parameter cell
criterion = nn.CrossEntropyLoss()
cnn_model = CNN(ch_sizes, k_sizes, stride, padding, out_size)
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

# Train for num_epochs epochs with console output
model = train_model(
    "cnn", cnn_model, num_epochs,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="mnist_cnn",
    verbose=True, logging=True,
)

# Score the trained model on the held-out split and report it as accuracy
score = get_test_error("cnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))

In [None]:
# Stratified hold-out split of Fashion-MNIST with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_fashion, y_fashion, stratify=y_fashion, random_state=0)

# Loss, ConvNet and Adam optimizer; the architecture and training settings
# (ch_sizes, k_sizes, stride, padding, out_size, learning_rate, ...) are not
# assigned here and come from the ConvNet parameter cell
criterion = nn.CrossEntropyLoss()
cnn_model = CNN(ch_sizes, k_sizes, stride, padding, out_size)
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

# Train for num_epochs epochs with console output
model = train_model(
    "cnn", cnn_model, num_epochs,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="fashion_cnn",
    verbose=True, logging=True,
)

# Score the trained model on the held-out split and report it as accuracy
score = get_test_error("cnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))

In [None]:
# CIFAR-10 uses 3 input channels. NOTE: this reassigns the notebook-wide
# ch_sizes, so the MNIST/Fashion CNN cells need the ConvNet parameter cell
# re-run before being executed again.
ch_sizes = [3, 16, 32]

# Stratified hold-out split of CIFAR-10 with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_cifar10, y_cifar10, stratify=y_cifar10, random_state=0)

# Loss, ConvNet and Adam optimizer; the remaining settings (k_sizes, stride,
# padding, out_size, learning_rate, ...) come from the ConvNet parameter cell
criterion = nn.CrossEntropyLoss()
cnn_model = CNN(ch_sizes, k_sizes, stride, padding, out_size)
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=learning_rate)

# Train for num_epochs epochs with console output
model = train_model(
    "cnn", cnn_model, num_epochs,
    X_train, y_train, batch_size,
    device, optimizer, criterion,
    log_freq=5000,
    model_fname="cifar10_cnn",
    verbose=True, logging=True,
)

# Score the trained model on the held-out split and report it as accuracy
score = get_test_error("cnn", device, model, X_test, y_test)
print("Test Accuracy: {}".format(score))