In [16]:
import numpy as np
import tensorflow as tf
import torch
import yaml
import random
import os

import torch.nn as nn
import torch.nn.functional as F
import torchvision
from collections import defaultdict

In [17]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True    

In [18]:
is_cuda = torch.cuda.is_available()
device = torch.device(torch.cuda.current_device()) if is_cuda else torch.device("cpu")

In [19]:
conf_fn = "model_config.yml"

In [20]:
with open(conf_fn) as cf:
    conf = yaml.load(cf, Loader=yaml.FullLoader)

In [21]:
filter1 = conf["filter1"]
filter2 = conf["filter2"]
learning_rate = conf["learning_rate"]
batch_size = conf["batch_size"]
dropout = conf["dropout"]
epochs = conf["epochs"]
seed = conf["seed"]

stopping_patience = conf["early_stopping_patience"]
lr_patience = conf["lr_patience"]
verbose = 1

In [22]:
seed_everything(seed)

In [23]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [26]:
x_test.shape

(10000, 32, 32, 3)

In [8]:
num_classes = 10

# Load the data and split it between train and test sets
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()#tf.keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Resize images for pytorch
x_train = x_train.transpose((0, 3, 1, 2))
x_test = x_test.transpose((0, 3, 1, 2))

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

x_train shape: (50000, 3, 32, 32)
50000 train samples
10000 test samples


In [9]:
trainset = torch.utils.data.TensorDataset(
    torch.from_numpy(x_train).float(), 
    torch.from_numpy(y_train).long()
)

train_loader = torch.utils.data.DataLoader(trainset, 
                                          batch_size=batch_size,
                                          shuffle=True, 
                                          num_workers=2)

testset = torch.utils.data.TensorDataset(
    torch.from_numpy(x_test).float(), 
    torch.from_numpy(y_test).long()
)

test_loader = torch.utils.data.DataLoader(testset, 
                                         batch_size=batch_size,
                                         shuffle=False, 
                                         num_workers=2)

In [10]:
class Net(nn.Module):
    def __init__(self, filter1, filter2, dropout, num_classes):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, filter1, 3)
        self.conv2 = nn.Conv2d(filter1, filter2, 3)
        self.fc1 = nn.Linear(4 * filter2 * 3 * 3, num_classes)
        self.pool = nn.MaxPool2d(2, 2)
        self.dr1 = nn.Dropout(dropout)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = self.dr1(x)
        #print(x.shape)
        x = self.fc1(x)
        x = F.log_softmax(x, dim=-1)
        return x

In [11]:
model = Net(filter1, filter2, dropout, num_classes).to(device)

In [12]:
optimizer = torch.optim.Adam(
    model.parameters(),
    lr = learning_rate,
)

### Load loss
criterion = torch.nn.NLLLoss()

### Load schedulers 
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    patience = lr_patience, 
    verbose = verbose,
    min_lr = 1.0e-13
)

In [13]:
results_dict = defaultdict(list)
for epoch in range(epochs):  # loop over the dataset multiple times

    """ Train """
    train_loss, train_accuracy = [], []
    model.train()
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels.squeeze(-1))
        loss.backward()
        optimizer.step()

        # print statistics
        train_loss.append(loss.item())
        train_accuracy.append(
            (torch.argmax(outputs, -1) == labels.squeeze(-1)).float().mean().item()
        )
        
    """ Validate """
    val_loss, val_accuracy = [], []
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device) 
            labels = labels.to(device)

            # forward 
            outputs = model(inputs)
            loss = criterion(outputs, labels.squeeze(-1))

            # print statistics
            val_loss.append(loss.item())
            val_accuracy.append(
                (torch.argmax(outputs, -1) == labels.squeeze(-1)).float().mean().item()
            )
    
    results_dict["train_loss"].append(np.mean(train_loss))
    results_dict["train_accuracy"].append(np.mean(train_accuracy))
    results_dict["valid_loss"].append(np.mean(val_loss))
    results_dict["valid_accuracy"].append(np.mean(val_accuracy))
    
    print(f'Epoch {epoch} train_acc {results_dict["train_accuracy"][-1]} valid_acc {results_dict["valid_accuracy"][-1]}')
    
    # Anneal learning rate
    lr_scheduler.step(1-results_dict["valid_accuracy"][-1])
    
    # Early stopping
    best_epoch = [
        i for i,j in enumerate(results_dict["valid_accuracy"]) if j == max(results_dict["valid_accuracy"])
    ][0]
    offset = epoch - best_epoch
    if offset >= stopping_patience:
        break

Epoch 0 train_acc 0.43008237364043506 valid_acc 0.5191693290734825
Epoch 1 train_acc 0.5302103326935381 valid_acc 0.5729832268370607
Epoch 2 train_acc 0.568937939859245 valid_acc 0.5976437699680511
Epoch 3 train_acc 0.5946497120921305 valid_acc 0.6108226837060703
Epoch 4 train_acc 0.6121241202815099 valid_acc 0.6317891373801917
Epoch 5 train_acc 0.6270193538067819 valid_acc 0.6282947284345048
Epoch 6 train_acc 0.6353366922584773 valid_acc 0.6523562300319489
Epoch 7 train_acc 0.6428142994241842 valid_acc 0.6471645367412141
Epoch 8 train_acc 0.6479526551503519 valid_acc 0.6572484025559105
Epoch 9 train_acc 0.6524112284069098 valid_acc 0.6644369009584664
Epoch 10 train_acc 0.6590491042866283 valid_acc 0.6652356230031949
Epoch 11 train_acc 0.6647672744721689 valid_acc 0.6591453674121406
Epoch 12 train_acc 0.6664267434420985 valid_acc 0.6683306709265175
Epoch 13 train_acc 0.6726847408829175 valid_acc 0.6613418530351438
Epoch 14 train_acc 0.6754838451695457 valid_acc 0.6765175718849841
Epoch