In [None]:
%run 'cnn.py'

50.0 percent of epoch 1 complete

In [None]:
!ls

In [None]:
from baseline_cnn import *
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as func
import torch.nn.init as torch_init
import torch.optim as optim

# Data utils and dataloader
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt

# Custom utils file
from utils import evaluate

In [None]:
def weights_init(m):
    """Initialise convolution and batch-norm parameters of a single module.

    Meant to be handed to ``nn.Module.apply``, which calls it once per
    sub-module. Modules are selected by class name:

    * name contains ``'Conv'``: weights are drawn from N(mean=0.0, std=0.02);
    * name contains ``'BatchNorm'``: weights are drawn from N(mean=1.0,
      std=0.02) and biases are set to 0.

    A module that matches by name but does not own the parameter (for example
    a container class called ``ConvBlock`` or ``BatchNorm2d(affine=False)``)
    is skipped instead of raising ``AttributeError``. Every other module is
    left untouched.

    Args:
        m: the module to initialise in place.
    """
    classname = m.__class__.__name__
    # getattr with a default covers both "attribute missing" and "attribute is None"
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            nn.init.normal_(weight.data, 0.0, 0.02)
    elif 'BatchNorm' in classname:
        if weight is not None:
            nn.init.normal_(weight.data, 1.0, 0.02)
        if bias is not None:
            nn.init.constant_(bias.data, 0)

In [None]:
# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Select the device that models and tensors are moved to, together with
# `extras`: additional keyword arguments (presumably meant to be unpacked into
# the DataLoader constructors -- confirm against their call sites).
# `extras` is a dict on BOTH paths so that `**extras` works with or without a
# GPU; an empty dict is still falsy, so existing `if extras:` checks behave
# as before.
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else:  # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    extras = {}
    print("CUDA NOT supported")

## Load Train, Validation and Test data

In [None]:
# Preprocessing shared by every split: resize, centre-crop to 224, convert to tensor.
# NOTE(review): `loader`, `np` and `SubsetRandomSampler` are not imported
# explicitly in this notebook -- presumably they arrive through
# `from baseline_cnn import *`; confirm.
transform = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)
dataset = loader(
    'mini_train.csv',
    './datasets/cs154-fa19-public/',
    transform=transform,
)
testdataset = loader(
    'mini_test.csv',
    './datasets/cs154-fa19-public/',
    transform=transform,
)

# Split / batching configuration
batch_size = 64
validation_split = .2   # fraction of `dataset` held out for validation
shuffle_dataset = True
random_seed = 42

# Build the index lists for the train / validation split.
# (For quick experiments, a quarter of the data can be selected instead with
#  list(range(int(dataset_size / 4))).)
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset:
    # Seeding NumPy's global RNG makes the shuffled split repeatable
    np.random.seed(random_seed)
    np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# Samplers restrict each loader to its own subset of `dataset`
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=train_sampler,
)
validation_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=valid_sampler,
)
# The test set is served as one single batch, in its original order
test_loader = torch.utils.data.DataLoader(
    testdataset,
    batch_size=len(testdataset),
    shuffle=False,
)

## Show a sample from the dataset

In [None]:
def imshow(img, unnormalize=False):
    """Display a channel-first image tensor with matplotlib.

    The tensor is converted to a NumPy array and its first axis is moved to
    the end (``(1, 2, 0)`` transpose) before being handed to ``plt.imshow``.

    Args:
        img: CPU tensor whose first axis is the channel axis.
        unnormalize: when True, map the values back with ``img / 2 + 0.5``
            first. Only appropriate for images that were normalised with
            mean 0.5 / std 0.5. The ``transform`` defined in this notebook
            contains no ``Normalize`` step, so the default is False;
            rescaling such images would wash them out.
    """
    if unnormalize:
        img = img / 2 + 0.5  # undo a (0.5, 0.5) normalisation
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Get some random training images
dataiter = iter(train_loader)
# Use the built-in next(): a `.next()` method is not part of the Python 3
# iterator protocol and is not available on the DataLoader iterator of recent
# PyTorch releases.
images, labels = next(dataiter)  # One minibatch (up to batch_size images)

# Places the images in a grid
imshow(torchvision.utils.make_grid(images))
print(labels)

## Create the network

In [None]:
NUM_CLASSES = 11  # size of the network's output layer

# Build the model, move it to the selected device, then re-initialise its
# parameters: `apply` calls weights_init on every sub-module.
net = Nnet(num_classes=NUM_CLASSES)
net = net.to(computing_device)
net.apply(weights_init)
print(net)

## Define the Loss Function and the Optimizer

In [None]:
# Unweighted cross-entropy loss.
# Class-weighted alternative, kept for reference:
#   nn.CrossEntropyLoss(weight=dataset.get_class_weights())
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter of the network; weight_decay is passed
# straight to the optimizer.
optimizer = optim.Adam(
    params=net.parameters(),
    lr=1e-4,
    weight_decay=5e-4,
)

## Train the network

In [None]:
# Track the loss across training (all entries are plain Python/NumPy floats)
train_epoch_losses = []       # mean training loss, one entry per epoch
validation_epoch_losses = []  # mean validation loss, one entry per epoch
train_batch_losses = []       # per-minibatch training losses of the current epoch
validation_batch_losses = []  # per-minibatch validation losses of the current epoch
avg_minibatch_loss = []       # training loss averaged over each window of N minibatches
N = 50  # Interval (in minibatches) of average minibatch loss reporting.
n_epochs = 2
PATH = "./cnn_celeb.pth"  # Where to save the model

# Number of minibatches per training epoch; used for the progress display
n_train_batches = len(train_loader)

for epoch in range(n_epochs):
    N_minibatch_loss = 0.0
    # Validation below switches to eval mode, so re-enable training mode here
    net.train()

    # Get the next minibatch of images, labels for training
    for minibatch_count, (images, labels) in enumerate(train_loader, 0):
        # Progress is measured in minibatches, not in dataset samples
        fraction_done = round(minibatch_count / n_train_batches * 100, 3)
        print("{} percent of epoch {} complete".format(fraction_done, epoch + 1), end="\r")
        # Zero out the stored gradient (buffer) from the previous iteration
        optimizer.zero_grad()
        # Put the minibatch data in CUDA Tensors and run on the GPU if supported
        images, labels = images.to(computing_device), labels.to(computing_device)
        # Perform the forward pass through the network and compute the loss
        outputs = net(images)

        loss = criterion(outputs, labels) # If we are using Cross Entropy, this is doing Softmax
        # Automagically compute the gradients and backpropagate the loss through the network
        loss.backward()

        # Update the weights
        optimizer.step()
        # Record the loss as a Python float: accumulating the tensor itself
        # would keep autograd history alive and store (possibly CUDA) tensors
        batch_loss = loss.item()
        train_batch_losses.append(batch_loss)
        N_minibatch_loss += batch_loss

        if minibatch_count % N == N - 1:
            # Print the loss averaged over the last N mini-batches
            N_minibatch_loss /= N
            print('Epoch %d, average minibatch %d loss: %.3f' % (epoch + 1, minibatch_count + 1, N_minibatch_loss))
            # Add the averaged loss over N minibatches and reset the counter
            avg_minibatch_loss.append(N_minibatch_loss)
            N_minibatch_loss = 0.0

    print("Finished", epoch + 1, "epochs of training")
    print("Saving model...")
    torch.save(net.state_dict(), PATH)
    print("Done.")

    # save train loss
    train_epoch_loss = np.average(np.array(train_batch_losses))
    train_batch_losses = []
    train_epoch_losses.append(train_epoch_loss)

    # Evaluate on the validation split: eval mode, no gradient tracking
    net.eval()
    with torch.no_grad():
        # validation implementation
        for minibatch_count, (images, labels) in enumerate(validation_loader, 0):
            # Put the validation minibatch data in CUDA Tensors and run on the GPU if supported
            images, labels = images.to(computing_device), labels.to(computing_device)
            # Perform the forward pass through the network and compute the loss
            outputs = net(images)

            validation_batch_loss = criterion(outputs, labels)
            # .item() yields a float; np.array() cannot convert CUDA tensors
            validation_batch_losses.append(validation_batch_loss.item())

        epoch_validation_loss = np.average(np.array(validation_batch_losses))
        validation_batch_losses = []
        validation_epoch_losses.append(epoch_validation_loss)

    print('Epoch %d, validation loss: %.3f' % (epoch + 1, epoch_validation_loss))

## Test the model

In [None]:
# Put the network in evaluation mode (affects layers such as batch norm or
# dropout, if the network contains any) before measuring test performance
net.eval()
with torch.no_grad():
    for images, labels in test_loader: # Remember they come in batches
        # The network lives on `computing_device`; its inputs must be there too
        images = images.to(computing_device)
        # Since we are not doing this through criterion, we must add softmax our self
        outputs = func.softmax(net(images), dim=1)
        _, predicted = torch.max(outputs, 1)

        # Move the predictions back to the CPU (where the labels still are)
        # and one-hot encode both before handing them to `evaluate`
        predicted = func.one_hot(predicted.cpu(), num_classes=NUM_CLASSES).type(torch.FloatTensor)
        labels = func.one_hot(labels, num_classes=NUM_CLASSES).type(torch.FloatTensor)
        evaluate(predicted, labels)


## Plot loss

In [None]:
# Each per-epoch loss history paired with its legend entry
loss_curves = (
    (train_epoch_losses, "train loss"),
    (validation_epoch_losses, "validation loss"),
)

# Print the raw values first ...
for history, _ in loss_curves:
    print(history)

# ... then draw both curves on the same axes
for history, curve_label in loss_curves:
    plt.plot(history, label=curve_label)
plt.title("Loss as a function of number of epochs")
plt.xlabel("Epoch")
plt.ylabel("Cross Entropy Loss")
plt.legend()
plt.show()


## Plot Weights

In [None]:
COLUMNS_IN_FIGURE = 10  # number of subplots per figure row


def plot_weights(model, layer):
    """Plot every weight slice of one sub-module of ``model.main`` in a grid.

    The weights are normalised to zero mean / unit standard deviation over
    axes 1 and 2, and each entry along axis 0 is drawn with ``imshow`` in its
    own subplot, ``COLUMNS_IN_FIGURE`` subplots per row.

    NOTE(review): if the layers are ``nn.Conv2d``, the weight layout is
    (out_channels, in_channels, kH, kW). The ``shape[-1] == 3`` check then
    tests the kernel width rather than an RGB channel count, and ``imshow``
    receives channel-first slices. Confirm this is the intended view.

    Args:
        model: network exposing a ``main`` container of sub-modules.
        layer: key (index or name) of the sub-module inside ``model.main``.

    Raises:
        AttributeError: if ``model.main`` has no sub-module with that key.
        Exception: if the last weight dimension is not 3.
    """
    module = model.main._modules.get(str(layer))
    if module is None:
        raise AttributeError("model.main has no sub-module named %r" % str(layer))
    # detach + cpu so this also works for weights that live on the GPU
    weights = module.weight.detach().cpu().numpy()

    # Validate before doing any work on the array
    if not weights.shape[-1] == 3:
        raise Exception("last dim needs to be 3")

    # normalize weights
    mean = np.mean(weights, axis=(1, 2), keepdims=True)
    std = np.std(weights, axis=(1, 2), keepdims=True)
    normalized_weighs = (weights - mean) / std

    num_weights = normalized_weighs.shape[0]
    # Ceiling division: no spare empty row when num_weights is an exact
    # multiple of COLUMNS_IN_FIGURE (but always at least one row)
    num_rows = max(1, -(-num_weights // COLUMNS_IN_FIGURE))
    fig = plt.figure(figsize=(COLUMNS_IN_FIGURE, num_rows))
    for i in range(num_weights):
        sub = fig.add_subplot(num_rows, COLUMNS_IN_FIGURE, i + 1)
        sub.axis('off')
        sub.imshow(normalized_weighs[i])
        sub.set_xticklabels([])
        sub.set_yticklabels([])

    plt.show()

    
    

# Visualise the weights of sub-modules 0, 2 and 5 of `net.main`
for layer_index in (0, 2, 5):
    plot_weights(net, layer_index)