In [2]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils import data
def prepare_data():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)

    root_dir = "./data3/train";
    train_set = datasets.ImageFolder(root = root_dir,
                transform = transforms.ToTensor())

    trainloader = data.DataLoader(train_set, batch_size = 4, shuffle = True)

    root_dir = "./data3/test";

    train_set = datasets.ImageFolder(root = root_dir,
                transform = transforms.ToTensor())

    testloader = data.DataLoader(train_set, batch_size =4, shuffle = True)
    classes = ('Natalie Portman', 'Julia Roberts', 'Neil Patrick Harris'
                    , 'Keifer Sutherland', 'Ben Stiller', 'Anne Hathaway'
                    , 'David Boreanaz', 'Jamie Foxx', 'Sofia Vergara'
                    , 'Elizabeth Banks', 'Jensen Ackles', 'Amy Adams'
                    , 'Kristen Bell', 'Bradley Cooper', 'Emily Deschanel'
                    , 'Zooey Deschanel', 'Jon Hamm', 'Scarlett Johansson'
                    , 'Blake Lively', 'Eva Longoria', 'Amy Poehler'
                    , 'Kristen Stewart', 'Kerry Washington', 'Leighton Meester'
                    , 'Olivia Wilde', 'Zac Efron', 'Miley Cyrus','Jim Parsons')
    return trainloader, testloader, classes
trainloader, testloader, classes = prepare_data()

Bellow we have two implementations of neural network - one with two dimentional batch normalization, and the other one 
without it. Each neural network consists of 4 convolutional layers with filters of sizes(11,9,7,6), with stride 1 and 
no padding. After each convolutional layer operation MaxPool of size (2,2) is used. Then there are two fully connected 
layers, first with 3000 input and 600 out, second with 600 input and output of 28.

In [3]:
import torch.nn as nn
import torch.nn.functional as F

def tensor_2d_mean(image):
    return torch.mm(torch.mm(image,torch.ones(image.shape[0], 1)).T, torch.ones(image.shape[1], 1)) / (image.shape[0]*image.shape[1])

def mini_batch_normalization(mini_batch, epsilon):
    batch_size = mini_batch.shape[0]
    number_of_layers = mini_batch.shape[1]
    sum = torch.zeros(mini_batch.shape[2], mini_batch.shape[3])
    for i in range(number_of_layers):
        for j in range(batch_size):
            sum = sum + mini_batch[j][i]
        mean = tensor_2d_mean(sum) / batch_size
        sum = 0
        for j in range(batch_size):
            sum += (mini_batch[j][i] - mean)**2
        variance = tensor_2d_mean(sum) / batch_size
        for j in range(batch_size):
            mini_batch[j][i] = (mini_batch[j][i] - mean)  / torch.sqrt(variance + epsilon)
        sum = 0
    return mini_batch

def prepare_and_init_Net_with_batch_norm(eps):
    class Net(nn.Module):
        def __init__(self, device):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 11)
            self.conv2 = nn.Conv2d(6, 12, 9)
            self.conv3 = nn.Conv2d(12, 24, 7)
            self.conv4 = nn.Conv2d(24, 30, 6)
            self.pool2 = nn.MaxPool2d(2, 2)
            self.fc1 = nn.Linear(30 * 10 * 10, 600)
            self.fc1_bn = nn.BatchNorm1d(600)
            self.fc3 = nn.Linear(600, 28)

        def forward(self, x):
            x = self.pool2(F.relu(mini_batch_normalization(self.conv1(x), eps)))
            x = self.pool2(F.relu(mini_batch_normalization(self.conv2(x), eps)))
            x = self.pool2(F.relu(mini_batch_normalization(self.conv3(x), eps)))
            x = self.pool2(F.relu(mini_batch_normalization(self.conv4(x), eps)))
            x = x.view(-1, 30 * 10 * 10)
            x = F.relu(self.fc1_bn(self.fc1(x)))
            x = self.fc3(x)
            return x


    net = Net(device='cuda:0')
    return net

In [4]:
def prepare_and_init_Net_without_batch_norm():
    class Net(nn.Module):
        def __init__(self, device):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 11)
            self.conv2 = nn.Conv2d(6, 12, 9)
            self.conv3 = nn.Conv2d(12, 24, 7)
            self.conv4 = nn.Conv2d(24, 30, 6)
            self.pool2 = nn.MaxPool2d(2, 2)
            self.fc1 = nn.Linear(30 * 10 * 10, 600)
            self.fc3 = nn.Linear(600, 28)

        def forward(self, x):
            x = self.pool2(F.relu(self.conv1(x)))
            x = self.pool2(F.relu(self.conv2(x)))
            x = self.pool2(F.relu(self.conv3(x)))
            x = self.pool2(F.relu(self.conv4(x)))
            x = x.view(-1, 30 * 10 * 10)
            x = F.relu(self.fc1(x))
            x = self.fc3(x)
            return x


    net = Net(device='cuda:0')
    return net

In [5]:
import torch.optim as optim
def crit_and_opt(learning_rate, net):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    return criterion, optimizer

In [6]:
def cuda_init():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    return device


In [7]:
def learn(device, trainloader, optimizer, criterion, net):
    running_loss_for_optimizer = 0.0
    running_loss = 0.0
    loss_for_optimizer = 1.0
    learning_rate = 0.001
    for epoch in range(15):  # loop over the dataset multiple times
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            running_loss_for_optimizer += loss.item()
            if i % 1000 == 999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0
        print("running loss at end of epoch: ", running_loss_for_optimizer/len(trainloader))
        # if the loss is low enough, we decrease the learning rate
        if(running_loss_for_optimizer/len(trainloader) < loss_for_optimizer):
            learning_rate /= 2
            loss_for_optimizer /= 2
            criterion, optimizer = crit_and_opt(learning_rate)
            print( "devaluating optimizer, current value: ", loss_for_optimizer)
        running_loss_for_optimizer = 0.0
        running_loss = 0.0

    print('Finished Training')

In [8]:
def conf_matrix(net, testloader):
    device = cuda_init()
    conf_mat = torch.zeros(28, 28)
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            for i in range(images.shape[0]):
                label = labels[i]
                predict = predicted[i]
                conf_mat[label.item()][predict.item()] +=1
    return conf_mat

In [9]:
net = prepare_and_init_Net_with_batch_norm(10**(-15))
PATH = './actors_net_batch_norm.pth'
net.load_state_dict(torch.load(PATH))
trainloader, testloader, classes = prepare_data()
tmp = conf_matrix(net, testloader)

In [28]:
sum = 0
for i in range(28):
    sum += tmp[i][i]/99
print(sum/28)
# print(tmp)





tensor(0.7439)
