### Initial Classification Neural Network

Before using any other famous Deep Neural Networks such as Alexnet or Resnet. We experimented to build our own shallow neural network to see how minimum Convolutional Neural Network would perform classifying Ebru-Suminagashi Images. 

## Library Selections
In order to use state of the art toolset for this research paper we installed CUDA 10 , Pytorch 1.0, Python 3.7 and openCV 3.4 on ubuntu 16.04 with NVDIA 1080 GPU.  https://arxiv.org/pdf/1606.02228.pdf


In [1]:
# Import required libraries for this section
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import cv2

import torch
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

# Use GPU if it's available
from collections import OrderedDict
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
num_workers = 0
# how many samples per batch to load
batch_size = 20
# percentage of data set to use as test
validation_size = 0.5
test_validation_size = 0.4

transform = transforms.Compose([ transforms.CenterCrop(1000), transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(), transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

data_set = dset.ImageFolder(root="data",transform=transform)
dataloader = torch.utils.data.DataLoader(data_set, batch_size=4,shuffle=True,num_workers=2)

# obtain training indices that will be used for test
num_data = len(data_set)
indices = list(range(num_data))
np.random.shuffle(indices)
split = int(np.floor(test_validation_size * num_data))
train_idx, test_idx = indices[split:], indices[:split]
num_train_data = len(test_idx)
split_validation = int(np.floor(validation_size * num_train_data))
test_idx, validation_idx = test_idx[split_validation:], test_idx[:split_validation]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
validation_sampler  = SubsetRandomSampler(validation_idx)
test_sampler  = SubsetRandomSampler(test_idx)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(data_set, batch_size=batch_size,
                                           sampler = train_sampler, num_workers=num_workers)
validation_loader = torch.utils.data.DataLoader(data_set, batch_size=batch_size, 
                                           sampler = test_sampler, num_workers=num_workers)
test_loader  = torch.utils.data.DataLoader(data_set, batch_size=batch_size, 
                                           sampler = test_sampler, num_workers=num_workers)

classes = ('blurry','clear')

Andrew Ng says previos era (the era before big data) traditional train/test/validation split was 60/20/20. If we still have small dataset traditional ratios still ok to use like our ebru dataset which we have 800 examples

In [3]:
print('Total number of data - ', num_data)
print('Training Data approximately %60 of the total - ', len(train_idx))
print('Validation Data approximately %20 of the total - ', len(validation_idx))
print('Test Data approximately %20 of the total - ', len(test_idx))

Total number of data -  806
Training Data approximately %60 of the total -  484
Validation Data approximately %20 of the total -  161
Test Data approximately %20 of the total -  161


In [11]:
from torch.autograd import Variable
import torch.nn.functional as F

class SingleCNN(torch.nn.Module):
  
    #Our batch shape for input x is (3, 224, 224)
    
    def __init__(self):
        super(SingleCNN, self).__init__()
        
        #Input channels = 3, output channels = 18
        self.conv1 = torch.nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        #4608 input features, 64 output features (see sizing flow below)
        self.fc1 = torch.nn.Linear(18 * 112 * 112, 64)
        
        #64 input features, 2 output features for our 2 defined classes
        self.fc2 = torch.nn.Linear(64, 2)
        
    def forward(self, x):
        
        #Computes the activation of the first convolution
        #Size changes from (3, 224, 224) to (18, 112, 112)
        x = F.relu(self.conv1(x))
        
        #Size changes from (18, 224, 224) to (18, 112, 112)
        x = self.pool(x)
        
        #Reshape data to input to the input layer of the neural net
        #Size changes from (18, 112, 112) to (1, 225792)
        #Recall that the -1 infers this dimension from the other given dimension
        x = x.view(-1, 18 * 112 *112)
        
        #Computes the activation of the first fully connected layer
        #Size changes from (1, 225792) to (1, 64)
        x = F.relu(self.fc1(x))
        
        #Computes the second fully connected layer (activation applied later)
        #Size changes from (1, 64) to (1, 10)
        x = self.fc2(x)
        return(x)

In [12]:
def outputSize(in_size, kernel_size, stride, padding):

    output = int((in_size - kernel_size + 2*(padding)) / stride) + 1

    return(output)

In [13]:
import torch.optim as optim

def createLossAndOptimizer(net, learning_rate=0.001):
    
    #Loss function
    loss = torch.nn.CrossEntropyLoss()
    
    #Optimizer
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    return(loss, optimizer)

In [16]:
import time

def trainNet(net, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    #Get training data
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = createLossAndOptimizer(net, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_loader, 0):
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.data
            total_train_loss += loss_size.data
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for inputs, labels in validation_loader:
            
            #Wrap tensors in Variables
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Forward pass
            val_outputs = net(inputs)
            val_loss_size = loss(val_outputs, labels)
            total_val_loss += val_loss_size.item()
        
        total_test_loss = 0
        accuracy = 0
        net.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
            
                #Wrap tensors in Variables
                inputs, labels = Variable(inputs), Variable(labels)
            
                #Forward pass
                test_outputs = net(inputs)
                test_loss_size = loss(test_outputs, labels)
                total_test_loss += test_loss_size.item()
                
                ps = torch.exp(test_outputs)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
                
        net.train()
        
        print("Training loss = {:.2f}".format(total_train_loss / len(train_loader)))
        print("Validation loss = {:.2f}".format(total_val_loss / len(validation_loader)))
        print("Test loss = {:.2f}".format(total_test_loss / len(test_loader)))
        print("Test Accuracy = {:.2f}".format(accuracy / len(test_loader)))
        with open('single_cnn.csv','a') as f:
            f.write(f"Train loss: {total_train_loss:.3f}.. "
                    f"Train loss: {total_val_loss:.3f}.. "
                    f"Test loss: {total_test_loss/len(test_loader):.3f}.. "
                    f"Test accuracy: {accuracy/len(test_loader):.3f}")
            f.write("\n")
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

In [17]:
CNN = SingleCNN()
trainNet(CNN, batch_size=32, n_epochs=5, learning_rate=0.001)

===== HYPERPARAMETERS =====
batch_size= 32
epochs= 5
learning_rate= 0.001
Epoch 1, 12% 	 train_loss: 21.38 took: 15.55s
Epoch 1, 24% 	 train_loss: 6.93 took: 15.65s
Epoch 1, 36% 	 train_loss: 2.05 took: 15.64s
Epoch 1, 48% 	 train_loss: 2.33 took: 15.48s
Epoch 1, 60% 	 train_loss: 0.91 took: 15.22s
Epoch 1, 72% 	 train_loss: 1.25 took: 15.55s
Epoch 1, 84% 	 train_loss: 0.85 took: 15.35s
Epoch 1, 96% 	 train_loss: 1.27 took: 15.54s
Training loss = 2.98
Validation loss = 0.84
Test loss = 0.79
Test Accuracy = 0.67
Epoch 2, 12% 	 train_loss: 0.62 took: 15.34s
Epoch 2, 24% 	 train_loss: 0.91 took: 15.40s
Epoch 2, 36% 	 train_loss: 1.00 took: 15.52s
Epoch 2, 48% 	 train_loss: 0.92 took: 15.40s
Epoch 2, 60% 	 train_loss: 0.74 took: 15.38s
Epoch 2, 72% 	 train_loss: 0.89 took: 15.69s
Epoch 2, 84% 	 train_loss: 0.86 took: 15.41s
Epoch 2, 96% 	 train_loss: 0.75 took: 14.95s
Training loss = 0.55
Validation loss = 0.59
Test loss = 0.48
Test Accuracy = 0.77
Epoch 3, 12% 	 train_loss: 0.84 took: 14.