# Study Objectives
This notebook is a classification study on the eyantra fruits dataset.

# Importing Python libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Hyper Parameters

In [None]:
# --- Data split and training schedule ---
train_split = 0.7        # fraction of the rows used as the training split
EPOCHS = 13              # number of passes over the training loader
train_batch = 8          # batch size of the training loader
test_batch = 4           # batch size of the test loader (also the plot width)
learning_rate = 0.001    # learning rate handed to the optimizer

# --- Image geometry and label names ---
image_size = 100         # images are reshaped to image_size x image_size
classes = [
    'Apple',
    'Banana',
    'Orange',
    'Pineapple',
    'Strawberry',
    'Other',
]

# --- Augmentation probabilities ---
rate_gray = 0.2          # probability passed to RandomGrayscale
rate_flip = 0.4          # probability passed to RandomHorizontalFlip


# Images Data Entry and Preparations

In [None]:
class IMG_Dataset(Dataset):
    """Training split of a flattened-image CSV; the test split is kept as attributes.

    The CSV is read with pandas; every column except the last is treated as
    pixel features and the last column as the label. Rows are split into
    train / test parts using the module-level ``train_split`` fraction and a
    fixed ``random_state`` so the split is reproducible.

    Indexing and ``len()`` operate on the *training* split only. The held-out
    data is exposed through ``X_test`` / ``test_labels``.

    Attributes:
        X_train: float32 tensor reshaped to (-1, 1, image_size, image_size).
        X_test: float32 tensor reshaped to (-1, 1, image_size, image_size).
        train_labels: int64 tensor of training labels.
        test_labels: int64 tensor of test labels.
        transform: random augmentation applied to each training sample on access.
    """

    def __init__(self,file_name):
        """Load ``file_name``, split it, standardise it and build the tensors.

        Args:
            file_name: path of the CSV file (anything ``pandas.read_csv`` accepts).
        """
        # Import data
        filedata = pd.read_csv(file_name)

        # Train / Test split
        trainset, testset = model_selection.train_test_split(
            filedata,
            train_size=train_split,
            random_state=11)

        # Separate features from labels
        x_train = trainset.iloc[:, :-1].values
        x_test = testset.iloc[:, :-1].values
        train_labels = trainset.iloc[:, -1].values
        test_labels = testset.iloc[:, -1].values

        # Standardise with statistics learned from the training split ONLY.
        # Re-fitting the scaler on the test split would scale the two splits
        # differently and leak test statistics into the evaluation.
        scaler = preprocessing.StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        # Random augmentation, applied per sample in __getitem__ so that every
        # access draws fresh randomness and the transforms see a (1, H, W)
        # image instead of the whole flattened feature matrix.
        # NOTE(review): the images are single-channel, so RandomGrayscale is
        # expected to leave pixel values unchanged; it is kept so rate_gray
        # stays in effect - confirm against the installed torchvision version.
        self.transform = torchvision.transforms.Compose([
            transforms.RandomGrayscale(rate_gray),
            transforms.RandomHorizontalFlip(rate_flip)])

        # Prepare 4 datasets for Net processes. The features stay floating
        # point: casting the standardised values to integers would truncate
        # almost all of the information in them.
        self.X_train = torch.tensor(
            x_train, dtype=torch.float32).reshape(-1, 1, image_size, image_size)
        self.X_test = torch.tensor(
            x_test, dtype=torch.float32).reshape(-1, 1, image_size, image_size)
        # CrossEntropyLoss expects class indices as int64.
        self.train_labels = torch.tensor(train_labels, dtype=torch.long)
        self.test_labels = torch.tensor(test_labels, dtype=torch.long)

    def __len__(self):
        """Return the number of training samples."""
        return len(self.train_labels)

    def __getitem__(self,index):
        """Return the (randomly augmented) training image and its label."""
        return self.transform(self.X_train[index]), self.train_labels[index]

# Deep Learning net model based on LeNet5 model

In [None]:
class Net(nn.Module):
    """Small LeNet-5 style CNN for single-channel images, producing 6 logits.

    For a 1x100x100 input the spatial size evolves as
    100 -> conv 9x9 -> 92 -> pool 2x2 -> 46 -> conv 15x15 -> 32 -> pool 2x2 -> 16,
    which is where the ``16*16`` input width of the first dense layer comes from.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel, 1 output channel per convolution
        self.conv1 = nn.Conv2d(1, 1, 9)   # 9x9 kernel, stride 1, no padding
        self.conv2 = nn.Conv2d(1, 1, 15)  # 15x15 kernel, stride 1, no padding
        self.fc1 = nn.Linear(16*16, 120)  # FC layer 16*16 to 120 neurons
        self.fc2 = nn.Linear(120, 84)     # FC layer 120 to 84 neurons
        self.fc3 = nn.Linear(84, 6)       # Output logits layer

    def forward(self, x):
        """Compute class logits for a batch of images.

        Pipeline: conv1 -> tanh -> max-pool -> sigmoid -> conv2 -> tanh
        -> max-pool -> sigmoid -> FC1 -> tanh -> FC2 -> tanh -> logits.

        Args:
            x: input batch; each sample must flatten to 16*16 features after
               the two conv/pool stages (e.g. shape (N, 1, 100, 100)).

        Returns:
            Tensor of shape (N, 6) with unnormalised class scores.
        """
        # torch.tanh / torch.sigmoid replace the deprecated F.tanh / F.sigmoid.
        # Max pooling over a (2, 2) window
        x = torch.sigmoid(F.max_pool2d(torch.tanh(self.conv1(x)), (2, 2)))
        x = torch.sigmoid(F.max_pool2d(torch.tanh(self.conv2(x)), 2))
        x = x.view(-1, self.num_flat_features(x))  # Flatten features by batch
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample (all dims except batch)."""
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

# Test validation: runs the trained net on the predefined test dataloader and displays sample images with their labeled class vs. predicted class

In [None]:
def test_validation():
    """Evaluate the trained network on ``testloader`` and print its accuracy.

    Reads the module-level ``net``, ``testloader``, ``classes`` and
    ``test_batch``. Every 60th batch is additionally shown as a row of images,
    each annotated with its true label (text inside the image) and the
    predicted class (title).
    """
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (images, labels) in enumerate(testloader):
            # Keep the batch in locals: assigning it to org_data.X_test /
            # org_data.test_labels would overwrite the dataset's held-out
            # tensors with a single mini-batch.
            test_outputs = net(images)
            _, predicted = torch.max(test_outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if i % 60 == 59:
                # squeeze=False keeps a 2-D axes array even when test_batch == 1
                fig, ax = plt.subplots(1, test_batch, squeeze=False)
                ax = ax[0]
                plt.tight_layout(w_pad=2.0)
                for img in range(len(labels)):
                    sample = torch.squeeze(images[img])
                    label = labels[img]
                    ax[img].imshow(sample)
                    ax[img].axis('off')
                    ax[img].text(0,50,'{}'.format(classes[label]),color='white',fontweight='bold')
                    ax[img].set_title('{}/{}: Predicted:\n{}'.format(i+1,img+1,classes[predicted[img]]))
                # A short final batch leaves spare axes; hide their empty frames.
                for spare in ax[len(labels):]:
                    spare.axis('off')
                plt.show()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero below.
        print('No test samples were evaluated.')
        return
    print('% Accuracy of the network on test images:', round(100 * correct / total,2))

# Main Program


In [None]:
if __name__ == '__main__':
    # Local import: only this script section needs TensorDataset.
    from torch.utils.data import TensorDataset

    # Create Train / Test DataLoaders
    org_data = IMG_Dataset('dataset_attr.csv.zip')
    trainloader = DataLoader(org_data, batch_size=train_batch,
                             shuffle=True, num_workers=4)
    # IMG_Dataset indexes its training split only, so the held-out tensors are
    # wrapped explicitly; otherwise "test" accuracy is measured on train data.
    testloader = DataLoader(
        TensorDataset(org_data.X_test, org_data.test_labels),
        batch_size=test_batch, shuffle=False, num_workers=4)

    # Build the LeNet5-style network and declare loss criterion and optimizer
    net = Net()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    loss_history = []
    accuracy_history = []
    # Number of training samples; constant across batches and epochs.
    total = org_data.train_labels.size(0)

    # Training main loop
    for epoch in range(EPOCHS):  # loop over the dataset multiple times
        running_loss = 0.0
        correct = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            train_outputs = net(inputs)
            _, predicted = torch.max(train_outputs.detach(), 1)
            correct += (predicted == labels).sum().item()
            loss = criterion(train_outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics (running_loss is the sum of batch losses so far
            # in this epoch, not an average)
            running_loss += loss.item()
            if i % 100 == 99:    # print every 100 mini-batches
                print('[%d, %5d] loss: %.3f ' %
                      (epoch + 1, i + 1, running_loss))
        loss_history.append(running_loss)
        accuracy = 100*correct/total
        accuracy_history.append(accuracy)
        print('epoch',epoch+1,'completed with loss:',round(running_loss,3),' & accuracy[%]:',round(accuracy,2))
    print('Finished Training')

    # Plot loss and accuracy progress during training phase
    dx = range(EPOCHS)
    headline1 = 'Cross-Entropy Loss w. Adam opt.(LR='+str(learning_rate)+')'
    text1 = 'After '+str(EPOCHS)+' epochs, Loss is:'+str(round(loss_history[-1],3))
    plt.plot(dx,loss_history,c='r',linewidth=4,label='Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(headline1)
    plt.text(int(EPOCHS/2),50,text1)
    plt.show()

    headline2 = 'Accuracy w. Adam opt.(LR='+str(learning_rate)+')'
    text2 = 'After '+str(EPOCHS)+' epochs, Accuracy is:'+str(round(accuracy_history[-1],2))
    plt.plot(dx,accuracy_history,c='b',linewidth=4,label='Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title(headline2)
    plt.text(int(EPOCHS/2),90,text2)
    plt.show()

# Run validation on test data

In [None]:
# Evaluate the trained network; test_validation reads the `net` and
# `testloader` globals, so the main training cell must have run first.
test_validation()