In [2]:
#Name: Soy Vitou
#Class: ITE-A
#Project: Animal Recognition Using Convolutional Neural Network 

#Library using Pytorch 

In [3]:
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pathlib
from torch.optim import Adam
import glob


### Data loading and preprocessing

- TRANSFORMS.TOTENSOR()                  
- MEANING :                              
- Convert 0-255 TO 0-1, NUMPY TO TENSORS 

- TRANSFORMS.NORMALIZE([0.5,0.5,0.5],[0.5,0.5,0.5]) 
- MEANING :                                         
- 0-1 TO RANGE [-1,1] , FORMULA IS (X - MEAN)/STD   


### Path of Datasets

In [4]:
train_path = "./dataset/train"
test_path = "./dataset/test"

### Load image

In [5]:
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
])

train_data = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform = transformer),
    batch_size = 32,
    shuffle = True,
)
test_data = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform = transformer),
    batch_size = 32,
    shuffle = True,
)

> * Checking for devices

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

> * Print categories

In [7]:
root = pathlib.Path(train_path)
classes = sorted([j.name] for j in root.iterdir())
print(classes)

[['cats'], ['dogs']]


### How the CatDogClassifier model works
* The model takes an input image of size `(256, 3, 150, 150)` and applies a series of convolutional layers to extract features from the image.
* The first convolutional layer has 12 filters, each of which is 3x3 pixels in size. The output of the first convolutional layer is a feature map of size `(256, 12, 150, 150)`.
* The second convolutional layer has 20 filters, each of which is 3x3 pixels in size. The output of the second convolutional layer is a feature map of size `(256, 20, 75, 75)`.
* The third convolutional layer has 32 filters, each of which is 3x3 pixels in size. The output of the third convolutional layer is a feature map of size `(256, 32, 75, 75)`.
* After the convolutional layers, the model applies a `max pooling` layer to reduce the size of the feature map by a factor of `2`.
* The output of the `max pooling` layer is `flattened` and `fed` into a `fully connected` layer with `1024 `neurons.
* The output of the fully connected layer is then fed into a final layer with `2` neurons, one for each class (`cat` or `dog`).

### brief the model work
* Input: Image of size `(256, 3, 150, 150)`
* Output: Class prediction (`cat` or `dog`)
### Architecture

* `3` convolutional layers with `12`, `20`, and `32` filters, respectively

![title](./image/conv2d.gif)

* `1` max pooling layer

![title](./image/MaxpoolSample2.png)

* `1` fully connected layer with `1024` neurons

![title](./image/full_connected.jpg)

In [8]:
class CatDogClassifier(nn.Module):

    def __init__(self, num_classes):
        super(CatDogClassifier, self).__init__()
        
        #Output size after convolutional filter
        #((W - F + 2P)/ S) + 1 
      
        
        #Input shape = (256, 3, 150, 150)
        
        self.conv1 = nn.Conv2d(3, 12, kernel_size=3, stride = 1, padding = 1)
        #Shape = (256, 12, 150, 150)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        #Shape = (256, 12, 150, 150)
        self.relu1 = nn.ReLU()
        #Shape = (256, 12, 150, 150)
        
        self.pool = nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be fector 2
        #Shape = (256, 12, 75, 75)
        
        self.conv2 = nn.Conv2d(12, 20, kernel_size=3, stride = 1, padding = 1)
        #Shape = (256, 20, 75, 75)
        self.relu2 = nn.ReLU()
        #Shape = (256, 20, 75, 75)
        
        self.conv3 = nn.Conv2d(20, 32, kernel_size=3, stride = 1, padding = 1)
        #Shape = (256, 32, 75, 75)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        #Shape = (256, 32, 75, 75)
        self.relu3 = nn.ReLU()
        #Shape = (256, 32, 75, 75)
        
        self.fc = nn.Linear(in_features=32*75*75, out_features=num_classes)
        
    #Feed forward function
    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        
        output = self.pool(output)
        
        output = self.conv2(output)
        output = self.relu2(output)
        
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
        
        #Above output will be in matrix form, with shape (256, 32, 75, 75)
        output = output.view(-1, 32*75*75)
        
        #fully connected layer
        output = self.fc(output)
        
        return output

### The model and criterion
* model = CatDogClassifier model with 2 classes
* device = GPU if available, CPU otherwise
* criterion = Cross-entropy loss function

In [9]:
model = CatDogClassifier(len(classes)).to(device)
criterion = nn.CrossEntropyLoss()

### Optimizer and lose function
* optimizer = Adam optimizer with learning rate `0.001` and weight decay `0.0001`
* lose_function = Cross-entropy loss function

In [10]:
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
lose_function = nn.CrossEntropyLoss()

### Get size of training and testing images
* use train count and test count for calculate test accuracy

In [11]:
train_count = len(glob.glob(train_path + '/**/*.jpg'))
test_count = len(glob.glob(test_path + '/**/*.jpg'))

* Print how many images in folder
 

In [12]:
print(train_count," : ", test_count)

559  :  399


#Models training and Saving the best models


In [13]:
epochs = 10
for epoch in range(epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy = 0.0
    train_lose = 0.0
    
    for i, (images, labels) in enumerate(train_data):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        
        optimizer.zero_grad()
        
        outputs = model(images)
        lose = lose_function(outputs, labels)
        lose.backward()
        optimizer.step()
        
        train_lose += lose.cpu().data*images.size(0)
        _, prediction = torch.max(outputs.data, 1)
        
        train_accuracy += int(torch.sum(prediction == labels.data))
    
    train_accuracy = train_accuracy /train_count
    train_lose = train_lose / train_count
    
    
    #Evaluation on testing dataset
    model.eval()
    
    test_accuracy = 0.0
    
    for i, (images, labels) in enumerate(test_data):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        test_accuracy += int(torch.sum(prediction == labels.data))
        
    test_accuracy = test_accuracy /train_count
    
    print('Epoch: ' + str(epoch) + ' => Train lose :' + str(int(train_lose)) + ' =>Test Accuracy : ' + str(test_accuracy))
    best_accuracy = 0.0
    #Saving the best models
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy = test_accuracy

Epoch: 0 => Train lose :7 =>Test Accuracy : 0.3667262969588551
Epoch: 1 => Train lose :3 =>Test Accuracy : 0.33810375670840787
Epoch: 2 => Train lose :1 =>Test Accuracy : 0.33810375670840787
Epoch: 3 => Train lose :0 =>Test Accuracy : 0.5080500894454383
Epoch: 4 => Train lose :0 =>Test Accuracy : 0.4561717352415027
Epoch: 5 => Train lose :0 =>Test Accuracy : 0.4597495527728086
Epoch: 6 => Train lose :0 =>Test Accuracy : 0.47942754919499103
Epoch: 7 => Train lose :0 =>Test Accuracy : 0.47942754919499103
Epoch: 8 => Train lose :0 =>Test Accuracy : 0.4669051878354204
Epoch: 9 => Train lose :0 =>Test Accuracy : 0.46869409660107336
