<h1 align="center">Convolutional Neural Network (CNN) for Cat-Dog Image Recognition</h1>
<table>
<tr>
<td><img src="cat.jpg" width="320"></td>
<td><img src="dog.jpg" width="320"></td>
</tr>
</table>

<hr>
<h3>Show samples in unknown folder</h3>
<pre>
images
    0: cat
    1: dog
    u: (unknown)
</pre>

In [1]:
import numpy as np
import cv2
import glob

def show_sample(img):
    """Show `img` in a 256x256 window named 'Sample' until SPACE is pressed.

    The image is divided by 255.0 before display, so it is expected to hold
    pixel values on a 0-255 scale (as returned by cv2.imread in this notebook).
    All OpenCV windows are destroyed before returning.
    """
    # cv2.imshow interprets floating-point pixel values on a (0.0, 1.0) scale
    scaled = img / 255.0
    # Resize to (256, 256) so small samples are easy to inspect
    preview = cv2.resize(scaled, (256, 256))
    # Open the window and draw the image
    cv2.namedWindow('Sample')
    cv2.imshow('Sample', preview)
    # Poll the keyboard every 500 ms; only SPACEBAR closes the window
    spacebar = ord(' ')
    while cv2.waitKey(500) != spacebar:
        pass
    cv2.destroyAllWindows()
    return

# Preview every file found in the "unknown" folder, one window at a time.
ilst = glob.glob('images/u/*.*')

for f in ilst:
    print(f)
    img = cv2.imread(f)
    # cv2.imread returns None (it does not raise) when a file cannot be
    # decoded, and the '*.*' pattern also matches non-image files, so skip
    # those instead of crashing inside show_sample().
    if img is None:
        print('Skipped (not a readable image): %s' % f)
        continue
    show_sample(img)


images/u\0.jpg
images/u\00.jpg
images/u\1.jpg
images/u\11.jpg


<hr>
<h3>Load all training samples of cat (0) & dog (1)</h3>
<p>image_size = <strong style="color:red">64</strong></p>
<pre>
images: all images (64x64x3)
labels: all labels (2)
</pre>

In [2]:
# Load all training samples of cat (0) & dog (1)

import numpy as np
import cv2
import glob

# Normalized (square) size of the sample images
image_size = 64
# Class indices (labels): 0 for cat, 1 for dog
class_num = 2
class_name = [ 0, 1 ]


def load_samples(class_ids, size, num_classes):
    """Load, resize and one-hot label every readable image of each class.

    Files are looked up under 'images/<class id>/*.*'. Each folder is listed
    once (in sorted order, so the sample order is deterministic) and files
    that cv2.imread cannot decode are reported and skipped instead of
    crashing in cv2.resize.

    Parameters
    ----------
    class_ids : iterable of int
        Class indices; each one is both the folder name and the label column.
    size : int
        Images are resized to (size, size).
    num_classes : int
        Width of the one-hot label rows.

    Returns
    -------
    (imgs, lbls) : tuple of numpy.ndarray
        imgs has shape (n, size, size, 3) and lbls has shape (n, num_classes),
        both float64, where n is the number of images actually loaded.
    """
    pixels = []
    targets = []
    for c in class_ids:
        for f in sorted(glob.glob('images/%d/*.*' % c)):
            img = cv2.imread(f)
            if img is None:
                print('Skipped unreadable file: %s' % f)
                continue
            pixels.append(cv2.resize(img, (size, size)))
            targets.append(c)

    loaded = len(pixels)
    imgs = np.zeros([loaded, size, size, 3])
    lbls = np.zeros([loaded, num_classes])
    for row, (img, c) in enumerate(zip(pixels, targets)):
        imgs[row, :, :, :] = img[:, :, :]
        # One-hot label: column c is set, every other column stays 0
        lbls[row, c] = 1
    return imgs, lbls


images, labels = load_samples(class_name, image_size, class_num)
# Number of samples actually loaded
sample_n = len(images)

print('Samples are ready. (n=%d)' % (sample_n))


Samples are ready. (n=20)


<hr>
<h3>Basic Parameters</h3>

In [3]:
# Load modules

import torch
import torch.nn as nn

# Reproducibility: seed PyTorch and also NumPy, because shuffle() draws its
# permutation from np.random during training.
torch.manual_seed(1)
np.random.seed(1)

# Basic parameters (the cat/dog samples were loaded by the previous cell)

# Number of training epochs
EPOCH = 1000
# Learning rate (LR)
LR = 0.001
# Number of color channels (matches the last axis of `images`)
CHANNEL = 3
# Image size: IMAGE_SIZE x IMAGE_SIZE
IMAGE_SIZE = image_size
# Total number of samples
N = sample_n


<hr>
<h3>CNN for 64x64 color images (RGB) (Cats vs. Dogs)</h3>
<p style="color:red">4 convolutional layers</p>

In [4]:
# CNN definition

class CNN(nn.Module):
    """Four-stage convolutional classifier for square color images.

    Every convolutional stage is Conv2d(5x5, stride 1, padding 2) -> ReLU ->
    MaxPool2d(2): the convolution keeps the spatial size and the pooling
    halves it. For the default 64x64 input the feature maps are

        (3, 64, 64) -> (16, 32, 32) -> (32, 16, 16) -> (64, 8, 8) -> (128, 4, 4)

    so 128 * 4 * 4 = 2048 flattened features pass through dropout and then
    the fully connected head 2048 -> 256 -> 16 -> 2.

    Parameters
    ----------
    image_size : int, optional
        Side length of the input images. When omitted, the module-level
        IMAGE_SIZE is used, so ``CNN()`` behaves as before.
    in_channels : int, optional
        Number of input channels (default 3).
    num_classes : int, optional
        Number of output scores (default 2).

    Raises
    ------
    ValueError
        If image_size is smaller than 16; four 2x2 poolings would leave no
        features for the first fully connected layer.

    Submodule names, their creation order and their internal indices are
    the same as before, so state_dict keys (e.g. 'conv1.0.weight') and
    seeded initialisation are unchanged.
    """

    def __init__(self, image_size=None, in_channels=3, num_classes=2):
        super(CNN, self).__init__()
        if image_size is None:
            image_size = IMAGE_SIZE
        # Four 2x2 max-poolings divide the side length by 16 (floored)
        feature_side = int(image_size) // 16
        if feature_side < 1:
            raise ValueError('image_size must be at least 16, got %r' % (image_size,))

        # Convolutional stages; the shapes quoted are for a 64x64 input
        self.conv1 = self._conv_block(in_channels, 16)   # -> (16, 32, 32)
        self.conv2 = self._conv_block(16, 32)            # -> (32, 16, 16)
        self.conv3 = self._conv_block(32, 64)            # -> (64, 8, 8)
        self.conv4 = self._conv_block(64, 128)           # -> (128, 4, 4)
        # Dropout (default p=0.5) randomly zeroes activations while training
        # to reduce overfitting
        self.drop_out = nn.Dropout()
        # Fully connected layer 1 (IN=128*4*4=2048 for 64x64 input, OUT=256)
        self.fc1 = nn.Sequential(
            nn.Linear(128 * feature_side * feature_side, 256),
            nn.ReLU()
        )
        # Fully connected layer 2 (IN=256, OUT=16)
        self.fc2 = nn.Sequential(
            nn.Linear(256, 16),
            nn.ReLU()
        )
        # Fully connected layer 3, one raw score per class (IN=16, OUT=num_classes)
        self.fc3 = nn.Sequential(
            nn.Linear(16, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv2d(5x5, same size) -> ReLU -> MaxPool2d(2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        """Return raw class scores of shape (batch, num_classes).

        x is a float tensor of shape (batch, in_channels, image_size, image_size).
        """
        net = self.conv1(x)
        net = self.conv2(net)
        net = self.conv3(net)
        net = self.conv4(net)
        # Flatten to (batch_size, 128 * feature_side * feature_side)
        net = net.view(net.size(0), -1)
        net = self.drop_out(net)
        net = self.fc1(net)
        net = self.fc2(net)
        output = self.fc3(net)
        return output

# Instantiate the CNN (its constructor reads the module-level IMAGE_SIZE)
cnn = CNN()

# Print the network architecture
print(cnn)


CNN(
  (conv1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc1): Sequential(
    (0): Linear(in_features=2048, out_features=256, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in

<hr>
<h3>Move to GPU if CUDA Available</h3>

In [5]:

# Create random Tensors to hold inputs and outputs.
# They act as pre-allocated buffers: shuffle() overwrites their first N rows
# with the real samples and labels on every training epoch.
test_x = torch.randn(N, CHANNEL, IMAGE_SIZE, IMAGE_SIZE)
test_y = torch.randn(N, class_num)

# Move the model and both buffers to the GPU when CUDA is available,
# otherwise keep everything on the CPU

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
cnn.to(device)
test_x, test_y = test_x.to(device), test_y.to(device)


cpu


<hr>
<h3>CNN Model Training</h3>

In [6]:
# CNN model training

# shuffle

def shuffle(test_x, test_y):
    """Fill the buffers with all samples and labels in a random order.

    Reads the module-level `images` (N, H, W, C), `labels` and `N`. A random
    permutation of the N samples is drawn from np.random, the images are
    converted to channel-first layout (N, C, H, W), and the first N rows of
    `test_x` / `test_y` are overwritten in place; row s of both buffers
    refers to the same sample.

    Parameters
    ----------
    test_x : torch.Tensor
        Destination for images, at least N rows of shape (C, H, W).
    test_y : torch.Tensor
        Destination for labels, at least N rows.

    Returns
    -------
    (test_x, test_y) : the same tensor objects that were passed in.
    """
    order = np.random.permutation(N)
    # One transpose (H, W, C) -> (C, H, W) for the whole batch replaces the
    # per-sample pair of swapaxes calls; the copy makes it contiguous.
    batch = np.ascontiguousarray(images[order].transpose(0, 3, 1, 2))
    # Bulk assignment converts dtype and moves to the buffers' device
    test_x[:N] = torch.from_numpy(batch)
    test_y[:N] = torch.from_numpy(labels[order])
    return test_x, test_y

# cnn training

def cnn_training(cnn, test_x, test_y):
    """Train `cnn` with full-batch Adam steps on a summed squared-error loss.

    Reads the module-level LR, EPOCH and sample_n and calls shuffle() to
    refill the buffers before every step. The loop runs for epochs
    0..EPOCH inclusive (EPOCH + 1 steps) so the final epoch is reported too.
    Every 10th epoch the loss divided by sample_n is printed.

    Parameters
    ----------
    cnn : torch.nn.Module
        Model to optimise; it is updated in place and left in training mode.
    test_x, test_y : torch.Tensor
        Input and target buffers handed to shuffle().

    Returns
    -------
    The same `cnn` object.
    """
    # Ensure training mode: load_cnn_model() switches the model to eval(),
    # and re-running this cell afterwards would otherwise train with dropout
    # disabled.
    cnn.train()
    # Loss is MSE summed over all outputs (used here instead of cross entropy)
    loss_func = torch.nn.MSELoss(reduction='sum')
    # Optimise all cnn parameters
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
    # Train iteratively
    for epoch in range(EPOCH+1):
        # Refill the buffers with the samples in a new random order
        b_x, b_y = shuffle(test_x, test_y)
        # Forward pass
        output = cnn(b_x)
        # Summed squared error against the one-hot targets
        loss = loss_func(output, b_y)
        # Report the per-sample error every 10 epochs
        if epoch % 10 == 0:
            err = loss.item() / sample_n
            print('%5d, %18.12f' % (epoch, err))
        # Clear gradients for this training step
        optimizer.zero_grad()
        # Backpropagation, compute gradients
        loss.backward()
        # Apply gradients
        optimizer.step()
    return cnn

# Run the training loop; cnn_training returns the model it was given
cnn = cnn_training(cnn, test_x, test_y)

# Save only the learned parameters (state_dict) to disk
torch.save(cnn.state_dict(), 'cnn_cat_dog.model')


    0,     1.764113807678
   10,     0.518979787827
   20,     0.503824710846
   30,     0.458976507187
   40,     0.360152721405
   50,     0.100090253353
   60,     0.032498863339
   70,     0.023510664701
   80,     0.013741017878
   90,     0.029841819406
  100,     0.012490911037
  110,     0.010241258144
  120,     0.006479963660
  130,     0.009478911012
  140,     0.006573445350
  150,     0.004188854247
  160,     0.007155989110
  170,     0.015013098717
  180,     0.007167615741
  190,     0.006097898632
  200,     0.006228988618
  210,     0.006476578861
  220,     0.004904443398
  230,     0.002137554251
  240,     0.004946892709
  250,     0.003102415055
  260,     0.004391370714
  270,     0.005497048423
  280,     0.002854255401
  290,     0.013952103257
  300,     0.006861539185
  310,     0.004097914323
  320,     0.007272765785
  330,     0.005680207908
  340,     0.003956465796
  350,     0.005587508529
  360,     0.004872219265
  370,     0.003743065521
  380,     0

<hr>
<h3>Load CNN Model of Cat/Dog (CD) samples</h3>

In [7]:
def load_cnn_model():
    """Restore the module-level `cnn` from 'cnn_cat_dog.model'.

    The saved state_dict is mapped to the CPU while loading, copied into
    `cnn`, and the model is then switched to evaluation mode.
    """
    # Read the saved parameters, remapping any stored tensors to the CPU
    saved_state = torch.load('cnn_cat_dog.model', map_location='cpu')
    cnn.load_state_dict(saved_state)
    # Evaluation mode: dropout is disabled for inference
    cnn.eval()
    print('Load previous cnn model completely!')
    return
    
# Reload the saved weights into `cnn` and switch it to evaluation mode
load_cnn_model()

Load previous cnn model completely!


<hr>
<h3>Test unknown samples</h3>

In [8]:

def cnn_testing():
    """Classify every readable image in 'images/u/' as 'Cat' or 'Dog'.

    Reads the module-level `cnn`, `device` and `image_size`. Each image is
    resized to (image_size, image_size), converted to channel-first layout
    and passed through the network 5 times; each pass votes 'Cat' when
    output[0] >= output[1] and 'Dog' otherwise, and the majority wins (ties
    go to 'Cat'). Files that cv2.imread cannot decode are reported and
    skipped.

    Returns
    -------
    list of [path, label] pairs, one per classified image, in glob order.
    """
    res = []
    for f in glob.glob('images/u/*.*'):
        img = cv2.imread(f)
        # cv2.imread returns None for files it cannot decode; '*.*' also
        # matches non-image files, which would otherwise crash cv2.resize.
        if img is None:
            print('Skipped unreadable file: %s' % f)
            continue
        img = cv2.resize(img, (image_size, image_size))
        # (H, W, C) -> (C, H, W), then add the batch axis: (1, C, H, W)
        chw = np.ascontiguousarray(img.transpose(2, 0, 1))
        t_x = torch.from_numpy(chw).float().unsqueeze(0).to(device)
        dic = { 'Cat':0, 'Dog':0 }
        # Inference only, so no autograd graph is needed
        with torch.no_grad():
            # NOTE(review): after cnn.eval() dropout is off, so the 5 passes
            # should agree; the vote only matters if the model is still in
            # training mode.
            for _ in range(5):
                for o in cnn(t_x).tolist():
                    k = 'Cat' if o[0] >= o[1] else 'Dog'
                    dic[k] = dic[k] + 1
        if dic['Cat'] >= dic['Dog']:
            res.append([f, 'Cat'])
        else:
            res.append([f, 'Dog'])
    return res

# Classify the unknown samples and print the [path, label] pairs
if __name__ == '__main__':
    res = cnn_testing()
    print(res)


[['images/u\\0.jpg', 'Cat'], ['images/u\\00.jpg', 'Cat'], ['images/u\\1.jpg', 'Dog'], ['images/u\\11.jpg', 'Dog']]
