In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms

# Dataset locations (absolute paths; adjust to the environment the notebook runs in).
train_dataset_path = '/content/cat_and_dog/training_set/training_set'
test_dataset_path =  '/content/cat_and_dog/test_set/test_set'

# Single source of truth for the mini-batch size used by both loaders.
BATCH_SIZE = 32

# Every image is resized to 96x96 and converted to a tensor. The classifier's
# first fully connected layer is sized for exactly this resolution.
trans = transforms.Compose([transforms.Resize((96, 96)), transforms.ToTensor()])

# ImageFolder derives the class labels from the sub-directory names under each root.
train_data = torchvision.datasets.ImageFolder(root = train_dataset_path, transform = trans)
test_data = torchvision.datasets.ImageFolder(root = test_dataset_path, transform = trans)

print('class:', train_data.classes)
print('train_data:', len(train_data))
print('test_data:', len(test_data))

# Training batches are reshuffled every epoch.
train_data_loader = torch.utils.data.DataLoader(train_data,
                                                batch_size = BATCH_SIZE,
                                                shuffle = True)

# Evaluation does not benefit from shuffling; a fixed order keeps the test
# pass deterministic and reproducible between runs.
test_data_loader = torch.utils.data.DataLoader(test_data,
                                               batch_size = BATCH_SIZE,
                                               shuffle = False)

class: ['cats', 'dogs']
train_data: 8005
test_data: 2023


In [32]:
class Net(nn.Module):
  """Small CNN image classifier: four conv/BN/ReLU/max-pool stages and a 3-layer MLP head.

  The first fully connected layer expects 1728 input features, i.e. 48 channels
  of 6x6 feature maps, which is what the convolutional stack produces for
  3-channel 96x96 inputs (96 -> 98 -> 49 -> 24 -> 12 -> 6 after the four stages).

  Args:
    num_classes: number of output logits. Defaults to 5, the value that was
      previously hard-coded, so existing ``Net()`` callers are unaffected.

  Returns (from ``forward``):
    Raw, un-normalised logits of shape (batch, num_classes).
  """

  def __init__(self, num_classes = 5):
    super(Net, self).__init__()
    # Stage 1: padding=2 with a 3x3 kernel grows the map by 2 pixels (96 -> 98).
    self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 6,
                           kernel_size = 3, stride = 1, padding = 2)
    self.bn1 = nn.BatchNorm2d(6)
    self.max_pool1 = nn.MaxPool2d(kernel_size = 2)
    # Stages 2-4: size-preserving 3x3 convolutions, each doubling the channel count.
    self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 12,
                           kernel_size = 3, stride = 1, padding = 1)
    self.bn2 = nn.BatchNorm2d(12)
    self.max_pool2 = nn.MaxPool2d(kernel_size = 2)
    self.conv3 = nn.Conv2d(in_channels = 12, out_channels = 24,
                           kernel_size = 3, stride = 1, padding = 1)
    self.bn3 = nn.BatchNorm2d(24)
    self.max_pool3 = nn.MaxPool2d(kernel_size = 2)
    self.conv4 = nn.Conv2d(in_channels = 24, out_channels = 48,
                           kernel_size = 3, stride = 1, padding = 1)
    self.bn4 = nn.BatchNorm2d(48)
    self.max_pool4 = nn.MaxPool2d(kernel_size = 2)
    # Channel-wise dropout applied to the last feature maps before flattening.
    self.dropout1 = nn.Dropout2d(0.5)
    # Classifier head: 1728 = 48 * 6 * 6 flattened features.
    self.fc1 = nn.Linear(1728, 512)
    self.fc2 = nn.Linear(512, 256)
    self.fc3 = nn.Linear(256, num_classes)

  def forward(self, x):
    # Convolutional feature extractor: conv -> batch norm -> ReLU -> 2x2 max pool.
    x = self.max_pool1(F.relu(self.bn1(self.conv1(x))))
    x = self.max_pool2(F.relu(self.bn2(self.conv2(x))))
    x = self.max_pool3(F.relu(self.bn3(self.conv3(x))))
    x = self.max_pool4(F.relu(self.bn4(self.conv4(x))))
    x = self.dropout1(x)
    # Keep the batch dimension, flatten everything else.
    x = torch.flatten(x, 1)
    # Non-linearities between the linear layers are required: without them the
    # three layers compose into a single affine transform and add no capacity.
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    # Final layer returns raw logits (no softmax).
    x = self.fc3(x)
    return x

In [33]:
# hyperparameters
LEARNING_RATE = 0.001
epoch_count = 15

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("device:", device)

# Build the model and move its parameters to the selected device.
# NOTE(review): Net's final layer has 5 outputs although the dataset printed
# only two classes; CrossEntropyLoss still works because labels 0/1 are valid
# indices, but the remaining logits are never the target.
model = Net().to(device)
print(model)

# Adam optimizer, driven by the LEARNING_RATE hyperparameter declared above
# (0.001 is also Adam's default, so the numerical behaviour is unchanged).
optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE)

# Cross-entropy loss over raw logits; by default it returns the MEAN over the batch.
loss_fn = nn.CrossEntropyLoss()

for epoch in range(epoch_count):

  # Training mode: enables dropout and batch-norm statistic updates.
  model.train()

  for batch_idx, (x, y) in enumerate(train_data_loader):
    x, y = x.to(device), y.to(device)
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()

  # Reported once per epoch; the loss shown is that of the final mini-batch.
  print('Train Epoch: {} [{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, len(train_data_loader.dataset),
        100. * batch_idx / len(train_data_loader), loss.item()))

  # Evaluation mode: dropout disabled, batch norm uses its running statistics.
  model.eval()
  test_loss = 0.0
  correct = 0
  # No gradients are needed for evaluation; skipping the autograd graph saves
  # memory and time.
  with torch.no_grad():
    for x, y in test_data_loader:
      x, y = x.to(device), y.to(device)
      y_pred = model(x)
      # loss_fn yields a per-batch mean, so weight it by the batch size to
      # accumulate a per-sample sum (the last batch may be smaller).
      test_loss += loss_fn(y_pred, y).item() * y.size(0)
      pred = y_pred.argmax(dim=1)  # index of the largest logit = predicted class
      correct += (pred == y).sum().item()

  # Sum of per-sample losses / number of samples = true average loss.
  test_loss /= len(test_data_loader.dataset)

  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
      test_loss, correct, len(test_data_loader.dataset),
      100. * correct / len(test_data_loader.dataset)))


device: cuda
Net(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (bn1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(24, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool4): MaxPool2d(kernel_size=2, stride=2, pa