## Image Classification with CNN

> Dataset Source: https://www.kaggle.com/datasets/puneet6060/intel-image-classification

In [1]:
# import necessary packages
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [2]:
# check device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [3]:
# Transforms
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

In [4]:
# Dataloader
train_path = '/Users/yaomingyang/Documents/Large Dataset/dataset/seg_train/seg_train'
test_path = '/Users/yaomingyang/Documents/Large Dataset/dataset/seg_test/seg_test'

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=256, shuffle=True
)

test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=transformer),
    batch_size=256, shuffle=True
)

In [5]:
# Categories
root = pathlib.Path(train_path)
classes = sorted([j.name.split('/')[-1] for j in root.iterdir()])
classes = classes[1:]
classes

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

In [6]:
# CNN Network
class ConvNN(nn.Module):
    def __init__(self, num_classes=6):
        super(ConvNN, self).__init__()

        # Input dimension = (256, 3, 150, 150) 
        # batch_size * RGB * img dimension
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        # current shape: (256, 12, 150, 150)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()

        # reduce size be factor 2
        self.pool = nn.MaxPool2d(kernel_size=2)
        # shape: (256, 12, 75, 75)

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        # shape: (256, 20, 75, 75)

        self.conv3 = nn.Conv2d(in_channels=20, out_channels=28, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=28)
        self.relu3 = nn.ReLU()
        # shape: (256, 32, 75, 75)

        # Output fully connected layer
        self.fc = nn.Linear(in_features=28*75*75, out_features=num_classes)

    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)

        output = self.pool(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.conv3(output)
        output = self.bn2(output)
        output = self.relu3(output)

        # Now output is (256, 32, 75, 75)
        output = output.view(-1, 32*75*75)

        output = self.fc(output)

        return output


In [7]:
model = ConvNN(num_classes=6).to(device=device)

In [8]:
# Optmizer and loss function
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

In [9]:
epochs = 10

In [10]:
# calculate the size of training and testing images
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))

In [11]:
print(train_count, test_count)

14034 3000


In [12]:
# Model training and saving best model

best_accuracy = 0.0

for epoch in range(epochs):
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.cpu())
            labels = Variable(labels.cpu())

        optimizer.zero_grad()

        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.cpu().data*images.size(0)
        _, prediction = torch.max(outputs.data, 1)

        train_accuracy += int(torch.sum(prediction == labels.data))

    train_accuracy = train_accuracy/train_count
    train_loss = train_loss/train_count

    model.eval()

    test_accuracy = 0.0

    for i, (images, labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.cpu())
            labels = Variable(labels.cpu())

        outputs = model(images)
        _, prediction = torch.max(outputs.data, 1)
        test_accuracy += int(torch.sum(prediction == labels.data))

    test_accuracy /= test_count

    print('Epoch: ' + str(epoch) + ' Training Loss: ' + str(train_loss) +
          ' Train Accuracy: ' + str(train_accuracy) + ' Test Accuracy: ' + str(test_accuracy))
    
    # Save the best model
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy = test_accuracy


Epoch: 0 Training Loss: tensor(9.1553) Train Accuracy: 0.5276471426535556 Test Accuracy: 0.606
Epoch: 1 Training Loss: tensor(1.3221) Train Accuracy: 0.725238705999715 Test Accuracy: 0.7233333333333334
Epoch: 2 Training Loss: tensor(0.9515) Train Accuracy: 0.7821718683197948 Test Accuracy: 0.6973333333333334
Epoch: 3 Training Loss: tensor(0.7886) Train Accuracy: 0.8160182414137096 Test Accuracy: 0.716
Epoch: 4 Training Loss: tensor(0.5610) Train Accuracy: 0.8623343309106456 Test Accuracy: 0.6426666666666667
Epoch: 5 Training Loss: tensor(0.3525) Train Accuracy: 0.903876300413282 Test Accuracy: 0.737
Epoch: 6 Training Loss: tensor(0.3939) Train Accuracy: 0.9011685905657688 Test Accuracy: 0.7426666666666667
Epoch: 7 Training Loss: tensor(0.2342) Train Accuracy: 0.9367250961949551 Test Accuracy: 0.7343333333333333
Epoch: 8 Training Loss: tensor(0.2354) Train Accuracy: 0.9362263075388343 Test Accuracy: 0.7133333333333334
Epoch: 9 Training Loss: tensor(0.2090) Train Accuracy: 0.940145361265