Imports

In [None]:
import glob
import os

import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from pandas.core.common import flatten
from torch.utils.data import Dataset, DataLoader

Get file paths for train, test, and val images

In [10]:
# We need the image file paths so we can create the custom Dataset class.
# Expected layout: <split>/<class name>/<image file>

train_data_path = 'Dataset/train'
valid_data_path = 'Dataset/valid'
test_data_path = 'Dataset/test'


def _class_dirs(split_dir):
    """Return the class directories directly under ``split_dir``, sorted.

    glob returns entries in arbitrary order, so sorting keeps the class
    indices derived from this list identical from run to run. Stray files
    at the split level are skipped so they are not mistaken for classes.
    """
    return sorted(
        entry for entry in glob.glob(split_dir + '/*') if os.path.isdir(entry)
    )


def _image_paths(split_dir):
    """Return every path one level below each class directory (not recursive)."""
    return [
        image_path
        for class_dir in _class_dirs(split_dir)
        for image_path in sorted(glob.glob(class_dir + '/*'))
    ]


# Class names come from the train split only,
# e.g. Dataset/train/antelope -> antelope
classes = [os.path.basename(class_dir) for class_dir in _class_dirs(train_data_path)]

train_image_paths = _image_paths(train_data_path)
valid_image_paths = _image_paths(valid_data_path)
test_image_paths = _image_paths(test_data_path)

Since we cannot use strings as labels, map class names to integer indices

In [11]:
# Index -> class name lookup, numbered in the order the classes were collected
idx_to_class = dict(enumerate(classes))

# Class name -> index lookup, built by inverting idx_to_class
class_to_idx = {name: index for index, name in idx_to_class.items()}

Implement custom dataset class in pytorch

In [12]:
# Custom Dataset class: implements __init__, __len__, and __getitem__.
# The base class is referenced by its full path so that re-running this cell
# does not make the class inherit from an earlier version of itself.
class Dataset(torch.utils.data.Dataset):
    """Dataset that reads images from disk and labels them by folder name.

    Each path is expected to look like ``<split>/<class name>/<file>``; the
    class name is looked up in the module-level ``class_to_idx`` mapping.
    """

    def __init__(self, image_paths, transform=False):
        """Store the image paths and an optional per-image transform.

        Args:
            image_paths: Sequence of image file paths.
            transform: Callable applied to each loaded image, or a falsy
                value (the default) to return images as loaded. The original
                data was already transformed with RoboFlow.
        """
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``.

        Raises:
            OSError: If OpenCV cannot read the file.
            KeyError: If the parent folder name is not in ``class_to_idx``.
        """
        image_filepath = self.image_paths[idx]

        # cv2.imread signals failure by returning None instead of raising,
        # which would otherwise only surface later as an obscure batching error.
        image = cv2.imread(image_filepath)
        if image is None:
            raise OSError(f"Could not read image file: {image_filepath}")
        # NOTE(review): no BGR -> RGB conversion is applied to the image
        # returned by OpenCV; confirm this is intended.

        if self.transform:
            image = self.transform(image)

        # The parent folder name is the class of the image.
        label = os.path.basename(os.path.dirname(image_filepath))
        label = class_to_idx[label]     # converts to idx

        return (image, label)

Create datasets and dataloaders

In [13]:
# One Dataset per split, built from the path lists collected above
train_dataset, valid_dataset, test_dataset = (
    Dataset(split_paths)
    for split_paths in (train_image_paths, valid_image_paths, test_image_paths)
)

In [14]:
# Single place to change the batch size for all three loaders
BATCH_SIZE = 64

# Only the training data is shuffled; the evaluation loaders keep a fixed
# order so that repeated evaluations visit the samples identically.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Create our classification model using 2D CNNs

In [15]:
class CNNModel(nn.Module):
    """Image classifier: five conv + ReLU + max-pool stages, then two FC layers.

    Every stage halves the spatial size, so the input is reduced by a factor
    of 32 before it is flattened for the fully connected layers.
    """

    def __init__(self, num_classes=90, input_size=640):
        """Build the layers.

        Args:
            num_classes: Number of output logits.
            input_size: Spatial size of the input images, either one int for
                square images or a ``(height, width)`` pair. The default of
                640 gives the 512 x 20 x 20 feature map this model was
                originally written for.

        Raises:
            ValueError: If the input is too small to survive five poolings.
        """
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # 5 Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)

        # Pooling for generalization
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # The padded 3x3 convolutions keep the spatial size, so only the five
        # poolings shrink it.
        feature_h = self._pooled_extent(height, 5)
        feature_w = self._pooled_extent(width, 5)
        if feature_h < 1 or feature_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for five 2x2 poolings"
            )
        self.flat_features = 512 * feature_h * feature_w

        # FC layers applied to the flattened feature map
        self.fc1 = nn.Linear(self.flat_features, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

        # Use ReLU as activation function, very common for deep learning
        self.relu = nn.ReLU()

    @staticmethod
    def _pooled_extent(size, num_pools):
        """Return ``size`` after ``num_pools`` stride-2 poolings (each floors)."""
        for _ in range(num_pools):
            size //= 2
        return size

    def forward(self, x):
        """Return class logits of shape (B, num_classes) for B x C x H x W input."""
        # A single C x H x W image is treated as a batch of one.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.pool(self.relu(self.conv4(x)))
        x = self.pool(self.relu(self.conv5(x)))

        # Flatten everything except the batch dimension. Unlike
        # reshape(-1, N), this can never fold a wrongly sized input into extra
        # batch rows; fc1 raises a shape error instead.
        x = torch.flatten(x, start_dim=1)

        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

Train, test, and validate

This process is far too slow on my MacBook Air, but it serves as an example of how the model would be implemented. The model would likely need more than 5 layers to reach good classification accuracy; this is more of a proof of concept.

In [16]:
# Network to train, built with its default arguments
model = CNNModel()

# Adam adapts the step size per parameter, starting from a base rate of 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy is the usual loss for multi-class classification
criterion = nn.CrossEntropyLoss()

def train(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and return the mean batch loss and the accuracy.

    Args:
        model: Network to optimise; moved to ``device`` and set to train mode.
        train_loader: Sized iterable of ``(inputs, labels)`` batches, with
            inputs laid out as B x H x W x C.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device on which the forward and backward passes run.

    Returns:
        ``(train_loss, train_acc)``: the loss averaged over batches and the
        percentage of correctly classified samples.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    # Module.to() works in place, so this does nothing when the model is
    # already on `device`.
    model.to(device)
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        # Cast to float on the target device, then B x H x W x C -> B x C x H x W
        inputs = inputs.to(device=device, dtype=torch.float32).permute(0, 3, 1, 2)
        labels = labels.to(device)

        # zero the gradients
        optimizer.zero_grad()

        # get output and compute loss, backpropagate
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # track loss and correct predictions
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("train_loader yielded no samples; check the dataset paths")

    # calculate train loss and accuracy
    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total
    return train_loss, train_acc


def test(model, test_loader, criterion, device):
    """Evaluate ``model`` on the test data without updating its weights.

    Args:
        model: Network to evaluate; moved to ``device`` and set to eval mode.
        test_loader: Sized iterable of ``(inputs, labels)`` batches, with
            inputs laid out as B x H x W x C.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        device: Device on which the forward passes run.

    Returns:
        ``(test_loss, test_acc)``: the loss averaged over batches and the
        percentage of correctly classified samples.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Module.to() works in place, so this does nothing when the model is
    # already on `device`.
    model.to(device)
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # no backprop needed, so gradient tracking is switched off
    with torch.no_grad():
        for inputs, labels in test_loader:
            # Cast to float on the target device, then B x H x W x C -> B x C x H x W
            inputs = inputs.to(device=device, dtype=torch.float32).permute(0, 3, 1, 2)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; check the dataset paths")

    test_loss = running_loss / len(test_loader)
    test_acc = 100. * correct / total
    return test_loss, test_acc


# The name starts with "test", so pytest would try to collect this helper as a
# test case whenever it is imported into a test module; opt out explicitly.
test.__test__ = False

# Function to perform validation
def validate(model, valid_loader, criterion, device):
    """Evaluate ``model`` on the validation data without updating its weights.

    Args:
        model: Network to evaluate; moved to ``device`` and set to eval mode.
        valid_loader: Sized iterable of ``(inputs, labels)`` batches, with
            inputs laid out as B x H x W x C.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        device: Device on which the forward passes run.

    Returns:
        ``(valid_loss, valid_acc)``: the loss averaged over batches and the
        percentage of correctly classified samples.

    Raises:
        ValueError: If ``valid_loader`` yields no samples.
    """
    # Module.to() works in place, so this does nothing when the model is
    # already on `device`.
    model.to(device)
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # no backprop needed, so gradient tracking is switched off
    with torch.no_grad():
        for inputs, labels in valid_loader:
            # Cast to float on the target device, then B x H x W x C -> B x C x H x W
            inputs = inputs.to(device=device, dtype=torch.float32).permute(0, 3, 1, 2)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("valid_loader yielded no samples; check the dataset paths")

    valid_loss = running_loss / len(valid_loader)
    valid_acc = 100. * correct / total
    return valid_loss, valid_acc


# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 1

# Training takes an extremely long time on a laptop, so it is opt-in: set this
# to True to actually run the epochs below. With the default nothing is run.
RUN_TRAINING = False

if RUN_TRAINING:
    for epoch in range(num_epochs):
        train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test(model, test_loader, criterion, device)
        valid_loss, valid_acc = validate(model, valid_loader, criterion, device)

        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"Train Loss: {train_loss:.2f} | Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.2f} | Test Acc: {test_acc:.2f}%")
        print(f"Validation Loss: {valid_loss:.2f} | Validation Acc: {valid_acc:.2f}%")