In [1]:
import cv2
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [2]:
device = torch.device("cuda"if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
dataset_root = 'datasets'

In [4]:
def resize_image(image, size=(32,32)):
    return cv2.resize(image, size)

In [5]:
def normalize_image(image):
    return image / 255.0

In [6]:
def one_hot_encode(labels):
    encoder = OneHotEncoder(sparse_output=False)
    labels = np.array(labels).reshape(-1, 1)
    encoder.fit(labels)
    return encoder.transform(labels)

In [7]:
def load_data(folder):
    images = []
    labels = []

    train_df = pd.read_csv(os.path.join(dataset_root, folder))

    for index, row in train_df.iterrows():
        image_file = row['Path']
        image_path = os.path.join(dataset_root, image_file)
        
        image = cv2.imread(image_path)
        image = resize_image(image)
        image = normalize_image(image)
        
        images.append(image)
        labels.append(row['ClassId'])
    
    images = np.array(images)
    labels = np.array(labels)
    
    labels = one_hot_encode(labels)
    
    return images, labels

In [8]:
def apply_transforms(images, transform):
    res = []
    for image in images:
        img = transform(image)
        res.append(img.numpy())
    return np.array(res)

In [85]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv1_ = nn.Conv2d(32, 32, kernel_size=3, padding=1)

        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv2_ = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        self.bn2 = nn.BatchNorm2d(64)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3_ = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.bn3 = nn.BatchNorm2d(128)

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4_ = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.bn4 = nn.BatchNorm2d(256)
        
        self.pool = nn.MaxPool2d(2, 2)

        self.dropout = nn.Dropout(0.5)

        self.activate = F.elu
        
        self.fc1 = nn.Linear(256 * 2 * 2, 256)
        self.fc2 = nn.Linear(256, 43)

    def forward(self, x):
        x = self.activate(self.bn1(self.conv1(x)))
        x = self.activate(self.conv1_(x))
        x = self.activate(self.conv1_(x))
        x = self.pool(x)
        
        x = self.activate(self.bn2(self.conv2(x)))
        x = self.activate(self.conv2_(x))
        x = self.activate(self.conv2_(x))
        x = self.pool(x)
        
        x = self.activate(self.bn3(self.conv3(x)))
        x = self.activate(self.conv3_(x))
        x = self.activate(self.conv3_(x))
        x = self.pool(x)

        x = self.activate(self.bn4(self.conv4(x)))
        x = self.activate(self.conv4_(x))
        x = self.activate(self.conv4_(x))
        x = self.pool(x)
        
        x = x.reshape(-1, 256 * 2 * 2)
        x = self.dropout(self.activate(self.fc1(x)))
        x = self.fc2(x)
        return x

In [49]:
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(32, scale = (0.7, 1.0)),
    transforms.ColorJitter(brightness = 0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.ToTensor()
])

test_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

In [41]:
images_, labels_ = load_data('Train.csv')
test_images, test_labels = load_data('Test.csv')

In [50]:
images = images_.copy()
labels = labels_.copy()
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size = 0.2, random_state = 42)

In [51]:
X_test = test_images
y_test = test_labels

X_train = apply_transforms(X_train, train_transforms)
X_val = apply_transforms(X_val, test_transforms)
X_test = apply_transforms(X_test, test_transforms)

X_train = np.array(X_train)
X_val = np.array(X_val)
X_test = np.array(X_test)

In [52]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [92]:
model = CNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100

best_loss = 99
break_num = 5
break_counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        #inputs = inputs.permute(0,3,1,2)
        outputs = model(inputs)
        loss = criterion(outputs, torch.argmax(labels, dim=1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    loss = running_loss/len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss}")
    if best_loss > loss:
        best_loss = loss
        break_counter = 0
    else:
        break_counter += 1
        if break_counter == break_num:
            print("Early Break")
            break;

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in val_loader:
        #inputs = inputs.permute(0,3,1,2)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == torch.argmax(labels, dim=1)).sum().item()

print(f"Validation Accuracy: {correct / total}")

Epoch [1/100], Loss: 1.451705462426372
Epoch [2/100], Loss: 0.37097874451845947
Epoch [3/100], Loss: 0.20488603660825436
Epoch [4/100], Loss: 0.14834383197504336
Epoch [5/100], Loss: 0.15202699153629073
Epoch [6/100], Loss: 0.12700084511018994
Epoch [7/100], Loss: 0.11863096592204656
Epoch [8/100], Loss: 0.12912651864581082
Epoch [9/100], Loss: 0.11383265849365462
Epoch [10/100], Loss: 0.08519250854673532
Epoch [11/100], Loss: 0.14258542347522113
Epoch [12/100], Loss: 0.07976430736300075
Epoch [13/100], Loss: 0.09930468574904942
Epoch [14/100], Loss: 0.11552304051296439
Epoch [15/100], Loss: 0.08619535765006342
Epoch [16/100], Loss: 0.09851129396168808
Epoch [17/100], Loss: 0.08887642490941945
Early Break
Validation Accuracy: 0.9937515939811272


In [93]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        #inputs = inputs.permute(0,3,1,2)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == torch.argmax(labels, dim=1)).sum().item()

print(f"Test Accuracy: {correct / total}")

Test Accuracy: 0.973396674584323


In [94]:
torch.save(model, "CNN_model.pth")