In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as F

# Fix the NumPy and PyTorch (CPU and CUDA) random seeds so runs start from
# the same random state.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [2]:
# Maps each YCB object name to its class index. The index is the position of
# the name in the list below (0 for the first entry, 20 for the last).
obj_classes = {
    object_name: class_index
    for class_index, object_name in enumerate([
        "002_master_chef_can",
        "003_cracker_box",
        "004_sugar_box",
        "005_tomato_soup_can",
        "006_mustard_bottle",
        "007_tuna_fish_can",
        "008_pudding_box",
        "009_gelatin_box",
        "010_potted_meat_can",
        "011_banana",
        "019_pitcher_base",
        "021_bleach_cleanser",
        "024_bowl",
        "025_mug",
        "035_power_drill",
        "036_wood_block",
        "037_scissors",
        "040_large_marker",
        "051_large_clamp",
        "052_extra_large_clamp",
        "061_foam_brick",
    ])
}

def count_params(net, trainable=False):
    """Return the number of scalar parameters held by ``net``.

    Args:
        net: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        trainable: When truthy, count only parameters whose ``requires_grad``
            flag is set; otherwise count every parameter.

    Returns:
        The summed ``numel()`` of the selected parameters.
    """
    total = 0
    for param in net.parameters():
        # Frozen parameters are skipped only when the caller asked for the
        # trainable subset.
        if trainable and not param.requires_grad:
            continue
        total += param.numel()
    return total

def to_csv(fname, img_ids=None, results=None):
    """Write per-image prediction vectors to a CSV file.

    Args:
        fname: Output path (or buffer) handed to ``DataFrame.to_csv``.
        img_ids: One identifier per row of ``results``; written as the leading
            ``img_id`` column.
        results: Sequence of per-image prediction vectors. Values are cast to
            ``int`` and written as columns ``class_0``, ``class_1``, ...

    The file is written without an index column.
    """
    preds = np.asarray(results).astype(int)
    if preds.size == 0:
        # No predictions: keep the row count but drop all class columns so the
        # file contains only the ``img_id`` column.
        preds = preds.reshape(len(preds), 0)

    # Build the frame in one vectorised step rather than one pd.Series per row.
    df = pd.DataFrame(preds)
    df.columns = ['class_{}'.format(x) for x in df.columns]

    # Put the identifier first directly, instead of appending and reordering.
    df.insert(0, 'img_id', img_ids)

    df.to_csv(fname, index=False)

In [3]:
# Preprocessing pipeline handed to the datasets: resize every image to
# 224x224 with antialiasing enabled.
transform = torch.nn.Sequential(
    transforms.Resize([224, 224], antialias=True)
)

In [4]:
class YCBDataset(torch.utils.data.Dataset):
    """Multi-label image dataset read from a YCB-style directory.

    ``data_dir`` must contain ``data.txt``, an index of comma-separated integer
    pairs ``v_num,t_num``. Each sample's files share the stem
    ``{data_dir}/{v_num:04}/{t_num:06}``: ``-color.png`` holds the image and,
    for training data, ``-box.txt`` holds one line per object whose first
    whitespace-separated field is a key of ``obj_classes``.

    Args:
        data_dir: Directory holding ``data.txt`` and the sample folders.
        transform: Optional callable applied to the float image.
        is_train: When false, no label file is read and the label vector is
            filled with -1.
    """

    def __init__(self, data_dir="", transform=None, is_train=True):
        self.data_dir = data_dir
        # ndmin=2 keeps a one-line index file as shape (1, 2); without it the
        # row would collapse to shape (2,) and break __len__/__getitem__.
        self.data_idx = np.loadtxt(
            f"{self.data_dir}/data.txt", delimiter=",", dtype=int, ndmin=2
        )
        self.transform = transform
        self.is_train = is_train
        # Built once here rather than on every __getitem__ call.
        self._to_float = transforms.ConvertImageDtype(torch.float32)

    def __len__(self):
        """Return the number of rows in the ``data.txt`` index."""
        return self.data_idx.shape[0]

    def _load_labels(self, fname):
        """Return the first whitespace-separated field of every non-blank line.

        Args:
            fname: Path of the label file to read.

        Returns:
            List of label strings in file order.
        """
        labels = []
        # The context manager closes the file even if reading raises.
        with open(fname, 'r') as f:
            for line in f:
                fields = line.split()
                # Blank lines carry no label; skipping them avoids a spurious
                # KeyError when the labels are looked up in obj_classes.
                if fields:
                    labels.append(fields[0])
        return labels

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            Tuple ``(img_id, X_original, X_transformed, y)``: ``img_id`` is
            ``"{v_num:04}_{t_num:06}"``; ``X_original`` is the image converted
            to float32; ``X_transformed`` is ``transform(X_original)`` (or
            ``X_original`` itself when no transform was given); ``y`` is a
            float vector with one entry per class in ``obj_classes``, k-hot for
            training data and all -1 otherwise.

        Raises:
            KeyError: If a label in the box file is not a key of ``obj_classes``.
        """
        v_num, t_num = self.data_idx[index]

        img_id = f"{v_num:04}_{t_num:06}"
        fname = f"{self.data_dir}/{v_num:04}/{t_num:06}"

        X = torchvision.io.read_image(f"{fname}-color.png")
        X_original = self._to_float(X)

        if self.transform is not None:
            X_transformed = self.transform(X_original)
        else:
            X_transformed = X_original

        if not self.is_train:  # no labels are provided for testing set
            y = torch.full((len(obj_classes),), -1.0)
        else:
            # load string labels and convert to k-hot labels
            y = torch.zeros(len(obj_classes))
            for label in self._load_labels(f"{fname}-box.txt"):
                y[obj_classes[label]] = 1.0

        return img_id, X_original, X_transformed, y

# Training split: labels are read from the box files; batches of 50 are
# drawn in shuffled order.
train_dataset = YCBDataset(data_dir="/kaggle/input/cs3264-assignment-2-ay2425s1/ycb_dataset/train_data", transform=transform, is_train=True)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=50, shuffle=True)

# Test split: no labels are available (is_train=False); order is kept fixed
# so predictions line up with the image ids.
test_dataset = YCBDataset(data_dir="/kaggle/input/cs3264-assignment-2-ay2425s1/ycb_dataset/test_data", transform=transform, is_train=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=50, shuffle=False)

In [5]:
# Sanity check: fetch the first training sample and print the shapes of the
# transformed image and of its label vector.
img_id, X_original, X_transformed, y = train_dataset[0]

print(X_transformed.shape)
print(y.shape)

torch.Size([3, 224, 224])
torch.Size([21])


In [6]:
class Classifier(nn.Module):
    """Small CNN that outputs 21 logits per image for multi-label classification.

    The convolutional stack has four stages, each being
    3x3 convolution -> LeakyReLU(0.1) -> BatchNorm -> 2x2 max-pool, widening the
    channels 3 -> 8 -> 16 -> 32 -> 64. The first linear layer expects
    ``64 * 14 * 14`` features, i.e. 224x224 inputs after four halvings. The
    head is 1024 -> 512 -> 21 with ReLU and dropout (p=0.5) between layers.
    """

    def __init__(self):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(8),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2)
        )

        self.lin = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 14 * 14, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 21)
        )

    def forward(self, X):
        """Return raw (pre-sigmoid) logits for a batch of images."""
        X = self.conv(X)
        X = self.lin(X)

        return X

    def _param_device(self):
        """Return the device the model's parameters currently live on."""
        return next(self.parameters()).device

    def fit(self, dataloader, epochs=10, lr=0.001):
        """Train with Adam and ``BCEWithLogitsLoss``.

        Args:
            dataloader: Iterable yielding ``(img_id, X_original, X, y)``
                batches; only ``X`` and ``y`` are used.
            epochs: Number of passes over ``dataloader``.
            lr: Adam learning rate.

        After each epoch the loss of that epoch's last batch is printed
        (``nan`` if the dataloader yielded no batches).
        """
        # Batches are moved to wherever the model lives, so training works
        # after ``model.to(device)`` without relying on any global.
        device = self._param_device()

        loss_fn = nn.BCEWithLogitsLoss()
        optimiser = optim.Adam(self.parameters(), lr=lr)

        self.train()
        for i in range(epochs):
            loss = None
            for _, _, X, y in dataloader:
                X = X.to(device)
                y = y.to(device)

                optimiser.zero_grad()
                # Call the module itself (not .forward) so hooks are honoured.
                y_pred = self(X)

                loss = loss_fn(y_pred, y)
                loss.backward()
                optimiser.step()

            # An empty dataloader leaves ``loss`` unset; report nan instead of
            # failing on an undefined name.
            last_loss = loss.item() if loss is not None else float('nan')
            print(f'Epoch {i} - Loss: {last_loss}')

    def predict(self, dataloader, threshold):
        """Predict k-hot label vectors for every image in ``dataloader``.

        Args:
            dataloader: Iterable yielding ``(img_id, X_original, X, y)``
                batches; only ``img_id`` and ``X`` are used.
            threshold: A class is predicted (1.0) when its sigmoid probability
                is strictly greater than this value, otherwise 0.0.

        Returns:
            ``(img_ids, results)``: a list of image ids and a parallel list of
            1-D NumPy arrays of 0.0/1.0 values, one per image.
        """
        img_ids = []
        results = []

        # Use the model's own device instead of a notebook-level global.
        device = self._param_device()

        self.eval()
        with torch.no_grad():
            for img_id, _, X, _ in dataloader:
                X = X.to(device)
                y_pred_logits = self(X)

                y_pred = (torch.sigmoid(y_pred_logits) > threshold).to(y_pred_logits.dtype)

                img_ids.extend(img_id)
                # Iterating the 2-D array yields one row per image.
                results.extend(y_pred.cpu().numpy())

        return img_ids, results

In [7]:
# Instantiate the network and report its total and trainable parameter counts.
model = Classifier()

print(F'Network has {count_params(model, trainable=False)} parameters in total')
print(F'Network has {count_params(model, trainable=True)} trainable parameters in total')

Network has 13406421 parameters in total
Network has 13406421 trainable parameters in total


In [8]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU,
# and move the model's parameters there.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model.to(device)

Classifier(
  (conv): Sequential(
    (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): LeakyReLU(negative_slope=0.1)
    (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): LeakyReLU(negative_slope=0.1)
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): LeakyReLU(negative_sl

In [9]:
# Train on the training split; fit() prints the last-batch loss after every epoch.
model.fit(train_dataloader)

Epoch 0 - Loss: 0.30428943037986755
Epoch 1 - Loss: 0.14760026335716248
Epoch 2 - Loss: 0.061615824699401855
Epoch 3 - Loss: 0.07085715979337692
Epoch 4 - Loss: 0.03704853355884552
Epoch 5 - Loss: 0.01527936477214098
Epoch 6 - Loss: 0.015655595809221268
Epoch 7 - Loss: 0.03787003457546234
Epoch 8 - Loss: 0.017221296206116676
Epoch 9 - Loss: 0.01857515051960945


In [10]:
# Predict on the test split; a class counts as present when its sigmoid
# probability exceeds 0.5.
img_ids, results = model.predict(test_dataloader, 0.5)

In [11]:
# Write the test predictions, one row per image id, to the submission file.
to_csv("submission.csv", img_ids=img_ids, results=results)