## import Libraries

In [164]:
# Imports
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import re
import pandas as pd
from io import StringIO

from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import os

from sklearn.metrics import f1_score

torch.__version__


'2.1.0'

In [165]:
# Constants
IMAGE_PATH = './data/'
TRAINING_DATA_PATH = './train.csv'
TEST_DATA_PATH = './test.csv'
LABEL_COUNT = 19
BATCH_SIZE = 32
IMAGE_SHAPE = 224
THRESHOLD = 0.5
LR = 0.001
EPOCHS = 3
WEIGHT_DECAY = 0.001

In [166]:
# CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using', device)

Using cpu


In [167]:
# Reading CSV Files
def read_csv(file_path):
    with open(file_path) as file:
        lines = [re.sub(r'([^,])"(\s*[^\n])', r'\1/"\2', line) for line in file]
        df = pd.read_csv(StringIO(''.join(lines)), escapechar="/")
    return df

df_train = read_csv(TRAINING_DATA_PATH)
df_test = read_csv(TEST_DATA_PATH)

In [168]:
# Dataset Class
class ImageDataset(Dataset):
    def __init__(self, df, image_path, transform=None, test=False):
        self.df = df
        self.image_path = image_path
        self.transform = transform
        self.test = test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_path, self.df.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        if self.transform:
            image = self.transform(image)

        if self.test:
            return image
            
        label = torch.zeros(LABEL_COUNT, dtype=torch.float32)
        label_indices = torch.tensor([int(i) for i in self.df.iloc[idx, 1].split()])
        label[label_indices-1] = 1
        return image, label

# Transforms
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SHAPE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.Resize((IMAGE_SHAPE, IMAGE_SHAPE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Datasets and DataLoaders
train_dataset = ImageDataset(df_train, IMAGE_PATH, transform=train_transform)
test_dataset = ImageDataset(df_test, IMAGE_PATH, transform=test_transform, test=True)

# Splitting Dataset
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [169]:
# CNN Model
def compute_output(input_size, stride_size, padding_size, kernel_size):
    return ((input_size + 2 * padding_size - kernel_size) / stride_size) + 1

after_conv1 = compute_output(IMAGE_SHAPE, 2, 1,4)
after_max_pool1 = after_conv1 / 2
after_conv2 = compute_output(after_max_pool1, 1, 2 ,3)
after_max_pool2 = after_conv2 / 2
after_conv3 = compute_output(after_max_pool2, 3, 0, 3)
after_max_pool3 = after_conv3 / 2
final_image_dim = int(after_max_pool3)

print(final_image_dim)


class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()  # Compulsory operation.
        self.conv1 = nn.Conv2d(3, 32, 4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 48, 3, stride=1, padding=2)
        self.conv3 = nn.Conv2d(48, 64, 3, stride=3, padding=0)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout(0.25)
        self.dropout3 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64*final_image_dim*final_image_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, LABEL_COUNT)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv3(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = x.view(-1, 64*final_image_dim*final_image_dim)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        x = self.dropout3(x)
        logits = self.fc3(x)
        return logits

4


In [170]:
# Training Loop
def train(model, train_loader, optimizer, criterion):
    model.train()
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        preds = torch.sigmoid(outputs).cpu().detach().numpy()
        binary_preds = (preds > THRESHOLD).astype(int)
        print(binary_preds)
        loss = criterion(outputs, labels.type(torch.float))
        loss.backward()
        optimizer.step()

# Validation Function
def validate(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            preds = torch.sigmoid(outputs).cpu().detach().numpy()
            all_preds.extend(preds)
            all_targets.extend(labels.cpu().numpy())
    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)
    f1 = f1_score(all_targets, all_preds > THRESHOLD, average='micro')
    print(f1)
    return running_loss / len(val_loader.dataset), f1


In [171]:
class_counts = df_train.iloc[:, 1].str.split().apply(lambda x: [int(label) for label in x])
label_counts = np.bincount([label for labels in class_counts for label in labels], minlength=LABEL_COUNT)
total_samples = len(class_counts)
class_weights = torch.tensor([total_samples / label_counts[label] for label in range(1, LABEL_COUNT+1)])
# there are no label 12 in the training dataset
class_weights[11] = 1000

# Model Initialization
model = ConvNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights)

# Training Loop
for epoch in range(EPOCHS):
    train_loss = train(model, train_loader, optimizer, criterion)
    val_loss, val_f1 = validate(model, val_loader, criterion)

[[1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1]
 [1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 0 0 1 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0]
 [1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0]
 [1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1]
 [1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 1 0]
 [1 1 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0 1 1]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0]
 [1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1]
 [1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 0 1]
 [1 1 1 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0]
 [1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0]
 [1 1 1 1 0 0 1 1 0 1 1 0 1 1 1 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 1 0]
 [1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0 0 1]
 [1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1]
 [1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1]
 [1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1]
 [1 1 1 1 0 1 1 

In [172]:

# Testing Function
import csv

# Output Predictions
def predict(model, test_loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for inputs in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            preds = torch.sigmoid(outputs).cpu().detach().numpy()
            print(preds)
            binary_preds = (preds > THRESHOLD).astype(int)
            labels = [' '.join([str(i+1) for i, val in enumerate(pred) if val == 1]) for pred in binary_preds]
            predictions.extend(labels)
    return predictions

predictions = predict(model, test_loader)

# Export Predictions
output_file = 'predictions.csv'
with open(output_file, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['ImageID', 'Labels'])
    for idx, img_id in enumerate(df_test['ImageID']):
        writer.writerow([img_id, predictions[idx]])

print(f'Predictions exported to {output_file}.')


[[0.8074916  0.4662866  0.47041184 0.4126474  0.2693656  0.33151096
  0.39576787 0.38230917 0.32282254 0.361495   0.4894924  0.01363777
  0.37640458 0.45791054 0.5286185  0.51100004 0.55101126 0.5789982
  0.49243745]
 [0.754994   0.5938289  0.6158458  0.6209317  0.513334   0.591385
  0.6155259  0.59782076 0.54407537 0.5677227  0.5198862  0.02902849
  0.48906487 0.48881307 0.57762206 0.5112339  0.3334239  0.4957247
  0.57031643]
 [0.7981631  0.43143782 0.45985633 0.37442598 0.29691315 0.3247861
  0.39867118 0.3739838  0.32161254 0.35282326 0.44074    0.01390227
  0.38992885 0.4204553  0.4785824  0.5056368  0.5476452  0.5716189
  0.5095479 ]
 [0.8362507  0.19783163 0.25550053 0.11427083 0.25091526 0.08974446
  0.17935082 0.15987818 0.22249287 0.14089608 0.31995615 0.00310874
  0.220519   0.26627004 0.31870133 0.58897525 0.71559083 0.61635077
  0.38827002]
 [0.75897545 0.58678746 0.6025832  0.61418056 0.5552989  0.5556404
  0.591733   0.58673996 0.58832514 0.54130304 0.5493976  0.02403357