## Imports

In [226]:
%matplotlib inline
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import re
import pandas as pd
from io import StringIO

from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import os

from sklearn.metrics import f1_score

torch.__version__

'2.1.0'

## Variables

In [227]:
IMAGE_PATH = './data/'
TRAINING_DATA_PATH = './train.csv'
TEST_DATA_PATH = './test.csv'
LABEL_COUNT = 19
BATCH_SIZE = 1
IS_SHUFFLE = True
IMAGE_SHAPE = 224
THRESHOLD = 0.5

## CUDA

In [228]:
use_cuda = True if torch.cuda.is_available() else False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('We are using GPU.' if use_cuda else 'We are using CPU.')

We are using CPU.


## Loading csv file

In [229]:
with open(TRAINING_DATA_PATH) as file:
    lines = [re.sub(r'([^,])"(\s*[^\n])', r'\1/"\2', line) for line in file]
    df_train = pd.read_csv(StringIO(''.join(lines)), escapechar="/")

with open(TEST_DATA_PATH) as file:
    lines = [re.sub(r'([^,])"(\s*[^\n])', r'\1/"\2', line) for line in file]
    df_test = pd.read_csv(StringIO(''.join(lines)), escapechar="/")

print(df_train.shape)
print(df_train[:2])

print(df_test.shape)
print(df_train[:2])


(29996, 3)
  ImageID Labels                                            Caption
0   0.jpg      1   Woman in swim suit holding parasol on sunny day.
1   1.jpg   1 19  A couple of men riding horses on top of a gree...
(10000, 2)
  ImageID Labels                                            Caption
0   0.jpg      1   Woman in swim suit holding parasol on sunny day.
1   1.jpg   1 19  A couple of men riding horses on top of a gree...


## Create Customize Data Type

In [230]:
from pandas import DataFrame

def image_stats(checking_dataloader):
  sum_channels, sumsq_channels, n_batches = 0, 0, 0

  for step, (x, _, _, _ ) in enumerate(checking_dataloader):
    sum_channels += torch.mean(x, dim = [0, 2, 3])
    sumsq_channels += torch.mean(x**2 , dim = [0, 2, 3])
    n_batches += 1

  means = sum_channels/n_batches
  stdevs = (sumsq_channels/n_batches - means**2)**0.5

  return means, stdevs

means, stdevs = image_stats(train_dataset)


transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(IMAGE_SHAPE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SHAPE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class ImageDataset(Dataset):
    def __init__(self, df: DataFrame, imagePath:str = IMAGE_PATH, transform=None, train=True):
        self.df = df
        self.imagePath = imagePath
        self.transform = transform
        self.train = train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
                    
        img_path = os.path.join(self.imagePath, 
                                self.df.iloc[idx, self.df.columns.get_loc('ImageID')])
        
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        img_id = self.df.iloc[idx, self.df.columns.get_loc('ImageID')]
        caption = self.df.iloc[idx, self.df.columns.get_loc('Caption')]

        if self.train:
            labels = self.df.iloc[idx, self.df.columns.get_loc('Labels')]

            label_tensor = torch.zeros(19)
            for label in labels.split(" "):
                label_tensor[int(label)-1] = 1
            sample = (image, label_tensor, img_id, caption)
        else:
            sample = (image, img_id, caption)
            
        return sample

dataset = ImageDataset(
    df= df_train, imagePath = IMAGE_PATH, transform = transform, train = True)

TRAIN_DATA_LENGTH = 10000
VALIDATION_DATA_LENGTH = 2000
REST = len(dataset) - TRAIN_DATA_LENGTH - VALIDATION_DATA_LENGTH 
train_dataset, val_dataset, rest_dataset = random_split(dataset, [TRAIN_DATA_LENGTH, VALIDATION_DATA_LENGTH, REST])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = ImageDataset(df= df_test, imagePath = IMAGE_PATH, transform = transform, train = False)

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True) 
print(len(test_loader)) 


10000
2000
10000


## Create CNN

In [231]:
def compute_output(input_size, stride_size, padding_size, kernel_size):
    return ((input_size + 2 * padding_size - kernel_size) / stride_size) + 1

after_conv1 = compute_output(IMAGE_SHAPE, 2, 1,4)
after_max_pool1 = after_conv1 / 2
after_conv2 = compute_output(after_max_pool1, 1, 2 ,3)
after_max_pool2 = after_conv2 / 2
final_image_dim = int(after_max_pool2)
print(final_image_dim)

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()  # Compulsory operation.
        self.conv1 = nn.Conv2d(3, 32, 4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 48, 3, stride=1, padding=2)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(48*final_image_dim*final_image_dim, 64)
        self.fc2 = nn.Linear(64, LABEL_COUNT)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # x = torch.flatten(x, 1)
        x = x.view(-1, 48*final_image_dim*final_image_dim)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        logits = self.fc2(x)
        return logits, None

model = ConvNet().to(device)
print('Model initialized.')

29
Model initialized.


## Optimizer and Loss Function

In [232]:
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
cross_entropy = nn.CrossEntropyLoss()

## Train

In [237]:
def train_iter(model, device, optimizer, loss_func, data, target):
    '''
    Train the model for a single iteration.
    An iteration is when a single batch of data is passed forward and
    backward through the neural network.
    '''
    data, target = data.to(device), target.to(device)  # Move this batch of data to the specified device.
    output, _ = model(data)  # Forward the data through the model.
    loss = loss_func(output, target)
    optimizer.zero_grad()  # Zero out the old gradients (so we only use new gradients for a new update iteration).
    loss.backward()  # Backward the loss and calculate gradients for parameters.
    optimizer.step()  # Update the parameters.
    preds = torch.sigmoid(output).detach().numpy()
    return preds

def train_epoch(log_interval, model, device, train_loader, optimizer, epoch, loss_func, threshold=0.5):
    '''
    Train the model for an epoch.
    An epoch is when the entire dataset is passed forward and
    backward through the neural network for once.
    The number of batches in a dataset is equal to number of iterations for one epoch.
    '''
    model.train()
    all_preds = []
    all_targets = []
    for batch_idx, (data, target, _, captions) in enumerate(train_loader):  # Iterate through the entire dataset to form an epoch.
        preds = train_iter(model, device, optimizer, loss_func, data, target)  # Train for an iteration.
        print(preds)
        binary_preds = np.zeros(19)
        # print(binary_preds.shape)
        for i, val in enumerate(np.array(preds[0])):
            if (val > THRESHOLD):
                binary_preds[i] = 1
        print(binary_preds)
        # print(target.cpu().numpy()[0])
        all_preds.extend(binary_preds)
        all_targets.extend(target.cpu().numpy()[0])

        if batch_idx % log_interval == 0:
            print(len(all_preds), len(all_targets))
            f1 = f1_score(all_targets, all_preds, average='micro')
            print('Train Epoch: {} F1 Score: {:.4f}'.format(epoch, f1))
            # all_preds.clear()
            # all_targets.clear()

log_interval = 50
epochs = 1

for epoch in range(1, epochs + 1):
    train_epoch(log_interval, model, device, train_loader, optimizer, epoch, cross_entropy)

torch.save(model.state_dict(), './model')

[[8.76779377e-01 2.53152192e-01 5.39008737e-01 2.57921427e-01
  1.37010083e-01 2.98643202e-01 2.35460922e-01 3.06458652e-01
  1.95348799e-01 2.77617455e-01 1.22607484e-01 6.50898946e-05
  1.29844904e-01 6.11778870e-02 4.05772805e-01 2.95513391e-01
  3.10501754e-01 3.71944994e-01 2.21133381e-01]]
[1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
19 19
Train Epoch: 1 F1 Score: 0.9474
[[8.77477646e-01 2.87967592e-01 5.36746681e-01 3.27285916e-01
  1.58078134e-01 2.47742593e-01 2.05988377e-01 3.19870055e-01
  1.82227343e-01 3.06383491e-01 9.13092792e-02 7.95640244e-06
  1.18672356e-01 4.18341607e-02 3.83122087e-01 2.85091043e-01
  2.76916176e-01 3.95470649e-01 2.07130641e-01]]
[1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[9.1799772e-01 2.3461141e-01 5.7668597e-01 2.8742456e-01 1.9448256e-01
  2.0430902e-01 2.1901026e-01 3.5038775e-01 1.5382008e-01 2.3656911e-01
  7.0473380e-02 5.5424498e-06 8.1429273e-02 1.6504738e-02 3.4351969e-01
  1.9479825e-01 2.7177358e-01 3.72

KeyboardInterrupt: 

## Test

In [239]:
def test(model, device, test_loader):
    '''
    Testing the model on the entire test set.
    '''
    model.eval()
    test_all_preds = []
    test_all_targets = []
    with torch.no_grad():
        for data, target, _, _ in test_loader:
            data, target = data.to(device), target.to(device)
            output, _ = model(data)
            preds = torch.sigmoid(output).detach().numpy()
            print(preds)
            binary_preds = np.zeros(19)
            for i, val in enumerate(np.array(preds[0])):
                if (val > THRESHOLD):
                    binary_preds[i] = 1
            test_all_preds.extend(binary_preds)
            test_all_targets.extend(target.cpu().numpy()[0])
    # Convert probabilities to binary predictions using a threshold (e.g., 0.5)
    f1 = f1_score(test_all_targets, test_all_preds, average='micro')  # Use 'micro' for multi-label classification
    
    print('\nTest set: F1 Score: {:.4f}\n'.format(f1))

test(model, device, val_loader)

[[8.88200998e-01 2.05297008e-01 5.12846112e-01 2.89795399e-01
  1.24486186e-01 1.87811196e-01 1.64209530e-01 2.42667675e-01
  1.00276068e-01 2.32245788e-01 9.13227275e-02 8.85901954e-06
  9.86451283e-02 3.07158120e-02 4.13052648e-01 2.72232533e-01
  3.39973122e-01 4.06632543e-01 2.03177556e-01]]
[[8.7745643e-01 2.2155693e-01 5.1183522e-01 2.9819536e-01 1.4929198e-01
  2.0526250e-01 1.9883719e-01 2.7220702e-01 1.2616785e-01 2.5274310e-01
  9.7292691e-02 2.1247692e-05 1.0994256e-01 3.4512497e-02 4.0412167e-01
  2.6586851e-01 3.1738558e-01 3.7169075e-01 2.1227854e-01]]
[[8.5927582e-01 2.4724998e-01 5.1205552e-01 2.8708449e-01 2.2752823e-01
  2.6161143e-01 2.4768361e-01 3.3579263e-01 1.9334355e-01 2.7278677e-01
  1.1242802e-01 1.2391539e-04 1.3661657e-01 4.7172818e-02 3.7732300e-01
  2.4582782e-01 2.7176151e-01 3.3923191e-01 2.3397985e-01]]
[[8.5267258e-01 2.7377677e-01 5.2711040e-01 3.1175211e-01 2.7007812e-01
  2.9818362e-01 2.7487060e-01 3.6762494e-01 2.2863811e-01 2.9450014e-01
  1.407

KeyboardInterrupt: 

## Output

In [235]:
import csv

model.eval()
predictions = []

with torch.no_grad():
    for data, img_ids, _ in test_loader:
        data = data.to(device)
        output, _ = model(data)
        preds = torch.sigmoid(output).detach().numpy()[0]  # Convert logits to probabilities using sigmoid
        binary_preds = (preds > THRESHOLD).astype(int)
        labels = [' '.join([str(idx+1) for idx, val in enumerate(binary_preds) if val == 1])]
        predictions.extend(zip(img_ids, labels))


# Export predictions to CSV
output_file = 'predictions.csv'
with open(output_file, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['ImageID', 'Labels'])
    writer.writerows(predictions)

print(f"Predictions exported to {output_file}.")


Predictions exported to predictions.csv.
