In [68]:
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import os
import re
from skimage import io
from PIL import Image

In [87]:
N_EPOCHS = 50
BATCH_SIZE = 1
LR = 0.05

In [3]:
NUM_CLASSES = 14
DATA_DIR = '../data/'

## Create a csv with information about the labels

Each row has the filename and all of the potential labels coded as either zeros or ones. (Only need to run this once to create the csv)

In [44]:
header = ["Filename", "baby", "bird", "car", "clouds", "dog", "female", "flower", "male", "night", "people", "portrait", "river", "sea", "tree"]

def image_has_label(image_name, label):
    image_number = re.sub('\D', '', image_name)
    with open("../data/annotations/{}.txt".format(label)) as file:
        for line in file:
            if line.strip() == image_number:
                return 1
    return 0
    

data = []
for image_name in os.listdir("../data/images/"):
    row = []
    row.append(image_name)
    for label in header[1:]:
        row.append(image_has_label(image_name, label))
    data.append(row)


df = pd.DataFrame(data, columns=header)
df.head()
df.to_csv("images_encoded.csv", index=False)


https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

In [47]:
multi_label_df = pd.read_csv("images_encoded.csv")

n = 3
img_name = multi_label_df.iloc[n, 0]
target_labels = multi_label_df.iloc[n, 1:]

print('Image name: {}'.format(img_name))
print('Target shape: {}'.format(target_labels.shape))
print('Labels: {}'.format(target_labels))

print(len(multi_label_df))
multi_label_df.head()

Image name: im6121.jpg
Target shape: (14,)
Labels: baby        0
bird        0
car         0
clouds      0
dog         0
female      1
flower      0
male        0
night       0
people      1
portrait    0
river       0
sea         0
tree        0
Name: 3, dtype: object
20000


Unnamed: 0,Filename,baby,bird,car,clouds,dog,female,flower,male,night,people,portrait,river,sea,tree
0,im5348.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,im10998.jpg,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2,im4424.jpg,0,0,0,0,0,1,0,1,0,1,1,0,0,0
3,im6121.jpg,0,0,0,0,0,1,0,0,0,1,0,0,0,0
4,im13373.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Creates a dataset from the csv created earlier

In [82]:
class MultiLabelDataset(Dataset):
    def __init__(self, csv_file, root_dir="../data/images/", transform=None):
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        img_name = os.path.join(self.root_dir, self.df.iloc[idx,0])
        image = Image.open(img_name)
        target_labels = self.df.iloc[idx, 1:]
        
        sample = {"image_name": img_name, "image": image, "target_labels": target_labels}
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample
    
class ToTensor(object):
    def __call__(self, sample):
        image_name, image, target_labels = sample["image_name"], sample['image'], sample['target_labels']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = torchvision.transforms.functional.to_tensor(image)
        return {'image_name': image_name,
                'image': image,
                'target_labels': torch.tensor(target_labels.values.astype(np.int32))}

In [85]:
transform = transforms.Compose([ToTensor()])
image_dataset = MultiLabelDataset(csv_file="images_encoded.csv", transform=transform)

#print(image_dataset[3]["image_name"])

for i in range(len(image_dataset)):
    sample = image_dataset[i]
    
    print(i, sample["image"].shape, sample["target_labels"].shape)
    
    if i == 4:
        break

0 torch.Size([3, 128, 128]) torch.Size([14])
1 torch.Size([3, 128, 128]) torch.Size([14])
2 torch.Size([3, 128, 128]) torch.Size([14])
3 torch.Size([1, 128, 128]) torch.Size([14])
4 torch.Size([3, 128, 128]) torch.Size([14])


A single entry in the dataset has the attributes image_name, image and target_labels

image_name is a string

image is a tensor with dimension [1 or 3, 128, 128] (depending on if it's grayscale or RGB)

target_labels is a tensor of size [14]

In [91]:
image_loader = DataLoader(image_dataset, batch_size=BATCH_SIZE, shuffle=True)

dataiter = iter(image_loader)
batch = next(dataiter)

print(batch["image"].shape)
print(batch["target_labels"].shape)

torch.Size([1, 3, 128, 128])
torch.Size([1, 14])


The loader adds one dimension to allow batching:

images: torch.Size([batch_size, 3, 128, 128])

target_labels torch.Size([batch_size, 14])