In [39]:
import os
import pandas as pd
import torch
import xml.etree.ElementTree as ET
from PIL import Image 
from torchvision.transforms.functional import pil_to_tensor
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [40]:
files_dir = 'dataset_fruit/train_zip/train'
test_dir = 'dataset_fruit/test_zip/test'


def _index_split(directory, image_col, annot_col):
    """Build the image/annotation file-name table for one dataset split.

    Lists ``directory``, keeps the entries whose name ends in ``.jpg``
    (sorted by name) and derives each annotation name by swapping the last
    four characters for ``.xml``. Whether the annotation file actually
    exists is not checked here.

    Returns the image-name Series, the annotation-name Series and the
    two-column DataFrame combining them, in that order.
    """
    jpg_names = [name for name in sorted(os.listdir(directory))
                 if name.endswith('.jpg')]
    xml_names = [name[:-4] + '.xml' for name in jpg_names]

    image_series = pd.Series(jpg_names, name=image_col)
    annot_series = pd.Series(xml_names, name=annot_col)
    table = pd.concat([image_series, annot_series], axis=1)
    return image_series, annot_series, table


# Training split: columns 'images' / 'annots'.
images, annots, df = _index_split(files_dir, 'images', 'annots')

# Test split: columns 'test_images' / 'test_annots'.
test_images, test_annots, test_df = _index_split(
    test_dir, 'test_images', 'test_annots'
)

In [45]:
from torchvision.transforms import Compose, Resize, ToTensor

class FruitImagesDataset(torch.utils.data.Dataset):
    """Detection dataset yielding ``(image, label_matrix)`` pairs.

    Each row of ``df`` holds an image file name (column 0) and its XML
    annotation file name (column 1); both are resolved relative to
    ``files_dir``. The annotation is expected to contain ``object`` elements
    with a ``name`` and a ``bndbox`` (``xmin``/``xmax``/``ymin``/``ymax``).

    The returned ``label_matrix`` has shape ``(S, S, C + 5 * B)``. For the
    grid cell that contains a box centre:

    * ``[0:C]``      one-hot class indicator
    * ``[C]``        objectness flag (1 when the cell holds an object)
    * ``[C+1:C+5]``  ``x_cell, y_cell, width_cell, height_cell``

    All other entries stay zero. Only the first box that lands in a cell is
    recorded; later boxes for the same cell are ignored.

    Args:
        df: table of image / annotation file names (read positionally).
        files_dir: directory containing the images and annotations.
        S: number of grid cells per image side.
        B: number of box slots per cell (only the first is filled here).
        C: number of classes.
        transform: optional callable ``(image, boxes) -> (image, boxes)``.
        image_size: ``(height, width)`` passed to ``Resize``.
    """

    # Maps the annotation's object name to its class index.
    CLASS_TO_INDEX = {'apple': 0, 'banana': 1, 'orange': 2}

    def __init__(self, df=df, files_dir=files_dir, S=7, B=2, C=3, transform=None, image_size=(416, 416)):
        self.annotations = df
        self.files_dir = files_dir
        self.transform = transform
        self.S = S
        self.B = B
        self.C = C
        self.image_size = image_size
        self.resize = Resize(self.image_size)
        # Built once instead of on every __getitem__ call.
        self.to_tensor = ToTensor()

    def __len__(self):
        """Number of image / annotation pairs in the table."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load item ``index`` and return ``(image_tensor, label_matrix)``.

        Raises:
            KeyError: if an annotated object name is not in ``CLASS_TO_INDEX``.
        """
        label_path = os.path.join(self.files_dir, self.annotations.iloc[index, 1])
        root = ET.parse(label_path).getroot()

        img_path = os.path.join(self.files_dir, self.annotations.iloc[index, 0])
        # Close the underlying file as soon as the pixels are converted;
        # ``convert`` returns an independent, fully loaded image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        # Normalise with the original size, i.e. before resizing.
        img_width, img_height = image.size
        boxes = self._read_boxes(root, img_width, img_height)

        image = self.to_tensor(self.resize(image))

        if self.transform:
            image, boxes = self.transform(image, boxes)

        return image, self._encode_labels(boxes)

    def _read_boxes(self, root, img_width, img_height):
        """Return a ``(num_objects, 5)`` tensor of ``[class, cx, cy, w, h]`` rows
        with coordinates normalised by the image width / height."""
        boxes = []
        for member in root.findall('object'):
            klass = self.CLASS_TO_INDEX[member.find('name').text]
            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            centerx = ((xmax + xmin) / 2) / img_width
            centery = ((ymax + ymin) / 2) / img_height
            boxwidth = (xmax - xmin) / img_width
            boxheight = (ymax - ymin) / img_height

            boxes.append([klass, centerx, centery, boxwidth, boxheight])

        # reshape keeps a consistent (N, 5) shape even when the image has no
        # annotated objects (an empty list would otherwise give shape (0,)).
        return torch.tensor(boxes, dtype=torch.float32).reshape(-1, 5)

    def _encode_labels(self, boxes):
        """Scatter normalised boxes into the ``(S, S, C + 5 * B)`` label grid."""
        label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
        last_cell = self.S - 1

        for box in boxes:
            class_label, x, y, width, height = box.tolist()
            class_label = int(class_label)

            # Row i comes from y, column j from x. Clamp so that a centre on
            # the right/bottom edge (x or y == 1.0) or slightly outside the
            # image does not index out of range or wrap around.
            i = min(max(int(self.S * y), 0), last_cell)
            j = min(max(int(self.S * x), 0), last_cell)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            width_cell, height_cell = (
                width * self.S,
                height * self.S,
            )

            # Only the first object assigned to a cell is kept.
            if label_matrix[i, j, self.C] == 0:
                label_matrix[i, j, self.C] = 1

                box_coordinates = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                # Box slots follow the objectness flag; expressed relative to
                # C so the layout stays valid for any number of classes.
                label_matrix[i, j, self.C + 1:self.C + 5] = box_coordinates
                label_matrix[i, j, class_label] = 1

        return label_matrix

    
def show_image(image):
    """Draw an image tensor on the current matplotlib axes.

    The tensor's axes are reordered with ``permute(1, 2, 0)`` (first axis
    moved to the end) and the result is converted to a NumPy array before
    being handed to ``plt.imshow``.
    """
    channels_last = image.permute(1, 2, 0)
    pixels = channels_last.numpy()
    plt.imshow(pixels)

In [46]:

BATCH_SIZE = 16

# Pass each split's own file table explicitly. The constructor's ``df``
# default is the training table, so omitting it for the test split would
# look up training file names inside ``test_dir``.
train_dataset = FruitImagesDataset(
    df=df,
    files_dir=files_dir,
)

test_dataset = FruitImagesDataset(
    df=test_df,
    files_dir=test_dir,
)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False,
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False,
)

# Pull one batch as a smoke test of the loading pipeline.
next(iter(train_loader))

# im, lb = train_dataset.__getitem__(0)

[tensor([[[[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           ...,
           [1.0000, 1.0000, 1.0000,  ..., 0.7255, 0.7137, 0.7020],
           [1.0000, 1.0000, 1.0000,  ..., 0.7176, 0.7020, 0.7020],
           [1.0000, 1.0000, 1.0000,  ..., 0.7176, 0.7020, 0.7020]],
 
          [[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           ...,
           [1.0000, 1.0000, 1.0000,  ..., 0.2392, 0.2471, 0.2353],
           [1.0000, 1.0000, 1.0000,  ..., 0.2314, 0.2353, 0.2353],
           [1.0000, 1.0000, 1.0000,  ..., 0.2314, 0.2353, 0.2353]],
 
          [[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
           [1.0000, 1.00