In [26]:
import torch
import os
from PIL import Image
from torch.utils.data import Dataset, TensorDataset
from sklearn.preprocessing import LabelEncoder

In [47]:
class CustomTensorDataset(Dataset):
    def __init__(self, root_dir, csv_file, transform=None):
        self.data = csv_file
        self.root_dir = root_dir
        self.transform = transform
        self.le = LabelEncoder()
        self.le.fit(self.data.breed.unique())
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data.loc[idx, "id"] + ".jpg")
        image = Image.open(img_name)
        label_int = torch.tensor(self.le.transform([self.data.loc[idx, "breed"]]), dtype=torch.long)
        if self.transform:
            image = self.transform(image)
        return image, label_int

In [48]:
import pandas as pd
labels = pd.read_csv('../data/labels.csv')

In [49]:
custom_data = CustomTensorDataset(root_dir='../data/train', csv_file=labels, transform=None)

In [54]:
from torch.utils.data import random_split
train_len = int(0.8 * len(custom_data))
test_len = len(custom_data) - train_len
train_set, test_set = random_split(custom_data, [train_len, test_len])

In [56]:
image, label = train_set[0]
label

tensor([59])

In [37]:
le = LabelEncoder()
le.fit(labels.breed.unique())

In [45]:
k = le.transform([labels.loc[0, 'breed']])

In [46]:
j = le.inverse_transform(k)
j

array(['boston_bull'], dtype=object)

In [57]:
train_set[:3]

TypeError: join() argument must be str, bytes, or os.PathLike object, not 'Series'

In [62]:
train_load = torch.load('../intermediates/train.pt')
train_img, train_lbl = train_load

In [63]:
train_set = TensorDataset(train_load, train_img)

torch.Size([8177, 3, 224, 224])

In [65]:
train_lbl.shape

torch.Size([8177])