In [59]:
import os

import torch
from PIL import Image
from torch.utils.data import Dataset, random_split, DataLoader
from torchvision import transforms

In [60]:
# Switch for the normalization statistics chosen in the transforms cell:
# True  -> use the pretrained model's mean/std,
# False -> use the mean/std measured on this dataset.
using_pretrained_model = False

# Data Preprocessing


In [61]:
# Minimal pipeline: only converts a loaded image to a tensor (no resize, no normalization).
transform = transforms.Compose([transforms.ToTensor()])

class NutritionDataset(Dataset):
    """Dataset of dish frames addressed by file path.

    Each item is an ``(image, label)`` pair where ``label`` is the name of the
    dish directory that contains the frame.

    Args:
        image_paths: Indexable collection of image file paths laid out as
            ``<root>/<dish>/<frames_dir>/<image>``.
        transform: Optional callable applied to the loaded RGB image. When
            omitted, the image is only converted to a tensor.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    @staticmethod
    def _label_from_path(img_path):
        """Return the dish directory name (two levels above the image file).

        Derived relative to the file rather than from a fixed index into the
        split path, so it stays correct when the dataset root is nested or
        absolute.
        """
        frames_dir = os.path.dirname(os.path.normpath(img_path))
        return os.path.basename(os.path.dirname(frames_dir))

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return ``(image, label)``."""
        img_path = self.image_paths[idx]
        # Close the underlying file promptly; convert() returns an independent
        # in-memory copy, so the image stays usable after the handle is released.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self._label_from_path(img_path)

        if self.transform:
            image = self.transform(image)
        else:
            # Self-contained fallback instead of relying on a module-level
            # `transform` variable that may be rebound or missing.
            image = transforms.ToTensor()(image)

        return image, label

In [62]:
# Create images paths
def collect_image_paths(root_dir, frames_subdir='frames_sampled30'):
    """Return every file path under ``<root_dir>/<dish>/<frames_subdir>/``.

    Directory listings are sorted because ``os.listdir`` returns entries in
    arbitrary order; sorting makes the resulting list (and anything derived
    from it, such as a seeded split) identical across runs and machines.
    Dish directories without a ``frames_subdir`` directory are skipped.
    """
    paths = []
    for dish_dir in sorted(os.listdir(root_dir)):
        img_dir = os.path.join(root_dir, dish_dir, frames_subdir)

        # isdir (not exists): a stray file with this name must not be listed.
        if not os.path.isdir(img_dir):
            continue

        for file_name in sorted(os.listdir(img_dir)):
            paths.append(os.path.join(img_dir, file_name))
    return paths


print("Creating image paths list...")
dir_path = 'raw_images'

image_paths = collect_image_paths(dir_path)

print(len(image_paths))

Creating image paths list...
54845


In [None]:
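# Calculating per-channel dataset statistics (mean/std). Kept commented out on purpose:
# a full pass over every image is very slow, so do not re-run casually. The printed
# results are hard-coded as the dataset normalization stats in the transforms cell below.
# Note: std here is the average of per-image standard deviations, not the standard
# deviation taken over all pixels of the dataset.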
# stats_dataset = NutritionDataset(image_paths)
# data_loader = DataLoader(stats_dataset, batch_size=32, shuffle=False)

# mean = 0.0
# std = 0.0
# total_images_count = 0

# for images, _ in data_loader:
#     batch_samples = images.size(0)
#     images = images.view(batch_samples, images.size(1), -1)
#     mean += images.mean(dim=2).sum(dim=0)
#     std += images.std(dim=2).sum(dim=0)
#     total_images_count += batch_samples

# mean /= total_images_count
# std /= total_images_count

# print(f"Mean: {mean}")
# print(f"Std: {std}")

Mean: tensor([0.5005, 0.4726, 0.3732])
Std: tensor([0.2193, 0.2296, 0.2398])


In [64]:
# Transforms/Data Augmentation

input_size = (225, 225)

# Per-channel normalization statistics: the pretrained model's values when a
# pretrained model is used, otherwise the mean and std of the entire dataset.
data_normals = (
    {
        'mean': [0.485, 0.456, 0.406],
        'std': [0.229, 0.224, 0.225]
    }
    if using_pretrained_model
    else {
        'mean': [0.5005, 0.4726, 0.3732],
        'std': [0.2193, 0.2296, 0.2398]
    }
)

# Training pipeline: random crop/flip/rotation/color augmentation, then
# tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(**data_normals),
])

# Evaluation pipeline: deterministic resize, then the same tensor conversion
# and normalization as training.
test_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(**data_normals),
])

In [65]:
# Fixed seed so the train/test partition is the same on every run.
SPLIT_SEED = 42
BATCH_SIZE = 32
PREVIEW_BATCHES = 5

train_test_split = [0.7, 0.3]
# NOTE(review): the split is over individual frames, so frames from the same
# dish directory can land in both subsets -- confirm this is intended.
train_imgs, test_imgs = random_split(
    image_paths,
    train_test_split,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_set = NutritionDataset(train_imgs, transform=train_transform)
test_set = NutritionDataset(test_imgs, transform=test_transform)

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# Checking Train loader, then Test loader: print the batch shape and label
# count for the first few batches of each.
for title, loader in (("Train Loader...", train_loader), ("Test Loader...", test_loader)):
    print(title)
    # zip(range(...), loader) stops before pulling an extra batch from the
    # loader, unlike enumerate + break which loads one batch just to drop it.
    for _, (images, labels) in zip(range(PREVIEW_BATCHES), loader):
        print(images.shape, len(labels))




Train Loader... 

torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
Test Loader...
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
