In [1]:
import pandas as pd
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import gdown
import shutil

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Download the dataset files from Google Drive

In [2]:
%%time
gdown.download('https://drive.google.com/file/d/1JhvXIm23pgpce4CnMowaU-UxquT2n7Lb/view?usp=sharing', 
               'dataset.csv', quiet=False,fuzzy=True)

Downloading...
From: https://drive.google.com/uc?id=1JhvXIm23pgpce4CnMowaU-UxquT2n7Lb
To: /Volumes/T7/dataset.csv
100%|████████████████████████████████████████| 814k/814k [00:00<00:00, 3.36MB/s]

CPU times: user 49.8 ms, sys: 39.7 ms, total: 89.4 ms
Wall time: 1.62 s





'dataset.csv'

In [3]:
%%time
gdown.download('https://drive.google.com/file/d/1nizDDIoYYOEl4cD17ftdy0VrEI6TYPUB/view?usp=sharing', 
               quiet=False,fuzzy=True)

Downloading...
From (original): https://drive.google.com/uc?id=1nizDDIoYYOEl4cD17ftdy0VrEI6TYPUB
From (redirected): https://drive.google.com/uc?id=1nizDDIoYYOEl4cD17ftdy0VrEI6TYPUB&confirm=t&uuid=d4de65ef-8e39-49f2-8325-706aa46d4a3e
To: /Volumes/T7/dataset.zip
100%|██████████████████████████████████████| 1.61G/1.61G [05:48<00:00, 4.62MB/s]


CPU times: user 9.94 s, sys: 24.2 s, total: 34.1 s
Wall time: 5min 50s


'dataset.zip'

# Unzip the image archive

In [4]:
%%time
shutil.unpack_archive('dataset.zip', '')

CPU times: user 6.62 s, sys: 7.46 s, total: 14.1 s
Wall time: 1min 34s


In [6]:
# Delete the archive once it has been unpacked. The existence check keeps this
# cell safe to re-run after the file is already gone.
if os.path.exists('dataset.zip'):
    os.remove('dataset.zip')

# Dataset class

In [7]:
class FoodDataset(Dataset):
    """Map-style dataset pairing rows of a nutrition CSV with food images.

    Row ``i`` of the CSV (positional index) is paired with the image file
    ``{path_to_imgs}/{i}.png``. Each image is center-cropped to a square whose
    side is the image's shorter edge, resized with ``transforms.Resize(scale)``
    and converted with ``transforms.ToTensor()``.

    Args:
        path_to_csv: Path of the CSV file read with ``pd.read_csv``.
        path_to_imgs: Directory containing the ``{i}.png`` image files.
        is_read_all: If True, every image is loaded and transformed once in
            ``__init__`` and kept in ``self.images``; otherwise images are
            loaded from disk on each ``__getitem__`` call.
        scale: Size passed to ``transforms.Resize``.
    """

    def __init__(self, path_to_csv: str, path_to_imgs: str, is_read_all: bool, scale: int):
        self.nutr_info = pd.read_csv(path_to_csv)
        self.path_to_imgs = path_to_imgs
        self.is_read_all = is_read_all
        self.scale = scale
        if self.is_read_all:
            # Eager mode: pay the full decoding cost up front, one image per CSV row.
            self.images = [self._load_image(i) for i in range(len(self.nutr_info))]

    def __len__(self):
        """Return the number of rows in the nutrition table."""
        return len(self.nutr_info)

    def _load_image(self, idx):
        """Open image number ``idx``, transform it, and close the file handle."""
        # The context manager releases the file descriptor deterministically;
        # the transform reads the pixel data while the file is still open.
        with Image.open(f'{self.path_to_imgs}/{idx}.png') as img:
            return self.transform(img)

    def transform(self, img):
        """Center-crop ``img`` to a square, resize it to ``self.scale`` and convert it to a tensor."""
        # The crop size depends on each image, so the pipeline is built per call.
        size = min(img.size)
        return transforms.Compose([transforms.CenterCrop(size),
                                   transforms.Resize(self.scale),
                                   transforms.ToTensor()])(img)

    def __getitem__(self, idx):
        """Return row ``idx`` of the CSV as a dict with the image added under ``'image'``."""
        sample = dict(self.nutr_info.iloc[idx])
        if self.is_read_all:
            image = self.images[idx]
        else:
            image = self._load_image(idx)
        sample['image'] = image
        return sample

In [10]:
%%time
fd = FoodDataset(path_to_csv='dataset.csv', path_to_imgs='dataset', is_read_all=False, scale=64)
fd[300]

CPU times: user 28.9 ms, sys: 6.78 ms, total: 35.6 ms
Wall time: 85.8 ms


{'mass': 100.0,
 'kcal_total': 163.0,
 'prot_total': 20.3,
 'fat_total': 9.1,
 'carb_total': 0.1,
 'kcal_100': 163.0,
 'prot_100': 20.3,
 'fat_100': 9.1,
 'carb_100': 0.1,
 'text': 'Salmon, smoked, in slices, Fish',
 'image': tensor([[[0.1843, 0.2784, 0.2941,  ..., 0.1804, 0.1725, 0.1647],
          [0.2667, 0.2902, 0.3216,  ..., 0.1608, 0.1529, 0.1529],
          [0.1765, 0.2471, 0.3451,  ..., 0.1059, 0.1020, 0.0980],
          ...,
          [0.8627, 0.8627, 0.8863,  ..., 0.8196, 0.8196, 0.8157],
          [0.8745, 0.8706, 0.8784,  ..., 0.8235, 0.8157, 0.8118],
          [0.8824, 0.8824, 0.8863,  ..., 0.8078, 0.8078, 0.8000]],
 
         [[0.1098, 0.1765, 0.2000,  ..., 0.3176, 0.3059, 0.2941],
          [0.1725, 0.1843, 0.2471,  ..., 0.2980, 0.2902, 0.2824],
          [0.1098, 0.1686, 0.2863,  ..., 0.1961, 0.1922, 0.1882],
          ...,
          [0.7961, 0.7882, 0.8235,  ..., 0.6314, 0.6353, 0.6118],
          [0.8706, 0.8667, 0.8706,  ..., 0.6353, 0.6078, 0.5804],
          [0.890

In [11]:
%%time
fd = FoodDataset(path_to_csv='dataset.csv', path_to_imgs='dataset', is_read_all=True, scale=64)
fd[300]

CPU times: user 33.7 s, sys: 1.51 s, total: 35.2 s
Wall time: 1min 56s


{'mass': 100.0,
 'kcal_total': 163.0,
 'prot_total': 20.3,
 'fat_total': 9.1,
 'carb_total': 0.1,
 'kcal_100': 163.0,
 'prot_100': 20.3,
 'fat_100': 9.1,
 'carb_100': 0.1,
 'text': 'Salmon, smoked, in slices, Fish',
 'image': tensor([[[0.1843, 0.2784, 0.2941,  ..., 0.1804, 0.1725, 0.1647],
          [0.2667, 0.2902, 0.3216,  ..., 0.1608, 0.1529, 0.1529],
          [0.1765, 0.2471, 0.3451,  ..., 0.1059, 0.1020, 0.0980],
          ...,
          [0.8627, 0.8627, 0.8863,  ..., 0.8196, 0.8196, 0.8157],
          [0.8745, 0.8706, 0.8784,  ..., 0.8235, 0.8157, 0.8118],
          [0.8824, 0.8824, 0.8863,  ..., 0.8078, 0.8078, 0.8000]],
 
         [[0.1098, 0.1765, 0.2000,  ..., 0.3176, 0.3059, 0.2941],
          [0.1725, 0.1843, 0.2471,  ..., 0.2980, 0.2902, 0.2824],
          [0.1098, 0.1686, 0.2863,  ..., 0.1961, 0.1922, 0.1882],
          ...,
          [0.7961, 0.7882, 0.8235,  ..., 0.6314, 0.6353, 0.6118],
          [0.8706, 0.8667, 0.8706,  ..., 0.6353, 0.6078, 0.5804],
          [0.890