Much of the code below is adapted from the following PyTorch tutorials:
* https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
* https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

In [38]:
import os

import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets
from torchvision.io import read_image
from torchvision.transforms import ToTensor

In [48]:
# Image preprocessing pipeline: resize every image to 256x256, then
# normalize each of the three channels with a fixed mean and std.
# NOTE(review): these constants look like the usual ImageNet channel
# statistics, which assume inputs already scaled to [0, 1] -- confirm.
simple_transforms = torch.nn.Sequential(
    torchvision.transforms.Resize(size=(256, 256)),
    torchvision.transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
)

In [49]:
class CocoNoCropping(Dataset):
    """Map-style dataset that loads whole (uncropped) images by image id.

    Each item is a dict ``{"image": ..., "label": ...}``. The image is the
    tensor returned by ``read_image`` (optionally divided by 255.0 and passed
    through ``transform``); the label is the value stored for that image id
    in the annotations file (optionally passed through ``target_transform``).

    Args:
        img_ids: Path to a text file containing a JSON list of image ids
            (ints).
        my_annotations_file: Path to a JSON file holding a dictionary that
            maps image ids (as strings) to labels.
        img_dir: Directory with the image files. Each file is expected to be
            named as the image id zero-padded to 12 characters plus ``.jpg``.
        transform: Optional callable applied to the image tensor.
        target_transform: Optional callable applied to the label.
        divide_by_255: If true, divide the image by 255.0 before ``transform``
            is applied.
    """

    def __init__(self, img_ids, my_annotations_file, img_dir,
                 transform=None, target_transform=None, divide_by_255=False):
        # Python list saved as text; ids are ints.
        with open(img_ids, 'r', encoding='utf-8') as f:
            self.img_ids = json.load(f)
        # Python dictionary saved as JSON; ids are strings.
        with open(my_annotations_file, 'r', encoding='utf-8') as f:
            self.ids_to_labels = json.load(f)

        self.img_dir = img_dir

        # Target (label) transform.
        self.target_transform = target_transform
        # Image transform and optional rescaling.
        self.transform = transform
        self.divide_by_255 = divide_by_255

    def __len__(self):
        """Return the number of image ids in the split."""
        return len(self.img_ids)

    def __getitem__(self, idx):
        """Load the image and label for position ``idx`` of the split.

        Returns:
            A dict with keys ``"image"`` and ``"label"``.

        Raises:
            KeyError: If the image id has no entry in the annotations file.
        """
        # The annotations dictionary is keyed by the id as a string.
        img_id = str(self.img_ids[idx])
        # File names are the id left-padded with zeros to 12 characters.
        img_name = img_id.zfill(12) + '.jpg'

        img_path = os.path.join(self.img_dir, img_name)
        # NOTE(review): read_image is called without a mode, so the channel
        # count follows the file; confirm the split has no grayscale images
        # if a 3-channel transform is used.
        image = read_image(img_path)

        if self.divide_by_255:
            image = image / 255.0

        label = self.ids_to_labels[img_id]

        # Compare against None explicitly: a callable may be falsy (e.g. an
        # empty nn.Sequential) and must still be applied when supplied.
        if self.transform is not None:
            image = self.transform(image)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return {"image": image, "label": label}

In [50]:
%ls ../my_splits/

dev_ids.txt     test_ids.txt    train1_ids.txt  train_ids.txt


In [51]:
%ls ../my_annotations/

imgIds_to_labels_train2017.json     strCatNms_to_imgIds_train2017.json
imgIds_to_labels_val2017.json       strCatNms_to_imgIds_val2017.json


In [52]:
%ls ../images/

train2017/ val2017/


In [53]:
# Dataset over the "train1" split: images are scaled by 1/255 and then
# passed through the resize + normalize pipeline.
train1_dataset = CocoNoCropping(
    img_ids='../my_splits/train1_ids.txt',
    my_annotations_file='../my_annotations/imgIds_to_labels_train2017.json',
    img_dir='../images/train2017/',
    transform=simple_transforms,
    divide_by_255=True,
)

In [61]:
# Batches of 8 samples, drawn in shuffled order.
train1_dataloader = DataLoader(
    train1_dataset,
    batch_size=8,
    shuffle=True,
)

In [62]:
# Draw one batch from a fresh iterator and display it.
batch_iterator = iter(train1_dataloader)
next(batch_iterator)

{'image': tensor([[[[ 1.5667,  1.7109,  1.9064,  ..., -0.5681, -0.3712, -0.1928],
           [ 1.3752,  1.6708,  1.8592,  ..., -0.6110, -0.4140, -0.2556],
           [ 1.1756,  1.5551,  1.7979,  ..., -0.6381, -0.4268, -0.2728],
           ...,
           [ 0.6294,  0.6422,  0.5908,  ...,  0.1252,  0.0684,  0.0626],
           [ 0.5878,  0.5836,  0.5750,  ...,  0.0230,  0.0228,  0.1595],
           [ 0.5422,  0.5364,  0.5379,  ..., -0.0472,  0.0898,  0.0727]],
 
          [[ 0.3233,  0.6997,  1.1622,  ..., -1.9832, -2.0313, -2.0284],
           [ 0.0690,  0.5418,  1.0015,  ..., -1.9876, -2.0357, -2.0357],
           [-0.2548,  0.1957,  0.7474,  ..., -1.9846, -2.0357, -2.0211],
           ...,
           [-0.1784, -0.1594, -0.1770,  ..., -0.5829, -0.6409, -0.6469],
           [-0.2325, -0.1975, -0.1931,  ..., -0.6481, -0.6789, -0.5827],
           [-0.2792, -0.2456, -0.2310,  ..., -0.7197, -0.6104, -0.6716]],
 
          [[-0.0674,  0.6530,  1.1817,  ..., -1.7609, -1.6824, -1.6548],
    

In [65]:
# Fetch a single batch and reuse it: the loader shuffles, so calling
# next(iter(...)) twice would report the min of one random batch and the
# max of another (and load the data twice).
images = next(iter(train1_dataloader))['image']
images.min(), images.max()

(tensor(-2.1179), tensor(2.6400))

In [66]:
# Walk the whole loader once and report the shape/dtype of every batch.
for batch in train1_dataloader:
    images = batch['image']  # [batch size, channel, h, w]
    print(images.shape, images.dtype)

    # The collated `label` entry is a list with num_classes tensors, each
    # holding batch-size labels; stacking along dim=1 yields a contiguous
    # [batch size, num_classes] tensor.
    labels = torch.stack(batch['label'], dim=1)
    print(labels.shape, labels.dtype, labels.is_contiguous())
    print()
    # Add a `break` here to inspect only the first batch.

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([8, 3, 256, 256]) torch.float32
torch.Size([8, 4]) torch.int64 True

torch.Size([4, 3, 256, 256]) torch.float