## Imports

In [16]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms as T, models
from PIL import Image
from pathlib import Path
from sklearn.model_selection import train_test_split
# from albumentations import *
# from albumentations.pytorch import ToTensor

In [3]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [4]:
dir(transforms.transforms)

['CenterCrop',
 'ColorJitter',
 'Compose',
 'F',
 'FiveCrop',
 'Grayscale',
 'Image',
 'Iterable',
 'Lambda',
 'LinearTransformation',
 'Normalize',
 'Pad',
 'RandomAffine',
 'RandomApply',
 'RandomChoice',
 'RandomCrop',
 'RandomErasing',
 'RandomGrayscale',
 'RandomHorizontalFlip',
 'RandomOrder',
 'RandomPerspective',
 'RandomResizedCrop',
 'RandomRotation',
 'RandomSizedCrop',
 'RandomTransforms',
 'RandomVerticalFlip',
 'Resize',
 'Scale',
 'Sequence',
 'TenCrop',
 'ToPILImage',
 'ToTensor',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_get_image_size',
 '_pil_interpolation_to_str',
 'accimage',
 'math',
 'np',
 'numbers',
 'random',
 'torch',
 'types',

## Reading/Processing the Data

In [5]:
IMAGE_PATH = Path('./plant-pathology-2020-fgvc7/images')

def image_path(file_stem):
    return IMAGE_PATH/f'{file_stem}.jpg'

In [6]:
train_df = pd.read_csv('./plant-pathology-2020-fgvc7/train.csv')
test_df = pd.read_csv('./plant-pathology-2020-fgvc7/test.csv')

train_paths = train_df['img_file'] = train_df['image_id'].apply(image_path)
test_paths = test_df['img_file'] = test_df['image_id'].apply(image_path)

train_labels = train_df[['healthy','multiple_diseases','rust','scab']]

In [7]:
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    train_paths, train_labels, test_size = 0.2, random_state=23, stratify = train_labels)
train_paths.reset_index(drop=True,inplace=True)
train_labels.reset_index(drop=True,inplace=True)
valid_paths.reset_index(drop=True,inplace=True)
valid_labels.reset_index(drop=True,inplace=True)

### Creating a custom dataset object

In [22]:
class LeafDataset(Data.Dataset):
    def __init__(self, img_paths, labels, train=True, test=False):
        self.img_paths = img_paths
        self.train = train
        self.test = test
        
        if not self.test:
            self.labels = labels
        
        self.train_transform = T.Compose([T.RandomRotation(60),])
        self.test_transform = T.Compose([])
        self.default_transform = T.Compose([T.ToPILImage(), T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                                         T.ToTensor()]) # ImageNet Stats
        
    def __len__(self):
        return self.img_paths.shape[0]
    
    
    def __getitem__(self, i):
        image = np.array(Image.open(self.img_paths[i]))
        print(image[-1])
        label = self.labels.loc[i, :].values
        image  = self.default_transform(image)
        if self.train:
            image = self.train_transform(image)
        elif self.test:
            image = self.test_transform(image)
        
        return image, label if not self.test else image

In [23]:
train_dataset = LeafDataset(train_paths, train_labels, train=True)

In [24]:
trainloader = Data.DataLoader(train_dataset, shuffle=True, batch_size = 4, num_workers = 1)

In [25]:
next(iter(trainloader))

[[68 82 47]
 [71 85 50]
 [74 88 52]
 ...
 [80 64 49]
 [81 65 49]
 [81 65 49]]


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/Users/anishwalawalkar/miniconda2/envs/plant-pathology/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/Users/anishwalawalkar/miniconda2/envs/plant-pathology/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/Users/anishwalawalkar/miniconda2/envs/plant-pathology/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-22-d327028ce4b1>", line 23, in __getitem__
    image  = self.default_transform(image)
  File "/Users/anishwalawalkar/miniconda2/envs/plant-pathology/lib/python3.7/site-packages/torchvision/transforms/transforms.py", line 61, in __call__
    img = t(img)
  File "/Users/anishwalawalkar/miniconda2/envs/plant-pathology/lib/python3.7/site-packages/torchvision/transforms/transforms.py", line 166, in __call__
    return F.normalize(tensor, self.mean, self.std, self.inplace)
  File "/Users/anishwalawalkar/miniconda2/envs/plant-pathology/lib/python3.7/site-packages/torchvision/transforms/functional.py", line 190, in normalize
    raise TypeError('tensor should be a torch tensor. Got {}.'.format(type(tensor)))
TypeError: tensor should be a torch tensor. Got <class 'PIL.Image.Image'>.


[[156 189 132]
 [155 188 131]
 [154 187 130]
 ...
 [108  92  95]
 [108  92  93]
 [108  92  93]]
[[101 125  47]
 [101 125  47]
 [100 124  46]
 ...
 [140 108  83]
 [140 108  83]
 [140 108  83]]
