<a href="https://colab.research.google.com/github/MarioAvolio/FoodX-251-Classification/blob/main/FoodX_251_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Performing data augmentation on a batch of images and the need for collate_fn

Define the Dataset class, which takes the input images, their classes, and
the augmentation object as initializers:

In [2]:
!pip install -q torch_snippets
from torch_snippets import *
from torchvision.datasets import MNIST
from torchvision import transforms
from google.colab import drive
import pandas as pd
import numpy as np
import cv2
device = 'cuda' if torch.cuda.is_available() else 'cpu'

!pip install torch_summary
from torchsummary import summary

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m202.5/202.5 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m80.6 MB/s[0m e

In [3]:
NUM_CLASSES=251

G_DRIVE='/content/gdrive/'
PROJ_PATH= G_DRIVE+"MyDrive/Visual-Proj/"

# ----------------- tar
TRAIN_PATH = PROJ_PATH + "train.tar"
VAL_PATH = PROJ_PATH + "val.tar"
VAL_DEGRADED_PATH = PROJ_PATH + "val_degraded.zip"
NOISE_TRAINING_DATA_PATH = PROJ_PATH + "noise.zip"
ANNOTATION_PATH = PROJ_PATH + "annot.tar"


# ---------------- local data extracted
TRAIN_PATH_LOCAL="/content/train_set/"
VAL_DEGRADED_PATH_LOCAL="/content/val_set_degraded/"
VAL_PATH_LOCAL="/content/val_set/"
NOISED_PATH_LOCAL="/content/noise/"


# --------------- csv file
ANNOTATION_PATH_CLASS_LOCAL="/content/class_list.txt"
ANNOTATION_PATH_VALIDATION_CLEANED="/content/val_info.csv"
ANNOTATION_PATH_TRAIN_CLEANED=PROJ_PATH+"train_info_cleaned.csv"
ANNOTATION_PATH_BALANCED_TRAIN = PROJ_PATH + "balanced_train_info.csv"






In [4]:
def get_img_from_path(path):
  return cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)  


In [5]:
drive.mount(G_DRIVE, force_remount=True)

Mounted at /content/gdrive/


In [6]:
!tar -xf $TRAIN_PATH --skip-old-files 
!tar -xf $VAL_PATH --skip-old-files 
!tar -xf $ANNOTATION_PATH --skip-old-files 
!unzip -nqq $VAL_DEGRADED_PATH  
!unzip -nqq $NOISE_TRAINING_DATA_PATH  

In [7]:
train_info = pd.read_csv(ANNOTATION_PATH_BALANCED_TRAIN).sample(frac=0.1).reset_index(drop=True) # shuffle
name_to_type_train=train_info.set_index("NAME").to_dict()["TYPE"]
type_to_name_train = {v: k for k, v in name_to_type_train.items()}
# type_to_name_train, train_info

In [8]:
train_info

Unnamed: 0,NAME,TYPE,NOISED
0,train_034633.jpg,95,1
1,train_091111.jpg,206,0
2,train_111232.jpg,231,0
3,train_052880.jpg,116,0
4,train_094673.jpg,74,0
...,...,...,...
12821,train_085695.jpg,8,0
12822,train_029597.jpg,80,0
12823,train_101034.jpg,174,0
12824,train_042681.jpg,45,0


In [9]:
colnames=['NAME', 'TYPE'] 
val_info = pd.read_csv(ANNOTATION_PATH_VALIDATION_CLEANED, index_col=False, header=None, names=colnames)
name_to_type_val=val_info.set_index("NAME").to_dict()["TYPE"]
type_to_name_val = {v: k for k, v in name_to_type_val.items()}
# type_to_name_val

In [10]:
val_info = val_info.sample(frac=0.1).reset_index(drop=True) # shuffle

In [11]:
val_info

Unnamed: 0,NAME,TYPE
0,val_001413.jpg,85
1,val_004268.jpg,179
2,val_007646.jpg,85
3,val_005970.jpg,33
4,val_010661.jpg,241
...,...,...
1194,val_000552.jpg,178
1195,val_005690.jpg,41
1196,val_008170.jpg,112
1197,val_009807.jpg,11


In [12]:
def initilize_path_into_val_dataset(row):
  return VAL_PATH_LOCAL + row.NAME

In [13]:
val_info["PATH"] = 0
val_info["PATH"] = val_info.apply(initilize_path_into_val_dataset, axis=1)

In [14]:
val_info

Unnamed: 0,NAME,TYPE,PATH
0,val_001413.jpg,85,/content/val_set/val_001413.jpg
1,val_004268.jpg,179,/content/val_set/val_004268.jpg
2,val_007646.jpg,85,/content/val_set/val_007646.jpg
3,val_005970.jpg,33,/content/val_set/val_005970.jpg
4,val_010661.jpg,241,/content/val_set/val_010661.jpg
...,...,...,...
1194,val_000552.jpg,178,/content/val_set/val_000552.jpg
1195,val_005690.jpg,41,/content/val_set/val_005690.jpg
1196,val_008170.jpg,112,/content/val_set/val_008170.jpg
1197,val_009807.jpg,11,/content/val_set/val_009807.jpg


In [15]:
def initilize_path_into_dataset(row):
  if row.NOISED == 0:
    row.PATH = TRAIN_PATH_LOCAL + row.NAME
  elif row.NOISED == 1:
    row.PATH = NOISED_PATH_LOCAL + row.NAME

  return row.PATH

In [16]:
train_info["PATH"] = 0
train_info["PATH"] = train_info.apply(initilize_path_into_dataset, axis=1)

In [17]:
train_info

Unnamed: 0,NAME,TYPE,NOISED,PATH
0,train_034633.jpg,95,1,/content/noise/train_034633.jpg
1,train_091111.jpg,206,0,/content/train_set/train_091111.jpg
2,train_111232.jpg,231,0,/content/train_set/train_111232.jpg
3,train_052880.jpg,116,0,/content/train_set/train_052880.jpg
4,train_094673.jpg,74,0,/content/train_set/train_094673.jpg
...,...,...,...,...
12821,train_085695.jpg,8,0,/content/train_set/train_085695.jpg
12822,train_029597.jpg,80,0,/content/train_set/train_029597.jpg
12823,train_101034.jpg,174,0,/content/train_set/train_101034.jpg
12824,train_042681.jpg,45,0,/content/train_set/train_042681.jpg


In [18]:
from torch.utils.data import Dataset, DataLoader
import PIL
import torchvision.transforms.functional as fn

class FoodDataset(Dataset):
  def __init__(self, x, y, aug=None):
    self.y = y
    self.x = x 
    self.aug = aug
    self.img_size=224
    # Normalize does the following for each channel:
      # image = (image - mean) / std
    self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])

  def __preprocess_image(self, im):

    im = cv2.resize(im, (self.img_size, self.img_size))
    
    # Specify that augmentation must be done if the augmentation object is
    # provided. This is useful is we need to perform augmentation on
    # training data but not on validation data
    if self.aug: im=self.aug.augment_image(im)

    #  While leveraging pre-trained models, it is mandatory to resize,
    # permute, and then normalize images (as appropriate for that pretrained model), where the images are first scaled to a value between
    # 0 and 1 across the 3 channels and then normalized to a mean of
    # [0.485, 0.456, 0.406] and a standard deviation of [0.229, 0.224, 0.225]
    # across the RGB channels.
    im = torch.tensor(im).permute(2,0,1)
    im = self.normalize(im/255.)
    return im[None]
    
  def __getitem__(self, ix):
    f = self.x[ix]
    target = self.y[ix]

    im = cv2.imread(f) 
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    #return im.float().to(device), torch.tensor([target]).float().to(device)
    return im, target

  def __len__(self): return len(self.x)

  # In general, we leverage the collate_fn method when we have to
  # perform heavy computations. This is because performing such
  # computations on a batch of images in one go is faster than doing it
  # one image at a time.

  # Define collate_fn, which takes the batch of data as input:
  def collate_fn(self, batch):
    """
       batch: is a list of tuples with (example, label)
             where 'example' is a tensor of arbitrary shape
             and label is scalar
    """

    ims, classes = [], []
    for im, target in batch:
      im = self.__preprocess_image(im)
      ims.append(im)
      classes.append(float(target))

    classes = [torch.tensor(x).to(device).float() for x in classes]
    classes = torch.stack(classes).type(torch.LongTensor).to(device)
    ims = torch.cat(ims).to(device)

    # -------------------------------------------------
    # Separate the batch of images and their classes into two different variables
    # ims, classes = list(zip(*batch))
    
    # Specify that augmentation must be done if the augmentation object is
    # provided. This is useful is we need to perform augmentation on
    # training data but not on validation data
    # if self.aug: ims=self.aug.augment_images(images=[t.cpu().numpy() for t in ims])

    # Create tensors of images, along with scaling data, by dividing the image shape by 255    
    # ims = np.array([t.cpu().numpy() for t in ims])
    # classes = np.array([t.cpu().numpy() for t in classes])
    
    # ims = torch.tensor(ims)[:,:,:,:].to(device)#/255.
    # classes = torch.tensor(classes).type(torch.LongTensor).to(device)
    # -------------------------------------------------
    
    return ims, classes

In [19]:
tr_images = train_info.PATH.to_numpy()
tr_images 

array(['/content/noise/train_034633.jpg',
       '/content/train_set/train_091111.jpg',
       '/content/train_set/train_111232.jpg', ...,
       '/content/train_set/train_101034.jpg',
       '/content/train_set/train_042681.jpg',
       '/content/train_set/train_016378.jpg'], dtype=object)

In [20]:
tr_targets = train_info.TYPE.to_numpy()
tr_targets

array([ 95, 206, 231, ..., 174,  45,  36])

In [21]:
val_images = val_info.PATH.to_numpy()
val_images 

array(['/content/val_set/val_001413.jpg',
       '/content/val_set/val_004268.jpg',
       '/content/val_set/val_007646.jpg', ...,
       '/content/val_set/val_008170.jpg',
       '/content/val_set/val_009807.jpg',
       '/content/val_set/val_006789.jpg'], dtype=object)

In [22]:
val_targets = val_info.TYPE.to_numpy()
val_targets

array([ 85, 179,  85, ..., 112,  11, 153])

Define the data augmentation pipeline:

In [23]:
from imgaug import augmenters as iaa
import random

def get_random_scale():
  return random.uniform(0.5, 1.5)

def get_random_translation():
  return {'x':random.randint(-50,50),'y':random.randint(-50,50)}

aug = iaa.Sequential([
iaa.Affine(rotate=(0,360), translate_px=get_random_translation(), scale=get_random_scale(), fit_output=False, mode="constant"),
iaa.SaltAndPepper(0.2),
iaa.GaussianBlur(sigma=1),
# iaa.LinearContrast(0.5),
# iaa.Multiply(0.5),
])




In [24]:
train = FoodDataset(tr_images, tr_targets, aug=aug)
val = FoodDataset(val_images, val_targets)

Next, we define the DataLoader, along with the object's
collate_fn method, as follows:

In [25]:
BATCH = 32
trn_dl = DataLoader(train, batch_size=BATCH, collate_fn=train.collate_fn,shuffle=True, drop_last=True)
val_dl = DataLoader(val, batch_size=BATCH, collate_fn=train.collate_fn)

In [26]:
a,b = next(iter(trn_dl))
print(a.shape, b.shape)

# VGG16

In [27]:
import torchvision
import torch.nn as nn
import torch
import torch.nn.functional as F
from torchvision import transforms,models,datasets
!pip install torch_summary
from torchsummary import summary
device = 'cuda' if torch.cuda.is_available() else 'cpu'

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms,models,datasets
import matplotlib.pyplot as plt
from PIL import Image
from torch import optim
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import cv2, glob, numpy as np, pandas as pd
from glob import glob
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [28]:
def get_model():
  model = models.vgg16(pretrained=True).to(device)
  # Specify that we want to freeze all the parameters in the model
  # downloaded previously
  for param in model.parameters():
    param.requires_grad = False
  
  # Replace the avgpool module to return a feature map of size 1 x 1
  # instead of 7 x 7, in other words, the output is now going to be
  # batch_size x 512 x 1 x 1:

  model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))

  # The layer above, nn.AdaptiveAvgPool2d, is yet
  # another pooling layer with a twist. We specify the output feature
  # map size instead. The layer automatically computes the kernel size
  # so that the specified feature map size is returned. For example, if the
  # input feature map size dimensions were batch_size x 512 x k
  # x k, then the pooling kernel size is going to be k x k. The major
  # advantage with this layer is that whatever the input size, the output
  # from this layer is always fixed and, hence, the neural network can
  # accept images of any height and width.




  # Define the classifier module of the model, where we first flatten
  # the output of the avgpool module, connect the 512 units to the 256
  # units, and perform an activation prior to connecting to the output
  # layer:
  model.classifier = nn.Sequential(nn.Flatten(),
                                    nn.Linear(512, 256),
                                    nn.ReLU(),
                                    nn.Dropout(0.2),
                                    nn.Linear(256, 251),
                                    nn.Softmax())
  
  loss_fn = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),lr= 1e-3)
  return model.to(device), loss_fn, optimizer

In [29]:
!pip install torch_summary
from torchsummary import summary
model, criterion, optimizer = get_model()
summary(model, torch.zeros(1,3,224,224))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:06<00:00, 88.3MB/s]


Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        (1,792)
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        (36,928)
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       (73,856)
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       (147,584)
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         (295,168)
|    └─ReLU: 2-12                        [-1, 256, 56,

  input = module(input)


Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        (1,792)
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        (36,928)
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       (73,856)
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       (147,584)
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         (295,168)
|    └─ReLU: 2-12                        [-1, 256, 56,

In [30]:
def train_batch(x, y, model, opt, loss_fn):
  '''
  This code passes the batch of images through the model in the
  forward pass. It also computes the loss on batch and then passes the
  weights through backward propagation and updates them. Finally, it
  flushes the memory of the gradient so that it doesn't influence how the
  gradient is calculated in the next pass.
  '''
  model.train() # <- let's hold on to this until we reach dropout section


  # ----------------------------------
  # call your model like any python function on your batch
  # of inputs
  prediction = model(x)
  # ----------------------------------



  # ----------------------------------
  # compute loss
  batch_loss = loss_fn(prediction, y)
  # ----------------------------------



  # ----------------------------------
  # based on the forward pass in `model(x)` compute all the
  # gradients of 'model.parameters()'
  batch_loss.backward()
  # ----------------------------------


  # ----------------------------------
  # apply new-weights = f(old-weights, old-weight-gradients)
  # where "f" is the optimizer
  optimizer.step()
  # ----------------------------------


  # ----------------------------------
  # Flush gradients memory for next batch of calculations
  optimizer.zero_grad()
  return batch_loss.item() # Now that we've done this, we can extract the loss value as a scalar by fetching batch_loss.item() on top of batch_loss
  # ----------------------------------


# since there's no need for updating weights,
# we might as well not compute the gradients.
# Using this '@' decorator on top of functions
# will disable gradient computation in the entire function
@torch.no_grad()
def accuracy(x, y, model):
  
  model.eval() # <- let's wait till we get to dropout section

  # ----------------------------------
  # get the prediction matrix for a tensor of `x` images
  prediction = model(x)
  # In the following lines of code, we are explicitly mentioning that we don't
  # need to calculate the gradient by providing @torch.no_grad() and
  # calculating the prediction values by feed-forwarding input through the
  # model
  # ----------------------------------


  # ----------------------------------
  # compute if the location of maximum in each row
  # coincides with ground truth
  # we invoke prediction.max(-1) to identify the argmax index corresponding to each row.
  max_values, argmaxes = prediction.max(-1) 
  # ----------------------------------


  # ----------------------------------
  # Furthermore, we are comparing our argmaxes with the ground truth
  # through argmaxes == y so that we can check whether each row is
  # predicted correctly.
  is_correct = argmaxes == y
  # ----------------------------------



  # ----------------------------------
  return is_correct.cpu().numpy().tolist() # Finally, we are returning the list of is_correct objects after moving it to a CPU and converting it into a numpy array.
  # ----------------------------------


In [31]:
model, loss_fn, optimizer = get_model()

In [32]:
# Invoke the lists that contain the accuracy and loss values at the end of each epoch
train_losses, train_accuracies = [], [] 
val_accuracies = []


for epoch in range(5): # Define the number of epochs
  print(f" epoch {epoch + 1}/5")

  # Invoke the lists that will contain the accuracy and loss values
  # corresponding to each batch within an epoch
  train_epoch_losses, train_epoch_accuracies = [], []
  val_epoch_accuracies = []


  # for ix, batch in enumerate(iter(trn_dl)):
  #   x, y = batch
  #   # y = y.squeeze()
  #   # print(len(y))
  #   batch_loss = train_batch(x, y, model, optimizer, loss_fn)
  #   train_epoch_losses.append(batch_loss)

  # train_epoch_loss = np.array(train_epoch_losses).mean()


  for ix, batch in enumerate(iter(trn_dl)):
    x, y = batch
    is_correct = accuracy(x, y, model)
    train_epoch_accuracies.extend(is_correct)

  train_epoch_accuracy = np.mean(train_epoch_accuracies)


  for ix, batch in enumerate(iter(val_dl)):
    x, y = batch
    val_is_correct = accuracy(x, y, model)
    val_epoch_accuracies.extend(val_is_correct)
  
  val_epoch_accuracy = np.mean(val_epoch_accuracies)



  train_losses.append(train_epoch_loss)
  train_accuracies.append(train_epoch_accuracy)
  val_accuracies.append(val_epoch_accuracy)

KeyboardInterrupt: ignored

In [None]:
epochs = np.arange(5)+1
import matplotlib.ticker as mtick
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
%matplotlib inline
plt.plot(epochs, train_accuracies, 'bo',
label='Training accuracy')
plt.plot(epochs, val_accuracies, 'r',
label='Validation accuracy')
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.title('Training and validation accuracy \
with VGG16 \nand 1K training data points')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.ylim(0.95,1)
plt.gca().set_yticklabels(['{:.0f}%'.format(x*100) \
for x in plt.gca().get_yticks()])
plt.legend()
plt.grid('off')
plt.show()