<a href="https://colab.research.google.com/github/rohit-447/Py-Torch-Learning/blob/main/Pytorch_CNN_Food101.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Working with the Food101 dataset as a custom dataset


In [None]:
#@title Import of modules
import torch
from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import Food101
import matplotlib.pyplot as plt
from pathlib import Path
torch.__version__, torchvision.__version__

In [None]:
#@title Download of Dataset
# Build the torchvision Food101 dataset rooted at ./data, asking it to download
# the archive; the resulting dataset object is kept in FOOD101.
FOOD101 = Food101(download=True, root='data')

In [None]:
#@title Walk through Directory
import os
def walk_through_dir(dir_path):
  """Print a one-line summary for every directory found under ``dir_path``.

  The tree is traversed with ``os.walk``; for each visited directory the
  number of immediate sub-directories and files is printed together with
  the directory's path.

  Args:
    dir_path: Root directory to traverse (string or path-like).

  Returns:
    None. The summary is written to standard output.
  """
  for current_dir, sub_dirs, files in os.walk(dir_path):
    n_dirs = len(sub_dirs)
    n_files = len(files)
    print(f"There are {n_dirs} directories and {n_files} files in {current_dir}")

# Inspect the layout of the data folder.
PATH = Path('data')
walk_through_dir(PATH)

In [None]:
#@title Split of Dataset into Train and Test and save them.
from sklearn.model_selection import train_test_split
import os
import shutil
import tqdm.auto as tqdm

DATA_ORIGINAL_PATH='data/food-101'
IMAGES='images'
DATA_SPLIT_PATH='Dataset_new'
# exist_ok=True already makes this a no-op when the folder is present,
# so no separate existence check is needed.
os.makedirs(DATA_SPLIT_PATH, exist_ok=True)

source_dir=os.path.join(DATA_ORIGINAL_PATH, IMAGES)
train_dir=os.path.join(DATA_SPLIT_PATH, 'train')
test_dir=os.path.join(DATA_SPLIT_PATH, 'test')
val_dir=os.path.join(DATA_SPLIT_PATH, 'val')


def copy_files(file_list, save_path, class_name):
  """Copy every file in ``file_list`` into ``save_path/class_name``.

  Defined once, outside the class loop, and given the class name explicitly
  instead of reading the loop variable from the enclosing scope.

  Args:
    file_list: Iterable of source file paths.
    save_path: Root folder of the split (e.g. the train or test folder).
    class_name: Name of the class sub-folder to create under ``save_path``.
  """
  save_root=os.path.join(save_path, class_name)
  os.makedirs(save_root, exist_ok=True)
  for f in file_list:
    shutil.copy(f, os.path.join(save_root, os.path.basename(f)))


# Start from empty split folders so files from an earlier run cannot linger.
# NOTE: the 'val' folder is (re)created here but nothing below copies into it;
# it stays empty unless a later step fills it.
for dir_path in [train_dir, test_dir, val_dir]:
  if os.path.exists(dir_path):
    shutil.rmtree(dir_path)
  os.makedirs(dir_path)

# os.listdir returns entries in arbitrary order. Sorting both the class list
# and each class's image list gives train_test_split the same input on every
# machine, so random_state=42 really yields a reproducible split.
for class_name in tqdm.tqdm(sorted(os.listdir(source_dir)), desc="Processing Classes", unit="class"):
  class_path=os.path.join(source_dir, class_name)
  if not os.path.isdir(class_path):
    continue  # skip stray files next to the class folders

  # All .jpg images of this class, as full paths, in a stable order.
  images=sorted(os.path.join(class_path, img) for img in os.listdir(class_path) if img.endswith('.jpg'))
  # 80 % train / 20 % test, per class.
  dataset_train, dataset_test=train_test_split(images, test_size=0.2, random_state=42)

  copy_files(dataset_train, train_dir, class_name)
  copy_files(dataset_test, test_dir, class_name)
print('Dataset Suceesfully split into Train and Test')

In [None]:
# Check the dataset split: print directory and file counts under the split folder.
walk_through_dir(dir_path=DATA_SPLIT_PATH)

In [None]:
#@title Visualisation of Random Images
import random
# `import PIL` alone does not load the Image submodule; importing it
# explicitly keeps `PIL.Image.open` working regardless of what else has
# been imported before this cell.
import PIL.Image
import glob
DATASET=Path('Dataset_new')
# Every .jpg two levels below the dataset root: <split>/<class>/<file>.jpg
random_image=list(DATASET.glob('*/*/*.jpg'))
if not random_image:
  # Fail with an explicit message instead of random.choice's IndexError.
  raise FileNotFoundError(f"No .jpg files found under {DATASET}/<split>/<class>/")
random_image_path=random.choice(random_image)
# The parent folder name is the class label.
random_image_class=random_image_path.parent.stem
img=PIL.Image.open(random_image_path)
print(f'Image Path: {random_image_path}')
print(f'Class Height: {img.height} and Width: {img.width}')
img

In [None]:
#@title Device Agnostic Code
# Use the GPU when CUDA is available to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

In [None]:
#@title Data Transformation and Augmentation
from torchvision import transforms

# Steps applied, in order, to every image that goes through the pipeline.
transform_steps = [
  transforms.Resize(size=(128,128)),        # resize every image to 128x128
  transforms.RandomHorizontalFlip(p=0.5),   # mirror horizontally with probability 0.5
  transforms.ToTensor(),                    # convert the PIL image to a tensor
]
data_transformation = transforms.Compose(transform_steps)


In [None]:
#@title Data Comparison with Augmented Data
def plot_transformation_images(images_path, transform, n=3, seed=42):
  """Plot ``n`` randomly chosen images next to their transformed versions.

  For each sampled path one figure with two panels is created: the original
  image on the left and ``transform(image)`` on the right. The name of the
  image's parent folder is used as the class label in the titles.

  Args:
    images_path: Sequence of image paths (objects exposing ``.parent.stem``,
      e.g. ``pathlib.Path``).
    transform: Callable applied to the opened PIL image. Its result must
      support ``.permute(1, 2, 0)`` (the axes are reordered so the first
      axis becomes the last before plotting).
    n: Number of images to sample.
    seed: Seed for the sampling. The same seed and the same ``images_path``
      always select the same images; pass ``None`` to get a different
      sample on every call.

  Raises:
    ValueError: If ``n`` is negative or larger than ``len(images_path)``
      (raised by the sampling call).
  """
  # A private generator makes `seed` effective (it used to be ignored)
  # without reseeding the global `random` module.
  rng=random.Random(seed)
  random_images_paths=rng.sample(images_path, k=n)
  for random_image_path in random_images_paths:
    with PIL.Image.open(random_image_path) as f:
      # Left panel: the original image.
      fig, ax= plt.subplots(1,2)
      ax[0].imshow(f)
      ax[0].set_title(f"Original Image: {random_image_path.parent.stem} \n Size{f.size}")
      ax[0].axis('off')

      # Right panel: the transformed image, with its first axis moved last
      # so that imshow can draw it.
      transfomed_image=transform(f).permute(1,2,0)
      ax[1].imshow(transfomed_image)
      ax[1].set_title(f"Transformed Image: {random_image_path.parent.stem} \n{transfomed_image.shape}")
      ax[1].axis('off')
      fig.suptitle(f"Class: {random_image_path.parent.stem}", fontsize=16)


# Preview random images from the dataset next to their transformed versions.
plot_transformation_images(images_path=random_image, transform=data_transformation)

In [None]:
#@title Loading Data as a Custom Dataset via ImageFolder
from torchvision import datasets, transforms
TRAIN_DATA_DIR=Path('Dataset_new/train')
TEST_DATA_DIR=Path('Dataset_new/test')

# Evaluation should be deterministic, so the test split uses the training
# pipeline minus its random horizontal flip. Deriving it from
# data_transformation keeps all other steps (e.g. the resize) in sync.
test_transformation=transforms.Compose(
  [step for step in data_transformation.transforms
   if not isinstance(step, transforms.RandomHorizontalFlip)]
)

# ImageFolder takes each sub-folder of `root` as one class.
train_data=datasets.ImageFolder(root=TRAIN_DATA_DIR, transform=data_transformation, target_transform=None)
test_data=datasets.ImageFolder(root=TEST_DATA_DIR, transform=test_transformation, target_transform=None)

# Class names and the class-name -> index mapping, taken from the training split.
class_names=train_data.classes
class_names_dict=train_data.class_to_idx
class_names_dict

In [14]:
#@title Turning images into DataLoaders
from numbers import Number

from torch.utils.data import DataLoader
BATCH_SIZE=32
# os.cpu_count() may return None when the count cannot be determined;
# DataLoader needs an int, so fall back to 0 (load in the main process).
NUM_WORKERS=os.cpu_count() or 0
# Pinned host memory only helps host-to-GPU copies; enable it only when CUDA is available.
PIN_MEMORY=torch.cuda.is_available()
# Training batches are reshuffled every epoch.
train_dataloader= DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
# Evaluation does not need shuffling; a fixed order keeps runs comparable.
test_dataloader= DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)