<a href="https://colab.research.google.com/github/Dipak22/Case-Studies/blob/master/DataLoaders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import necessary functions and classes

In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from tqdm.notebook import tqdm


In [5]:
import kagglehub

# Fetch the latest published version of the dataset through kagglehub and keep
# the location it reports; later cells build the image-folder path from `path`.
dataset_handle = "karakaggle/kaggle-cat-vs-dog-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/kaggle-cat-vs-dog-dataset


In [6]:
import os # Allows to access files
import numpy as np
from PIL import Image # Allows us to Load Images
from collections import Counter # Utility function to give us the counts of unique items in an iterable

## Create dataset class

### Create image transformations to apply

In [24]:
# Preprocessing applied to every image, listed in execution order.
preprocessing_steps = [
    # Force one spatial size so samples can be stacked into a batch.
    transforms.Resize((224, 224)),
    # Mirror each image left-right with probability 0.5.
    transforms.RandomHorizontalFlip(p=0.5),
    # Convert the image into a tensor.
    transforms.ToTensor(),
    # Per-channel standardisation (the values commonly quoted for ImageNet).
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
img_transforms = transforms.Compose(preprocessing_steps)

In [26]:
class DogsVsCats(Dataset):
  """Map-style dataset over a folder that holds ``Cat`` and ``Dog`` image sub-folders.

  Samples are ordered with all dog files first, then all cat files; within each
  class the files are sorted by name. Labels are ``0`` for dogs and ``1`` for cats.

  Args:
    path_to_folder: Directory containing the ``Cat`` and ``Dog`` sub-folders.
    transform: Optional callable applied to each loaded PIL image. When omitted,
      the notebook-level ``img_transforms`` pipeline is used (the previous
      behaviour), so ``DogsVsCats(path)`` keeps working unchanged.

  Raises:
    OSError: If either sub-folder cannot be listed (propagated from ``os.listdir``).
  """

  def __init__(self, path_to_folder, transform=None):
    path_to_cats = os.path.join(path_to_folder, "Cat")
    path_to_dogs = os.path.join(path_to_folder, "Dog")

    # os.listdir returns entries in arbitrary order; sorting makes a given index
    # refer to the same file on every run, which a seeded split relies on.
    dog_files = sorted(os.listdir(path_to_dogs))
    cat_files = sorted(os.listdir(path_to_cats))

    path_to_dog_files = [os.path.join(path_to_dogs, file) for file in dog_files]
    path_to_cat_files = [os.path.join(path_to_cats, file) for file in cat_files]

    self.training_files = path_to_dog_files + path_to_cat_files
    self.dog_label, self.cat_label = 0, 1
    # Fall back to the notebook-level pipeline only when the caller gave none.
    self.transforms = img_transforms if transform is None else transform

  def __len__(self):
    """Return the total number of image files (dogs plus cats)."""
    return len(self.training_files)

  def _label_for_path(self, path_to_image):
    """Return the class label implied by the file's immediate parent folder."""
    # Compare the parent directory name exactly. A substring test on the whole
    # path would label every file as a dog whenever any ancestor directory
    # happens to contain the text "Dog".
    parent_folder = os.path.basename(os.path.dirname(path_to_image))
    return self.dog_label if parent_folder == "Dog" else self.cat_label

  def __getitem__(self, idx):
    """Load, transform and return ``(image, label)`` for the sample at ``idx``."""
    path_to_image = self.training_files[idx]
    label = self._label_for_path(path_to_image)
    # The context manager releases the file handle. convert("RGB") forces three
    # channels, since grayscale / palette / RGBA files would otherwise produce a
    # different channel count that clashes with 3-channel normalisation and batching.
    with Image.open(path_to_image) as raw_image:
      image = raw_image.convert("RGB")
    image = self.transforms(image)
    return image, label

# Build the dataset root with os.path.join (as the dataset class does for its
# sub-folders) instead of concatenating a string with hard-coded "/" separators.
dogvcat = DogsVsCats(os.path.join(path, "kagglecatsanddogs_3367a", "PetImages"))
print("total Training samples", len(dogvcat))

# Shuffled loader over the full dataset, 16 samples per batch.
dogsvcatsloader = DataLoader(dogvcat, batch_size=16, shuffle=True)

# Smoke test: pull one batch and show its labels and tensor shape, then stop.
for images, labels in dogsvcatsloader:
  print("image label:", labels)
  print("Image shape", images.shape)
  break

total Training samples 24961
image label: tensor([1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1])
Image shape torch.Size([16, 3, 224, 224])


In [23]:
# Inspect the first sample. Index 0 is a dog because dog files are placed first.
# NOTE(review): with the 224x224 Resize in img_transforms the image tensor should
# come out as 3x224x224 -- re-run this cell if the displayed shape differs.
first_sample = dogvcat[0]
i, l = first_sample
(i.shape, l)

(torch.Size([3, 375, 500]), 0)

## Partition the data into train and test splits

In [29]:
# 90/10 split. The test size is the remainder so both sizes always sum to len(dogvcat).
train_samples = int(0.9 * len(dogvcat))
test_samples = len(dogvcat) - train_samples
print(f"Train samples : {train_samples} , Test samples: {test_samples}")

# Seed the split so the same partition is drawn after every kernel restart.
# Without a fixed generator the test set changes between runs, so results are
# not comparable and previously trained-on images can leak into evaluation.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dogvcat,
    lengths=[train_samples, test_samples],
    generator=split_generator,
)

# NOTE(review): both subsets wrap the same `dogvcat` object, so the test split
# goes through the same transform pipeline as the training split, including any
# random augmentation it contains.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

## Test Loaders ###
# Print the shape and labels of the first batch from each loader, train first.
for loader in (train_dataloader, test_dataloader):
    for images, labels in loader:
        print(images.shape)
        print(labels)
        break


Train samples : 22464 , Test samples: 2497
torch.Size([16, 3, 224, 224])
tensor([0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0])
torch.Size([16, 3, 224, 224])
tensor([1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1])
