In [15]:
# Download the Cat and Dog Datasets
!curl -L -o cats-and-dogs-mini-dataset.zip\
    https://www.kaggle.com/api/v1/datasets/download/aleemaparakatta/cats-and-dogs-mini-dataset
    

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 21.8M  100 21.8M    0     0  44.7M      0 --:--:-- --:--:-- --:--:-- 79.8M


In [16]:
# Creating Dataset structure for Training and Inferencing

!mkdir data
!unzip cats-and-dogs-mini-dataset.zip -d dataset

Archive:  cats-and-dogs-mini-dataset.zip
  inflating: dataset/cats_set/cat.4001.jpg  
  inflating: dataset/cats_set/cat.4002.jpg  
  inflating: dataset/cats_set/cat.4003.jpg  
  inflating: dataset/cats_set/cat.4004.jpg  
  inflating: dataset/cats_set/cat.4005.jpg  
  inflating: dataset/cats_set/cat.4006.jpg  
  inflating: dataset/cats_set/cat.4007.jpg  
  inflating: dataset/cats_set/cat.4008.jpg  
  inflating: dataset/cats_set/cat.4009.jpg  
  inflating: dataset/cats_set/cat.4010.jpg  
  inflating: dataset/cats_set/cat.4011.jpg  
  inflating: dataset/cats_set/cat.4012.jpg  
  inflating: dataset/cats_set/cat.4013.jpg  
  inflating: dataset/cats_set/cat.4014.jpg  
  inflating: dataset/cats_set/cat.4015.jpg  
  inflating: dataset/cats_set/cat.4016.jpg  
  inflating: dataset/cats_set/cat.4017.jpg  
  inflating: dataset/cats_set/cat.4018.jpg  
  inflating: dataset/cats_set/cat.4019.jpg  
  inflating: dataset/cats_set/cat.4020.jpg  
  inflating: dataset/cats_set/cat.4021.jpg  
  inflating: d

In [21]:
import os

# Creating the 'data' directory if not available
data_dir = 'data'
os.makedirs(data_dir,exist_ok=True)

#Creating the train and test subdirectories within 'data'
train_dir = os.path.join(data_dir,'train')
test_dir = os.path.join(data_dir,'test')
os.makedirs(train_dir,exist_ok=True)
os.makedirs(test_dir,exist_ok=True)

In [22]:
# moving training and testing data from 'dataset' to Data

import os

source_cat = 'dataset/cats_set'
dest_cat = 'dataset/cat'
source_dog = 'dataset/dogs_set'
dest_dog = 'dataset/dog'

# rename the directories if they exist
if os.path.exists(source_cat):
    os.rename(source_cat,dest_cat)
if os.path.exists(source_dog):
    os.rename(source_dog,dest_dog)

In [25]:
# training and testing

import os
import random
import shutil

def train_test_split_folder(source_folder,train_folder,test_folder,split_ratio=0.8):

    if not os.path.exists(train_folder):
        os.makedirs(train_folder)

    if not os.path.exists(test_folder):
        os.makedirs(test_folder)

    for class_name in os.listdir(source_folder):
        class_source_path = os.path.join(source_folder,class_name)

        if os.path.isdir(class_source_path):
            train_class_path = os.path.join(train_folder,class_name)
            test_class_path = os.path.join(test_folder,class_name)

            if not os.path.exists(train_class_path):
                os.makedirs(train_class_path)

            if not os.path.exists(test_class_path):
                os.makedirs(test_class_path)
            
            images = [f for f in os.listdir(class_source_path) if os.path.isfile(os.path.join(class_source_path,f))]
            random.shuffle(images)
            split_index = int(len(images) * split_ratio)
            train_images = images[:split_index]
            test_images = images[split_index:]

            for image in train_images:
                source_path = os.path.join(class_source_path,image)
                destination_path = os.path.join(train_class_path,image)
                shutil.copy(source_path,destination_path)
            
            for image in test_images:
                source_path = os.path.join(class_source_path,image)
                destination_path = os.path.join(test_class_path,image)
                shutil.copy(source_path, destination_path)

train_test_split_folder("dataset","data/train","data/test")


In [26]:
# Counting total images in Folder

import os

def count_images_per_folder(root_folder):

    image_counts = {}
    for dirpath,dirnames,filenames in os.walk(root_folder):
        image_count = 0
        for filename in filenames:
            if filename.lower().endswith(('.png','.jpg','.jpeg','.bmp','.gif')):
                image_count += 1
        if image_count > 0:
            image_counts[dirpath] = image_count
    return image_counts

image_counts = count_images_per_folder("data")
for folder,count in image_counts.items():
    print(f"Folder : {folder}, Number of Images : {count}")


Folder : data/train/cat, Number of Images : 400
Folder : data/train/dog, Number of Images : 400
Folder : data/test/cat, Number of Images : 100
Folder : data/test/dog, Number of Images : 100


In [27]:
!sudo apt install tree -y

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  tree
0 upgraded, 1 newly installed, 0 to remove and 3 not upgraded.
Need to get 47.4 kB of archives.
After this operation, 111 kB of additional disk space will be used.
Get:1 https://us-east-1.ec2.archive.ubuntu.com/ubuntu noble-updates/universe amd64 tree amd64 2.1.1-2ubuntu3.24.04.2 [47.4 kB]
Fetched 47.4 kB in 0s (971 kB/s)[0m[33m
debconf: delaying package configuration, since apt-utils is not installed

7[0;23r8[1ASelecting previously unselected package tree.
(Reading database ... 141970 files and directories currently installed.)
Preparing to unpack .../tree_2.1.1-2ubuntu3.24.04.2_amd64.deb ...
7[24;0f[42m[30mProgress: [  0%][49m[39m [..........................................................] 87[24;0f[42m[30mProgress: [ 20%][49m[39m [###########...............................................] 8Unpacking tree (2.1.1-2u

In [35]:
# directory structure 
!tree -d /content/data

/content/data  [error opening dir]

0 directories


In [36]:
!tree data

[01;34mdata[0m
├── [01;34mtest[0m
│   ├── [01;34mcat[0m
│   │   ├── [01;35mcat.4007.jpg[0m
│   │   ├── [01;35mcat.4011.jpg[0m
│   │   ├── [01;35mcat.4023.jpg[0m
│   │   ├── [01;35mcat.4024.jpg[0m
│   │   ├── [01;35mcat.4025.jpg[0m
│   │   ├── [01;35mcat.4028.jpg[0m
│   │   ├── [01;35mcat.4032.jpg[0m
│   │   ├── [01;35mcat.4041.jpg[0m
│   │   ├── [01;35mcat.4045.jpg[0m
│   │   ├── [01;35mcat.4047.jpg[0m
│   │   ├── [01;35mcat.4049.jpg[0m
│   │   ├── [01;35mcat.4050.jpg[0m
│   │   ├── [01;35mcat.4051.jpg[0m
│   │   ├── [01;35mcat.4055.jpg[0m
│   │   ├── [01;35mcat.4059.jpg[0m
│   │   ├── [01;35mcat.4067.jpg[0m
│   │   ├── [01;35mcat.4069.jpg[0m
│   │   ├── [01;35mcat.4073.jpg[0m
│   │   ├── [01;35mcat.4075.jpg[0m
│   │   ├── [01;35mcat.4079.jpg[0m
│   │   ├── [01;35mcat.4089.jpg[0m
│   │   ├── [01;35mcat.4092.jpg[0m
│   │   ├── [01;35mcat.4107.jpg[0m
│   │   ├── [01;35mcat.4112.jpg[0m
│   │   ├── [01;35mcat.4127.jpg[0m
│   │   ├── 

In [None]:
# Necessary Import