##### Loading the dataset

In [None]:
import os, shutil, pathlib

# Folder that make_subset copies the `{category}.{i}.jpg` source images from.
original_dir = pathlib.Path("Data/train")
# Root folder under which the smaller train/validation/test subsets are created.
new_base_dir = pathlib.Path("Data/kaggle_dogs_vs_cats_small")

def make_subset(subset_name, start_index, end_index):
    """Copy a slice of the cat and dog images into a named subset folder.

    For each category ("cat", "dog"), the files named ``{category}.{i}.jpg``
    with ``start_index <= i < end_index`` are copied from ``original_dir``
    into ``new_base_dir / subset_name / category``.

    Target folders are created with ``exist_ok=True`` so the cell can be
    re-run (e.g. after Restart & Run All) without raising FileExistsError;
    files that are already present are overwritten by the copy.

    Args:
        subset_name: Name of the subset folder, e.g. "train".
        start_index: First image index to copy (inclusive).
        end_index: Image index to stop at (exclusive).

    Raises:
        FileNotFoundError: If an expected source image does not exist.
    """
    for category in ("cat", "dog"):
        # Named target_dir (not dir) so the builtin dir() is not shadowed.
        target_dir = new_base_dir / subset_name / category
        os.makedirs(target_dir, exist_ok=True)
        for i in range(start_index, end_index):
            fname = f"{category}.{i}.jpg"
            shutil.copyfile(src=original_dir / fname, dst=target_dir / fname)

# Build the three splits from consecutive, non-overlapping index ranges.
# Each range is applied to both categories inside make_subset.
for split_name, first, last in (
    ("train", 0, 1000),
    ("validation", 1000, 1500),
    ("test", 1500, 2500),
):
    make_subset(split_name, start_index=first, end_index=last)

##### EDA: Explore the data with relevant graphs, statistics and insights 

##### Importing the required libraries

In [6]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pathlib

In [7]:
# Root of the small dataset (same path as new_base_dir in the first cell);
# its train/validation/test subfolders are read further down.
data_folder = pathlib.Path('Data/kaggle_dogs_vs_cats_small')

##### Warm-up: building a `tf.data.Dataset` from random numbers

In [18]:
# 1000 rows of 16 draws from the standard normal distribution
# (loc/scale spelled out; they are the function's defaults).
random_numbers = np.random.normal(loc=0.0, scale=1.0, size=(1000, 16))

In [19]:
# Summarise the array: container type, shape, dtype, then the first four rows
# (one item per line, exactly as four separate print calls would produce).
print(
    type(random_numbers),
    random_numbers.shape,
    random_numbers.dtype,
    random_numbers[:4],
    sep="\n",
)

<class 'numpy.ndarray'>
(1000, 16)
float64
[[-0.1706301   0.47733981  0.61086006 -0.6261309   1.78631815  1.30773067
   0.06694159  1.0339029  -0.9839949   0.19575353 -0.6411     -0.06758709
  -0.25074352  2.06607531 -0.00925246 -0.46160289]
 [ 0.28483996 -1.74353046  0.48621393 -0.06133071 -1.54852993 -1.09811719
   0.25188295 -0.54953267  0.55153544  0.28611023  0.73873451  1.1710162
   2.42646699 -0.04496869 -0.51304358  1.24591216]
 [-0.15045265  0.63594586 -0.31754799 -0.22527966 -0.3439997   1.57062705
   0.98946492  0.77400065 -0.08795634  0.30756077  1.3059014  -0.61677542
  -0.08327399 -1.16178349  2.37681339 -0.57664443]
 [-0.92365345  0.39030272 -2.36150109  0.97320394 -0.57373059 -0.71222249
  -0.43301238 -0.95637883 -0.00853996  2.38565745 -0.82579108 -0.81363516
  -0.09572037 -0.50016134 -0.26472383  0.95428346]]


In [20]:
# Wrap the array in a tf.data.Dataset; iterating it below yields one
# 16-value row of random_numbers per element.
dataset = tf.data.Dataset.from_tensor_slices(random_numbers)

In [21]:
# Inspect the concrete class of the object returned by from_tensor_slices.
type(dataset)

tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset

In [None]:
# Reminder of how enumerate() works: it pairs each item with its 0-based position.
letters = ['A','B','C']
for i, d in enumerate(letters):
    print(f"{i} {d}")

In [22]:
# Show the shape of the first three elements only; take(3) bounds the
# iteration, so no manual counter check / break is needed.
for i, element in enumerate(dataset.take(3)):
    print(element.shape)

(16,)
(16,)
(16,)


In [23]:
# Print the first three elements themselves (values, shape and dtype);
# take(3) limits the iteration instead of breaking out by hand.
for i, element in enumerate(dataset.take(3)):
    print(element)

tf.Tensor(
[-0.1706301   0.47733981  0.61086006 -0.6261309   1.78631815  1.30773067
  0.06694159  1.0339029  -0.9839949   0.19575353 -0.6411     -0.06758709
 -0.25074352  2.06607531 -0.00925246 -0.46160289], shape=(16,), dtype=float64)
tf.Tensor(
[ 0.28483996 -1.74353046  0.48621393 -0.06133071 -1.54852993 -1.09811719
  0.25188295 -0.54953267  0.55153544  0.28611023  0.73873451  1.1710162
  2.42646699 -0.04496869 -0.51304358  1.24591216], shape=(16,), dtype=float64)
tf.Tensor(
[-0.15045265  0.63594586 -0.31754799 -0.22527966 -0.3439997   1.57062705
  0.98946492  0.77400065 -0.08795634  0.30756077  1.3059014  -0.61677542
 -0.08327399 -1.16178349  2.37681339 -0.57664443], shape=(16,), dtype=float64)


In [24]:
# Group consecutive elements into batches of 32.
batched_dataset = dataset.batch(32)

# Each element is now a whole batch; show the shape of the first three.
for i, element in enumerate(batched_dataset.take(3)):
    print(element.shape)

(32, 16)
(32, 16)
(32, 16)


In [25]:
# Inspect the concrete class of the dataset returned by .batch().
type(batched_dataset)

tensorflow.python.data.ops.batch_op._BatchDataset

##### Using Keras Utility Functions to Create a Dataset for Images


In [26]:
from tensorflow.keras.utils import image_dataset_from_directory

# Build one image dataset per split folder, all with identical settings:
# images resized to 180x180 and delivered in batches of 32.
# The generator is consumed in order, so the splits are loaded
# train -> validation -> test, exactly as with three separate calls.
train_dataset, validation_dataset, test_dataset = (
    image_dataset_from_directory(
        data_folder / split_name,
        image_size=(180, 180),
        batch_size=32)
    for split_name in ("train", "validation", "test")
)

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


#### Train dataset

In [27]:
# Inspect the concrete class of the dataset returned by image_dataset_from_directory.
type(train_dataset)

tensorflow.python.data.ops.prefetch_op._PrefetchDataset

##### Displaying the shapes of the data and labels 

In [28]:
# Peek at a single batch: take(1) yields only the first (images, labels)
# pair, so the loop body runs once without needing a break.
for data_batch, labels_batch in train_dataset.take(1):
    print("data batch shape:", data_batch.shape)
    print("labels batch shape:", labels_batch.shape)

data batch shape: (32, 180, 180, 3)
labels batch shape: (32,)
