# Preprocessing Images

In [10]:
import glob

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [11]:
# Notebook-wide settings shared by every loading approach below.
# NOTE: `dir` shadows Python's built-in dir(); the name is kept because
# the other cells in this notebook refer to it.
dir = "../input/natural-images/natural_images/"  # dataset root (trailing slash is relied on by the glob patterns)
batch_size = 32          # number of images per batch
image_size = (256, 256)  # size every image is resized to

This dataset contains images belonging to 8 different classes:
airplane, car, cat, dog, flower, fruit, motorbike, and person.
The dataset can be found [here](https://www.kaggle.com/prasunroy/natural-images).

In [12]:
# Show every entry directly under the dataset root (one folder per class).
class_folders = glob.glob(dir + '*')
for folder_path in class_folders:
    print(folder_path)

../input/natural-images/natural_images/motorbike
../input/natural-images/natural_images/flower
../input/natural-images/natural_images/fruit
../input/natural-images/natural_images/cat
../input/natural-images/natural_images/dog
../input/natural-images/natural_images/airplane
../input/natural-images/natural_images/car
../input/natural-images/natural_images/person


# Directly reading images using OpenCV

In [13]:
# Read every class folder with OpenCV and group the images into fixed-size,
# single-class batches.
#
# Result:
#   images -> array of batches, each holding `batch_size` resized images
#   labels -> array of batches, each holding `batch_size` copies of the
#             integer index of the class folder the images came from
#
# NOTE: cv2.imread returns channels in BGR order, unlike the Keras loaders
# used later in this notebook (RGB).
# NOTE: all complete batches are kept in memory at once.
labels = []
images = []

# Sort the folders so that the integer assigned to each class is the same on
# every run; glob.glob gives no ordering guarantee.
for label, folder in enumerate(sorted(glob.glob(dir+'*'))):
    batch = []

    for img in glob.glob(folder+'/*'):
        image = cv2.imread(img)
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising; skip it instead of crashing in resize.
            continue
        # Resize the image according to use
        image = cv2.resize(image, dsize=image_size, interpolation=cv2.INTER_CUBIC)
        batch.append(image)

        if len(batch) == batch_size:
            labels.append([label]*batch_size)
            images.append(np.array(batch))
            # Start a fresh batch. Without this reset only the first
            # `batch_size` images of each folder would ever be emitted.
            batch = []

    # A trailing partial batch (fewer than `batch_size` images) is dropped on
    # purpose so that all batches share one shape and stack into one array.

images = np.array(images)
labels = np.array(labels)

In [15]:
# Pull out the second batch (index 1) and report the shapes of its
# label array and image array.
x, y = images[1], labels[1]
print(f"Ouput shape: {y.shape}\nInput Shape: {x.shape}")

Ouput shape: (32,)
Input Shape: (32, 256, 256, 3)


# Using Keras preprocessing

## Using the image_dataset_from_directory method

In [16]:
# Build a batched dataset straight from the class sub-folders of `dir`.
# No validation split is requested here (validation_split / subset are
# intentionally left unset), so all files go into this one dataset.
data2 = tf.keras.preprocessing.image_dataset_from_directory(
    directory=dir,
    batch_size=batch_size,
    image_size=image_size,
)

Found 6899 files belonging to 8 classes.


In [17]:
# Look at a single batch only: report label and image tensor shapes.
for x, y in data2.take(1):
    print(f"Ouput shape: {y.shape}\nInput Shape: {x.shape}")

Ouput shape: (32,)
Input Shape: (32, 256, 256, 3)


## Using `ImageDataGenerator`

In [18]:
# Image generator that multiplies every pixel value by 1/255.
# (ImageDataGenerator is imported in the notebook's top import cell.)
datagen = ImageDataGenerator(rescale=1./255)

In [21]:
# Stream batches from the class sub-folders of `dir` through `datagen`.
# class_mode='categorical' makes the labels one-hot vectors, hence the
# (batch, 8) label shape printed further down.
dataset = datagen.flow_from_directory(
    directory=dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
)

Found 6899 images belonging to 8 classes.


In [22]:
# Fetch just the first batch from the generator and report its shapes.
x, y = next(iter(dataset))
print(f"Ouput shape: {y.shape}\nInput Shape: {x.shape}")

Ouput shape: (32, 8)
Input Shape: (32, 256, 256, 3)
