# Creating Data Loaders/Generators

In [15]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

from tensorflow.data import Dataset
from tensorflow.keras import layers, Sequential, utils, optimizers, losses
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [2]:
x, y = make_classification(n_samples=10000, n_features=100, n_informative=30, n_classes=2, random_state=12)

## Creating the dataset class

In [3]:
class SampleDataset(keras.utils.Sequence):
    
    def __init__(self, x, y, batch_size=32, shuffle=True):
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.x = x
        self.y = y
    
    def __len__(self):
        return len(x)
    
    def __getitem__(self, index):
        x = self.x[index*self.batch_size : (index + 1)*self.batch_size]
        y = self.y[index*self.batch_size : (index + 1)*self.batch_size]
        return x, y

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [5]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((7000, 100), (3000, 100), (7000,), (3000,))

In [6]:
train_dataset = SampleDataset(x_train, y_train)
test_dataset = SampleDataset(x_test, y_test)

## Creating and training a simple model

In [7]:
model = Sequential(
                [layers.Input(shape=(100, )),
                layers.Dense(32, activation='relu'),
                layers.Dense(16, activation='relu'),
                layers.Dense(1)]
)



In [8]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                3232      
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
Total params: 3,777
Trainable params: 3,777
Non-trainable params: 0
_________________________________________________________________


In [9]:
optimizer = optimizers.Adam(learning_rate=1e-3)
loss = losses.BinaryCrossentropy()

model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])

In [10]:
model.fit(train_dataset, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x1a140a2aca0>

In [11]:
model.evaluate(test_dataset)



[2.633592128753662, 0.7926666736602783]

## Using predefined functions

In [12]:
def train_datagen():
    for i in range(7000):
        # img = Image.open(img_paths[idx])
        # label = img_labels[idx]
        img = x[i]
        label = y[i]
        yield img, label

In [13]:
train = Dataset.from_generator(train_datagen,
                               output_signature = (tf.TensorSpec(shape=(100,), dtype=tf.float64),
                               tf.TensorSpec(shape=(), dtype=tf.int32))).batch(32)

In [14]:
model.fit(train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1a140b086a0>

## One more Method (One for all)

If we have a dataframe containing the a column with the paths of the images we could use **ImageDataGenerator** to create a generetor which returns a batch of x and y with x being images and y being corresponding label.

In case of multilabel classification you can pass a list of column names in the **y_col** parameter.
Tutorial - <a href="https://vijayabhaskar96.medium.com/multi-label-image-classification-tutorial-with-keras-imagedatagenerator-cd541f8eaf24">here</a>

In [18]:
ImageDataGenerator.flow_from_dataframe

<function tensorflow.python.keras.preprocessing.image.ImageDataGenerator.flow_from_dataframe(self, dataframe, directory=None, x_col='filename', y_col='class', weight_col=None, target_size=(256, 256), color_mode='rgb', classes=None, class_mode='categorical', batch_size=32, shuffle=True, seed=None, save_to_dir=None, save_prefix='', save_format='png', subset=None, interpolation='nearest', validate_filenames=True, **kwargs)>