In [1]:
import os
import pathlib
import shutil
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
import tensorflow_datasets as tfds
from keras import regularizers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def create_label_dir(df):
    class_names = np.sort(df['label'].unique())
    number_of_classes = len(class_names)

    if not os.path.exists('../gdsc-ai-challenge/train'):
        return class_names, number_of_classes

    for class_name in class_names:
        dir = pathlib.Path('../gdsc-ai-challenge/train/' + class_name)
        if dir.exists():
            continue
        else:
            dir.mkdir()
    
    return class_names, number_of_classes

def sort_data(df):
    if not os.path.exists('../gdsc-ai-challenge/train'):
        return
        
    for image_dir in [str(img) for img in list(pathlib.Path('../gdsc-ai-challenge/train/train').glob('*.png'))]:
        id = int(image_dir.removeprefix('../gdsc-ai-challenge/train/train/').removesuffix('.png'))
        label = df['label'][id - 1]
        dest_path = os.path.join('../gdsc-ai-challenge/train', label, str(id) + '.png')
        shutil.move(image_dir, dest_path)

In [3]:
label_df = pd.read_csv('../gdsc-ai-challenge/trainLabels.csv')

class_names, number_of_classes = create_label_dir(label_df)
sort_data(label_df)

FileNotFoundError: [Errno 2] No such file or directory: '../gdsc-ai-challenge/train/train'

In [5]:
builder = tf.keras.utils.image_dataset_from_directory(
    '../gdsc-ai-challenge/train',
    color_mode='grayscale',
    batch_size=16,
    image_size=(32,32),
    seed=42
)

print(builder.cardinality())

def get_dataset_partitions_tf(ds, ds_size, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    assert (train_split + test_split + val_split) == 1
    
    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=42)
    
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    
    train_ds = ds.take(train_size)    
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    
    return train_ds, val_ds, test_ds

train_ds, val_ds, test_ds = get_dataset_partitions_tf(builder, 50000)

Found 50000 files belonging to 10 classes.
tf.Tensor(3125, shape=(), dtype=int64)


In [None]:
datagen = ImageDataGenerator(
    validation_split=0.3,
    rotation_range=15,
    zoom_range=0.3,
    horizontal_flip=True,
    rescale=1.0/255,
    fill_mode='nearest'
)

train_gen = datagen.flow_from_directory(
    '../gdsc-ai-challenge/train',
    target_size=(32,32),
    batch_size=10,
    subset='training',
    seed=42,
    color_mode='grayscale',
    class_mode='sparse'
)

val_gen = datagen.flow_from_directory(
    '../gdsc-ai-challenge/train',
    target_size=(32,32),
    batch_size=10,
    subset='validation',
    seed=42,
    color_mode='grayscale',
    class_mode='sparse'
)

In [None]:
model = keras.models.Sequential([
    keras.layers.Input((32,32,1)),
    keras.layers.Conv2D(8, (5,5), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(8, (5,5), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(16, (3,3), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(16, (3,3), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(32, (3,3), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(32, (3,3), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(32, (3,3), padding='same',
                        kernel_regularizer=regularizers.l2(1e-4),
                        activity_regularizer=regularizers.l2(1e-4),
                        kernel_initializer='he_normal',
                        activation='elu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(number_of_classes, activation='softmax')
])

model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001),
                                              loss='sparse_categorical_crossentropy',
                                              metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
class stopCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}) :
        if (logs.get('val_accuracy') >= 0.999 and logs.get('accuracy') >= 0.999) :
            print('\nReached 99.9% accuracy so stopping training')
            self.model.stop_training = True

In [None]:
callback = stopCallback()

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='../Model/aiseries/version1.hdf5',
                                                               monitor='val_loss',
                                                               mode='min',
                                                               save_best_only=True)

history = model.fit(train_gen, 
                    epochs=1000, 
                    callbacks=[callback, model_checkpoint_callback], 
                    validation_data=val_gen)

In [None]:
plt.plot(history.history['loss'], color='orange', label='Train loss')
plt.plot(history.history['val_loss'], color='blue', label='Validation loss')
plt.legend()
plt.show()