https://www.kaggle.com/code/imthebaron/bengali-ai-grapheme-classification?scriptVersionId=198759523

In [None]:
import os
import math

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from skimage.color import rgb2gray
from skimage import measure

import cv2
from tqdm import tqdm

In [None]:
# Seed TensorFlow's global random generator.
# NumPy's global RNG is seeded separately inside data_generator.
tf.random.set_seed(42)

In [None]:
# Directory holding the training images as PNG files
# (presumably 256x256, judging by the directory name -- TODO confirm).
load_dir = '/kaggle/input/bengaliai/256_train/256/'

# Label table; later cells read its image_id, grapheme_root, vowel_diacritic
# and consonant_diacritic columns.
train = pd.read_csv('/kaggle/input/bengaliai-cv19/train.csv')
train.head()

In [None]:
# Build the on-disk PNG path of every image id with a vectorised string concat.
train['filename'] = load_dir + train['image_id'] + '.png'
# Keep only the first 50,000 rows.
train = train.iloc[:50000]
len(train)

In [None]:
# Show the first rows again, now including the filename column.
train.head()

In [None]:
# Load the label CSV a second time into its own frame; unlike `train`, this
# copy is neither truncated to 50,000 rows nor given a filename column.
train_df = pd.read_csv('/kaggle/input/bengaliai-cv19/train.csv')
print(f'Size of training data: {train_df.shape}')
train_df.head()

In [None]:
# Report how many distinct values each of the three target columns holds.
for label, column in (
    ('graphemes', 'grapheme_root'),
    ('vowel diacritic', 'vowel_diacritic'),
    ('consonant diacritic', 'consonant_diacritic'),
):
    print(f'Number of unique {label}: {train_df[column].nunique()}')

In [None]:
def get_pad_width(im, new_shape, is_rgb=True):
    """Return the ``np.pad`` widths that centre ``im`` inside ``new_shape``.

    The height/width difference is split between the two sides of each axis;
    when the difference is odd, the extra pixel goes to the bottom/right.

    Args:
        im: Array whose first two axes are (height, width).
        new_shape: Target size, a scalar or (height, width) pair.
        is_rgb: When true, append a ``(0, 0)`` entry so the channel axis is
            left unpadded.

    Returns:
        ``((top, bottom), (left, right))``, plus ``(0, 0)`` if ``is_rgb``.
    """
    extra_rows, extra_cols = new_shape - np.array(im.shape[:2])
    vertical = (extra_rows // 2, (extra_rows + 1) // 2)
    horizontal = (extra_cols // 2, (extra_cols + 1) // 2)

    if is_rgb:
        return (vertical, horizontal, (0, 0))
    return (vertical, horizontal)

def crop_object(img, thresh=220, maxval=255, square=True):
    """Crop ``img`` to the bounding box of its largest dark object.

    The image is binarised with an inverse threshold (pixels at or below
    ``thresh`` become foreground), external contours are extracted, and the
    contour whose bounding rectangle has the largest area defines the crop.

    Args:
        img: Image array, either 3-channel (H, W, 3) or single-channel (H, W).
        thresh: Threshold passed to ``cv2.threshold``.
        maxval: Foreground value written by ``cv2.threshold``.
        square: When true, pad the crop with white (255) to a square whose
            side is the longer edge of the crop.

    Returns:
        The cropped (and optionally squared) image. If no foreground contour
        is found, the whole input image is used instead of raising.
    """
    # A 2-D input is already single-channel; converting it would raise.
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, thresh, maxval, cv2.THRESH_BINARY_INV)

    # [-2] picks the contour list under both the OpenCV 3 (3-tuple) and the
    # OpenCV 4 (2-tuple) return conventions.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if contours:
        # NOTE(review): only the single largest bounding box is kept, so
        # strokes that are disconnected from the main shape fall outside the
        # crop -- confirm this is intended.
        x, y, w, h = max(
            (cv2.boundingRect(contour) for contour in contours),
            key=lambda box: box[2] * box[3],
        )
        crop = img[y:y + h, x:x + w]
    else:
        # Blank image: nothing to crop to, keep the full frame.
        crop = img

    if square:
        pad_width = get_pad_width(crop, max(crop.shape[:2]), is_rgb=(img.ndim == 3))
        crop = np.pad(crop, pad_width=pad_width, mode='constant', constant_values=255)

    return crop

In [None]:
# Light geometric augmentation (rotation, shifts, shear); data_generator
# applies it per image through datagen.random_transform.
datagen = ImageDataGenerator(
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    # No preprocessing_function is set: pixel scaling happens in data_generator.
    #preprocessing_function= tf.keras.applications.efficientnet.preprocess_input
)

In [None]:
def _load_sample(path, shape, preprocess, augmentation):
    """Read one image file and return it as a ``shape`` array scaled to [0, 1]."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if preprocess:
        img = crop_object(img, thresh=250)

    # cv2.resize takes dsize as (width, height) while shape is (height, width, channels).
    img = cv2.resize(img, (shape[1], shape[0]))

    if augmentation:
        img = datagen.random_transform(img)

    # Collapse to one channel, then restore the explicit channel axis.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return img.reshape(*shape) / 255.0


def data_generator(filenames, y, batch_size=128, shape=(64, 64, 1), random_state=42, preprocess=False, augmentation=False):
    """Yield shuffled ``(X_batch, y_tuple)`` batches forever.

    Args:
        filenames: Sequence of image paths.
        y: 2-D label array with one row per file and one column per target.
        batch_size: Maximum number of samples per batch; the last batch of a
            pass may be smaller.
        shape: Output image shape ``(height, width, channels)``. Images are
            converted to grayscale before reshaping, so ``channels`` must be 1.
        random_state: Seed applied to NumPy's *global* RNG when the generator
            is first advanced; it drives the shuffling here and any other
            code that uses ``np.random`` afterwards.
        preprocess: Crop each image with ``crop_object`` before resizing.
        augmentation: Apply ``datagen.random_transform`` to each image.

    Yields:
        ``X_batch`` as float32 of shape ``(n, *shape)`` with values in [0, 1],
        and a tuple holding one label vector per column of ``y``.

    Raises:
        ValueError: If ``filenames`` is empty or its length differs from ``y``.
        FileNotFoundError: If an image file cannot be read.
    """
    # Arrays are required for the fancy indexing below; np.array also copies y.
    filenames = np.asarray(filenames)
    y = np.array(y)

    if len(filenames) == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError('data_generator received no filenames')
    if len(filenames) != len(y):
        raise ValueError(
            f'filenames and y differ in length: {len(filenames)} != {len(y)}'
        )

    np.random.seed(random_state)
    indices = np.arange(len(filenames))

    while True:
        # Reshuffle at the start of every pass over the data.
        np.random.shuffle(indices)

        for start in range(0, len(indices), batch_size):
            batch_idx = indices[start:start + batch_size]

            X_batch = np.zeros((len(batch_idx), *shape), dtype=np.float32)
            for j, path in enumerate(filenames[batch_idx]):
                X_batch[j] = _load_sample(path, shape, preprocess, augmentation)

            # One label vector per target column, in column order.
            y_batch = y[batch_idx]
            y_batch_tuple = tuple(y_batch[:, col] for col in range(y_batch.shape[1]))

            yield X_batch, y_batch_tuple

In [None]:
# Hold out 25% of the samples for validation. The split is seeded so the same
# files land in each partition on every run.
train_files, valid_files, y_train, y_valid = train_test_split(
    train.filename.values,
    train[['grapheme_root','vowel_diacritic', 'consonant_diacritic']].values,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Load one sample image (OpenCV reads BGR) and show it in RGB channel order.
sample_bgr = cv2.imread('/kaggle/input/bengaliai/256_train/256/Train_200000.png')
image = cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(image)

In [None]:
# Preview a raw training batch (no cropping, no augmentation).
generator = data_generator(train_files, y_train)
X_batch, _ = next(generator)

fig, axes = plt.subplots(5, 5, figsize=(5, 5), facecolor='lightgray')
axes = axes.flatten()

# zip stops at the 25 available axes, so only the first 25 images are drawn.
for img, ax in zip(X_batch, axes):
    # Drop the trailing channel axis and render with a grayscale colormap.
    ax.imshow(np.squeeze(img), cmap='gray')
    ax.axis('off')  # Hide axis

plt.suptitle('Images from Training Set', fontsize=16)

# Reserve the top 5% of the figure so the title does not overlap the grid.
plt.tight_layout(rect=[0, 0, 1, 0.95])

# Save the figure
plt.savefig('sample_images.png', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# Preview a training batch after cropping (no augmentation).
generator = data_generator(train_files, y_train, preprocess=True, augmentation=False)
X_batch, _ = next(generator)

fig, axes = plt.subplots(5, 5, figsize=(5, 5), facecolor='lightgray')
axes = axes.flatten()

# zip stops at the 25 available axes, so only the first 25 images are drawn.
for img, ax in zip(X_batch, axes):
    # The generator yields single-channel images: drop the channel axis and
    # render with a grayscale colormap.
    ax.imshow(np.squeeze(img), cmap='gray')
    ax.axis('off')  # Hide axis

plt.suptitle('Preprocessed Images', fontsize=16)

# Reserve the top 5% of the figure so the title does not overlap the grid.
plt.tight_layout(rect=[0, 0, 1, 0.95])

# Save the figure
plt.savefig('preprocessed.png', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# Preview a training batch after cropping and random augmentation.
generator = data_generator(train_files, y_train, preprocess=True, augmentation=True)
X_batch, _ = next(generator)

fig, axes = plt.subplots(5, 5, figsize=(5, 5), facecolor='lightgray')
axes = axes.flatten()

# zip stops at the 25 available axes, so only the first 25 images are drawn.
for img, ax in zip(X_batch, axes):
    # The generator yields single-channel images: drop the channel axis and
    # render with a grayscale colormap.
    ax.imshow(np.squeeze(img), cmap='gray')
    ax.axis('off')  # Hide axis

plt.suptitle('Augmented + Preprocessed Images', fontsize=16)

# Reserve the top 5% of the figure so the title does not overlap the grid.
plt.tight_layout(rect=[0, 0, 1, 0.95])

# Save the figure
plt.savefig('augmented.png', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
batch_size = 128

# Pass batch_size explicitly so the step counts below always match the
# generators, rather than relying on data_generator's default being equal.
train_gen = data_generator(train_files, y_train, batch_size=batch_size, preprocess=True, augmentation=True)
# Validation images get the same cropping but no random augmentation, so
# val_loss is measured on undistorted inputs.
valid_gen = data_generator(valid_files, y_valid, batch_size=batch_size, preprocess=True, augmentation=False)

print((len(train_files) , len(valid_files)))

# Round up so the final, smaller batch is consumed too: one epoch is then
# exactly one pass of each generator.
train_steps = math.ceil(len(train_files) / batch_size)
valid_steps = math.ceil(len(valid_files) / batch_size)

In [None]:
from tensorflow.keras.layers import Input, Conv2D, SeparableConv2D, BatchNormalization, MaxPool2D, Dropout, Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def _separable_stage(x, first_filters, second_filters):
    """Apply two 3x3 separable convolutions, batch norm, then 2x2 max pooling."""
    x = SeparableConv2D(filters=first_filters, kernel_size=(3, 3), padding='SAME', activation='relu')(x)
    x = SeparableConv2D(filters=second_filters, kernel_size=(3, 3), padding='SAME', activation='relu')(x)
    x = BatchNormalization(momentum=0.5)(x)
    return MaxPool2D(pool_size=(2, 2))(x)


def bengali_ai(input_shape=(64, 64, 1), n_roots=168, n_vowels=11, n_consonants=7):
    """Build the three-headed separable-convolution classifier.

    Five convolution stages (each halving the spatial size) feed a global
    average pool, a 1024-unit dense layer and three softmax heads.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
            Five 2x2 poolings are applied, so height and width should be at
            least 32.
        n_roots: Number of classes of the ``'grapheme'`` head.
        n_vowels: Number of classes of the ``'vowel'`` head.
        n_consonants: Number of classes of the ``'consonant'`` head.

    Returns:
        An uncompiled Keras ``Model`` named ``'bengali.ai'`` whose outputs are
        ``[grapheme, vowel, consonant]``.
    """
    inputs = Input(shape=input_shape)

    # Convolutional trunk; dropout follows the first and third stages.
    x = _separable_stage(inputs, 32, 32)
    x = Dropout(rate=0.2)(x)
    x = _separable_stage(x, 64, 64)
    x = _separable_stage(x, 128, 128)
    x = Dropout(rate=0.2)(x)
    x = _separable_stage(x, 128, 256)
    x = _separable_stage(x, 256, 256)

    x = GlobalAveragePooling2D()(x)
    x = Dropout(rate=0.2)(x)

    # Dense Layers
    x = Dense(1024, activation="relu")(x)
    x = BatchNormalization(momentum=0.5)(x)

    # Output Layers -- the layer names double as the keys of the loss and
    # metric dicts passed to model.compile.
    root_out = Dense(n_roots, activation='softmax', name='grapheme')(x)
    vowel_out = Dense(n_vowels, activation='softmax', name='vowel')(x)
    consonant_out = Dense(n_consonants, activation='softmax', name='consonant')(x)

    # Final Model
    model = Model(inputs=inputs, outputs=[root_out, vowel_out, consonant_out], name='bengali.ai')

    return model

# Instantiate the network and print its layer-by-layer summary.
model = bengali_ai()
model.summary()

In [None]:
# Render the architecture diagram to model.png, annotated with tensor shapes
# and layer activations.
tf.keras.utils.plot_model(model, dpi=500, to_file='model.png', show_shapes=True, show_layer_activations=True) # , show_trainable=True

In [None]:
# All three heads share the same loss and the same metric; the keys must match
# the output layer names defined in bengali_ai.
output_heads = ('grapheme', 'vowel', 'consonant')

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=dict.fromkeys(output_heads, 'sparse_categorical_crossentropy'),
    metrics=dict.fromkeys(output_heads, 'accuracy'),
)

In [None]:
# Checked at the end of every epoch: write the weights (not the full model) to
# checkpoint.weights.h5, but only when val_loss is the lowest seen so far.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoint.weights.h5',
    monitor='val_loss',
    mode='min',
    save_freq = 'epoch',
    save_weights_only=True,
    save_best_only=True)

In [None]:
# Stop training once the total validation loss stops improving.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Watch the total validation loss
    min_delta=0.001,         # Minimum change in monitored value to qualify as improvement
    patience=3,             # Stop after 3 epochs of no improvement
    mode='min',              # Lower val_loss is better
    restore_best_weights=True,  # Restore model weights from the best epoch
    verbose=1
)

In [None]:
# Train for up to 50 epochs. Each epoch draws train_steps batches from
# train_gen and validates on valid_steps batches from valid_gen; the
# checkpoint and early-stopping callbacks run alongside.
train_history = model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    epochs=50,
    validation_data=valid_gen,
    validation_steps=valid_steps,
    callbacks=[model_checkpoint_callback, early_stopping_callback]
)

In [None]:
# List the metric names recorded during training (used by the plots below).
train_history.history.keys()

In [None]:
# Save the trained model to an .h5 file.
# NOTE(review): the filename mentions "efficientb0", but bengali_ai() builds a
# custom SeparableConv2D network -- consider renaming the artifact.
model.save('bengali.ai efficientb0 grayscale.h5')

In [None]:
# Total loss per epoch: training vs validation.
history = train_history.history
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(history['loss'], '--o', label='train loss')
ax.plot(history['val_loss'], '--o', label='val loss')
ax.legend()
ax.set_title('training loss & val loss')
fig.savefig('fig_total_loss.png', format='png', dpi=400)
plt.show()

In [None]:
# Grapheme-head accuracy per epoch: training vs validation.
history = train_history.history
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(history['grapheme_accuracy'], '--o', label='grapheme accuracy')
ax.plot(history['val_grapheme_accuracy'], '--o', label='val grapheme accuracy')
ax.legend()
ax.set_title('training grapheme acc & val grapheme acc')
fig.savefig('fig_grapheme_acc.png', format='png', dpi=400)
plt.show()

In [None]:
# Vowel-head accuracy per epoch: training vs validation.
history = train_history.history
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(history['vowel_accuracy'], '--o', label='vowel accuracy')
ax.plot(history['val_vowel_accuracy'], '--o', label='val vowel accuracy')
ax.legend()
ax.set_title('training vowel acc & val vowel acc')
fig.savefig('fig_vowel_acc.png', format='png', dpi=400)
plt.show()

In [None]:
# Consonant-head accuracy per epoch: training vs validation.
history = train_history.history
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(history['consonant_accuracy'], '--o', label='consonant accuracy')
ax.plot(history['val_consonant_accuracy'], '--o', label='val consonant accuracy')
ax.legend()
ax.set_title('training consonant acc & val consonant acc')
fig.savefig('fig_consonant_acc.png', format='png', dpi=400)
plt.show()

In [None]:
# Write the per-epoch metrics held in train_history to history.csv
# (one row per epoch, no index column).
pd.DataFrame(train_history.history).to_csv('history.csv', index=False)

In [None]:
# Reload the saved training log and display every row of it.
df = pd.read_csv('/kaggle/working/history.csv')
n_rows, n_cols = df.shape
print((n_rows, n_cols))
df.head(n_rows)