In [None]:
import os
import cv2
import tensorflow as tf
import numpy as np
from typing import List
from matplotlib import pyplot as plt

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)

# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. The setting is applied to every visible GPU, and an empty device
# list (CPU-only machine) simply skips the loop.
for gpu in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except (ValueError, RuntimeError) as err:
        # Best effort: report the problem instead of silently hiding it, but
        # do not stop the notebook -- training can still run without it.
        print(f"Could not enable memory growth for {gpu}: {err}")

In [None]:
# Character vocabulary: a leading space followed by the Bengali letters and
# signs, split into a list with one entry per code point of the string.
vocab = list(" অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহড়ঢ়য়ৎংঃঁািীুূেৈোৌৃ")

In [None]:
# Forward lookup: character -> integer id. The empty string is the OOV token.
char_to_num = tf.keras.layers.StringLookup(
    oov_token="",
    vocabulary=vocab,
)

# Inverse lookup: integer id -> character, built from the forward layer's own
# vocabulary so that both directions agree on the id assignment.
num_to_char = tf.keras.layers.StringLookup(
    invert=True,
    oov_token="",
    vocabulary=char_to_num.get_vocabulary(),
)

print(
    f"The vocabulary is: {char_to_num.get_vocabulary()} \n"
    f"(size ={char_to_num.vocabulary_size()})"
)

In [None]:
# Sanity check of the forward lookup on the first few vocabulary characters.
char_to_num(list(" অআইঈউঊ"))

In [None]:
# Sanity check of the inverse lookup: map a few ids back to their characters.
print([token.decode() for token in num_to_char([14,  9,  3, 11, 13]).numpy()])

## Load the data from chunk files (to avoid the RAM error) and build the tensor datasets train_data, val_data and test_data

In [None]:
import glob
import pickle

def load_chunk(file_path):
    """Read one pickled chunk from disk.

    Args:
        file_path: Path of a pickle file whose payload unpacks into exactly
            two items, ``(frames, labels)``.

    Returns:
        The ``(frames, labels)`` pair stored in the file.

    Note:
        ``pickle.load`` can execute arbitrary code embedded in the file; only
        load chunk files that come from a trusted source.
    """
    with open(file_path, 'rb') as chunk_file:
        payload = pickle.load(chunk_file)
    frames, labels = payload
    return frames, labels

def data_generator(file_paths):
    """Lazily yield the ``(frames, labels)`` pair of each chunk file, in order.

    Each file is only read when the consumer asks for the next item, so at
    most one chunk is loaded by this generator at a time.

    Args:
        file_paths: Iterable of chunk file paths accepted by ``load_chunk``.

    Yields:
        The ``(frames, labels)`` tuple returned by ``load_chunk`` for each path.
    """
    yield from map(load_chunk, file_paths)

def create_tf_dataset_from_chunks(directory, batch_size=2,
                                  frame_shape=(115, 54, 90, 1), label_length=55):
    """Build a batched, prefetched ``tf.data.Dataset`` from the ``*.pkl`` files in a directory.

    Every pickle file is read through ``data_generator`` and must provide one
    ``(frames, labels)`` element that matches the output signature below.

    Args:
        directory: Folder that contains the ``*.pkl`` chunk files.
        batch_size: Number of elements per batch.
        frame_shape: Shape of the float32 ``frames`` tensor of one element.
        label_length: Length of the int64 ``labels`` vector of one element.

    Returns:
        A ``tf.data.Dataset`` yielding ``(frames, labels)`` batches. If the
        directory holds no ``*.pkl`` file the dataset is empty.
    """
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the sample order reproducible across runs and machines.
    file_paths = sorted(glob.glob(os.path.join(directory, '*.pkl')))

    output_signature = (
        tf.TensorSpec(shape=tuple(frame_shape), dtype=tf.float32),
        tf.TensorSpec(shape=(label_length,), dtype=tf.int64),
    )

    # from_generator needs a zero-argument callable so the generator can be
    # re-created each time the dataset is iterated (e.g. once per epoch).
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(file_paths),
        output_signature=output_signature
    )

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

## Call the dataset function to create the datasets

In [None]:
# Build the three pipelines (train / validation / test) with identical settings.
train_data, val_data, test_data = (
    create_tf_dataset_from_chunks(chunk_dir, batch_size=2)
    for chunk_dir in ('train_chunks', 'val_chunks', 'test_chunks')
)

In [None]:
# Pull one (frames, labels) batch from the training pipeline for inspection.
sample = train_data.as_numpy_iterator()
val = next(sample)
# print(val[1])

In [None]:
# Pull one (frames, labels) batch from the validation pipeline for inspection.
sample1 = val_data.as_numpy_iterator()
# Read from the validation iterator created above. The previous code called
# `sample.next()` and therefore returned another *training* batch.
val1 = sample1.next()
# print(val1[1])

In [None]:
# Pull one (frames, labels) batch from the test pipeline for inspection.
sample2 = test_data.as_numpy_iterator()
# Read from the test iterator created above. The previous code called
# `sample.next()` and therefore returned another *training* batch.
val2 = sample2.next()
# print(val2[1])

In [None]:
# val[0]: the frames part of the batch, [0]: first video of the batch,
# [20]: the frame at index 20 of that video (not the first frame).
plt.imshow(val[0][0][20])

In [None]:
# Decode the label ids of the first video in the batch back into text:
# look up all ids in one call, then join the characters into one string.
words = num_to_char(val[1][0])
joined_words = tf.strings.reduce_join(words).numpy().decode('utf-8')
print(joined_words)

# Design the Deep Neural Network

In [None]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
from tensorflow.keras.optimizers.legacy import Adam
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Shape of a single video (first element of the first training batch).
next(train_data.as_numpy_iterator())[0][0].shape

In [None]:
# Number of frames per video; it is the time dimension of the model input.
tot_frames = 115

# 3D-convolutional front end followed by two bidirectional LSTMs and a
# per-time-step softmax. The pooling windows are (1, 2, 2), so only the two
# spatial axes are down-sampled and the time axis keeps its length.
model = Sequential([
    # Convolution stage 1
    Conv3D(128, 3, input_shape=(tot_frames, 54, 90, 1), padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),

    # Convolution stage 2
    Conv3D(256, 3, padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),

    # Convolution stage 3
    # NOTE(review): the filter count is set to tot_frames here; it looks
    # unrelated to the time axis -- confirm this coupling is intended.
    Conv3D(tot_frames, 3, padding='same'),
    Activation('relu'),
    MaxPool3D((1, 2, 2)),

    # Flatten each time step separately into one feature vector per frame.
    TimeDistributed(Flatten()),

    Bidirectional(LSTM(256, kernel_initializer='Orthogonal', return_sequences=True)),
    Dropout(.5),

    Bidirectional(LSTM(256, kernel_initializer='Orthogonal', return_sequences=True)),
    Dropout(.5),

    # One class per vocabulary entry plus one extra class
    # (presumably the CTC blank -- TODO confirm).
    Dense(char_to_num.vocabulary_size()+1, kernel_initializer='he_normal', activation='softmax'),
])


In [None]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

In [None]:
# Show the input and output shapes Keras inferred for the model.
print(f'input shape: {model.input_shape}')
print(f'output shape: {model.output_shape}')

# Setup Training Options and Train

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: constant for 30 epochs, then exponential decay.

    Args:
        epoch: Zero-based index of the epoch that is about to start.
        lr: Current learning rate.

    Returns:
        ``lr`` unchanged while ``epoch < 30``; afterwards ``lr * exp(-0.1)``
        as a plain Python float (not a ``tf.Tensor``), which is the type the
        Keras ``LearningRateScheduler`` callback expects from a schedule.
    """
    import math  # local import keeps this cell runnable on its own

    if epoch < 30:
        return lr
    return float(lr) * math.exp(-0.1)

In [None]:
def CTCLoss(y_true, y_pred):
    """CTC loss wrapper around ``tf.keras.backend.ctc_batch_cost``.

    Every sample is given the same lengths: the input length is the size of
    axis 1 of ``y_pred`` and the label length is the size of axis 1 of
    ``y_true`` (i.e. the full label width, including any padding it holds).

    Args:
        y_true: Batch of label id sequences.
        y_pred: Batch of per-time-step class probabilities from the model.

    Returns:
        The tensor returned by ``ctc_batch_cost`` for the batch.
    """
    n_samples = tf.cast(tf.shape(y_true)[0], dtype="int64")
    # Column vector of ones, used to repeat a scalar length once per sample.
    per_sample = tf.ones(shape=(n_samples, 1), dtype="int64")

    time_steps = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_width = tf.cast(tf.shape(y_true)[1], dtype="int64")

    return tf.keras.backend.ctc_batch_cost(
        y_true, y_pred, time_steps * per_sample, label_width * per_sample
    )

In [None]:
class ProduceExample(tf.keras.callbacks.Callback):
    """Callback that prints ground truth vs. decoded prediction after each epoch.

    One batch is taken from ``dataset`` per epoch. When the dataset runs out
    of batches the iterator is restarted, so training is not interrupted by a
    ``StopIteration`` when there are more epochs than batches.
    """

    def __init__(self, dataset) -> None:
        """Store the dataset and open a first iterator over it.

        Args:
            dataset: A ``tf.data.Dataset`` yielding ``(frames, labels)`` batches.
        """
        super().__init__()
        # Keep the dataset itself so a fresh iterator can be made later.
        self._source = dataset
        self.dataset = dataset.as_numpy_iterator()

    def on_epoch_end(self, epoch, logs=None) -> None:
        """Predict on the next batch and print original and predicted text."""
        data = next(self.dataset, None)
        if data is None:
            # Iterator exhausted: start over from the beginning of the dataset.
            self.dataset = self._source.as_numpy_iterator()
            data = next(self.dataset, None)
            if data is None:
                # The dataset holds no batch at all; nothing to show.
                return

        yhat = self.model.predict(data[0])

        # One sequence length per sample of this batch, taken from the
        # prediction itself so that any batch size is handled.
        input_length = np.full(len(yhat), yhat.shape[1])
        decoded = tf.keras.backend.ctc_decode(yhat, input_length, greedy=False)[0][0].numpy()
        for x in range(len(yhat)):
            print('Original:', tf.strings.reduce_join(num_to_char(data[1][x])).numpy().decode('utf-8'))
            print('Prediction:', tf.strings.reduce_join(num_to_char(decoded[x])).numpy().decode('utf-8'))
            print('~'*100)

In [None]:
def do_calculation(y_true_np, y_pred_np):
    """Compute character- and word-level accuracy for a batch of id sequences.

    Each row of ids is converted to text with ``num_to_char``. Characters are
    compared position by position, and whitespace-separated words are compared
    position by position as well; both totals are counted on the true text.

    Args:
        y_true_np: Array of true label id rows.
        y_pred_np: Array of predicted label id rows.

    Returns:
        ``(char_accuracy, word_accuracy)`` as two float32 numpy scalars-arrays;
        an accuracy is 0 when its total count is 0.
    """
    def _to_text(id_row):
        # ids -> characters -> one Python string
        return tf.strings.reduce_join(num_to_char(id_row.astype(int))).numpy().decode('utf-8')

    char_hits = char_total = 0
    word_hits = word_total = 0

    for true_row, pred_row in zip(y_true_np, y_pred_np):
        true_str = _to_text(true_row)
        pred_str = _to_text(pred_row)
        true_words = true_str.split()
        pred_words = pred_str.split()

        char_total += len(true_str)
        char_hits += sum(t == p for t, p in zip(true_str, pred_str))

        word_total += len(true_words)
        word_hits += sum(t == p for t, p in zip(true_words, pred_words))

    char_accuracy = char_hits / char_total if char_total > 0 else 0
    word_accuracy = word_hits / word_total if word_total > 0 else 0

    return np.array(char_accuracy, dtype=np.float32), np.array(word_accuracy, dtype=np.float32)

def word_accuracy(y_true, y_pred):
    """Keras metric: word-level accuracy of the CTC-decoded predictions.

    Args:
        y_true: Batch of true label id sequences.
        y_pred: Batch of per-time-step class probabilities from the model.

    Returns:
        A float32 tensor holding the word accuracy computed by
        ``do_calculation`` for the batch.
    """
    def _full_lengths(tensor):
        # One length per row, each equal to the size of axis 1.
        dims = tf.shape(tensor)
        return tf.fill([dims[0]], dims[1])

    best_path = tf.keras.backend.ctc_decode(y_pred, input_length=_full_lengths(y_pred))[0][0]
    best_path = tf.cast(best_path, tf.int64)

    # Dense -> sparse -> dense round trip using the full row length.
    true_sparse = tf.keras.backend.ctc_label_dense_to_sparse(y_true, _full_lengths(y_true))
    pred_sparse = tf.keras.backend.ctc_label_dense_to_sparse(best_path, _full_lengths(best_path))
    true_dense = tf.sparse.to_dense(true_sparse, default_value=-1)
    pred_dense = tf.sparse.to_dense(pred_sparse, default_value=-1)

    # do_calculation works on numpy arrays, so it is wrapped as a TF op.
    _, word_score = tf.numpy_function(do_calculation, [true_dense, pred_dense], [tf.float32, tf.float32])

    return tf.convert_to_tensor(word_score, dtype=tf.float32)



def char_accuracy(y_true, y_pred):
    """Keras metric: character-level accuracy of the CTC-decoded predictions.

    Args:
        y_true: Batch of true label id sequences.
        y_pred: Batch of per-time-step class probabilities from the model.

    Returns:
        A float32 tensor holding the character accuracy computed by
        ``do_calculation`` for the batch.
    """
    def _full_lengths(tensor):
        # One length per row, each equal to the size of axis 1.
        dims = tf.shape(tensor)
        return tf.fill([dims[0]], dims[1])

    # Decode the model output with CTC decoding.
    best_path = tf.keras.backend.ctc_decode(y_pred, input_length=_full_lengths(y_pred))[0][0]
    best_path = tf.cast(best_path, tf.int64)

    # Dense -> sparse -> dense round trip using the full row length.
    true_sparse = tf.keras.backend.ctc_label_dense_to_sparse(y_true, _full_lengths(y_true))
    pred_sparse = tf.keras.backend.ctc_label_dense_to_sparse(best_path, _full_lengths(best_path))
    true_dense = tf.sparse.to_dense(true_sparse, default_value=-1)
    pred_dense = tf.sparse.to_dense(pred_sparse, default_value=-1)

    # do_calculation works on numpy arrays, so it is wrapped as a TF op.
    char_score, _ = tf.numpy_function(do_calculation, [true_dense, pred_dense], [tf.float32, tf.float32])

    return tf.convert_to_tensor(char_score, dtype=tf.float32)

In [None]:
class MetricsHistory(tf.keras.callbacks.Callback):
    """Callback that appends one line of loss/accuracy values per epoch to a text file."""

    def __init__(self, log_file):
        """Remember the log path and create its parent directory if needed.

        Args:
            log_file: Path of the text file the metrics are appended to.
        """
        super(MetricsHistory, self).__init__()
        self.log_file = log_file
        # A bare file name has an empty dirname, and os.makedirs('') raises;
        # only create the directory when the path actually contains one.
        log_dir = os.path.dirname(log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

    def on_epoch_end(self, epoch, logs=None):
        """Append the metrics of the finished epoch to the log file.

        Metrics that are absent from ``logs`` (for example the ``val_*``
        entries when no validation data is used) are written as ``nan`` so
        that the line keeps its column layout.
        """
        logs = logs or {}
        missing = float('nan')

        with open(self.log_file, 'a') as f:
            f.write(f'Epoch {epoch + 1:02d}: '
                    f'Train Loss: {logs.get("loss", missing):<10.4f} '
                    f'Train char Accuracy: {logs.get("char_accuracy", missing):<10.4f} '
                    f'Train word accuracy: {logs.get("word_accuracy", missing):<10.4f} '
                    f'Val Loss: {logs.get("val_loss", missing):<10.4f} '
                    f'Val char accuracy: {logs.get("val_char_accuracy", missing):<10.4f} '
                    f'Val word accuracy: {logs.get("val_word_accuracy", missing):<10.4f}\n')


In [None]:

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join('bangla_model', 'checkpoint'),
    monitor='val_loss',
    save_weights_only=True,
    save_best_only=True,
)

# Apply the learning-rate schedule defined in `scheduler`.
schedule_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Print sample predictions on validation batches after each epoch.
example_callback = ProduceExample(val_data)

# Append the per-epoch metrics to a text file.
log_file = './metrics.txt'
metrics_history = MetricsHistory(log_file=log_file)

# Save the training state every epoch so an interrupted run can resume.
backup_callback = tf.keras.callbacks.BackupAndRestore(
    backup_dir='bangla_training_backup',
    save_freq='epoch',
    delete_checkpoint=True,
)

# Stop when the validation loss has not improved for 15 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
)

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss=CTCLoss,
    metrics=[word_accuracy, char_accuracy],
)


In [None]:
# Train for up to 150 epochs, validating on val_data after every epoch; the
# callbacks are passed in the order listed below.
history = model.fit(train_data, validation_data=val_data, epochs=150, callbacks=[
                    backup_callback, checkpoint_callback, schedule_callback, early_stopping, example_callback, metrics_history])

# Metrics plotting

In [None]:
import matplotlib.pyplot as plt

# Read the per-epoch log lines written by the MetricsHistory callback.
with open("metrics.txt", "r") as file:
    data = file.readlines()

# Parse the data
epochs = []
train_loss = []
train_char_accuracy = []
train_word_accuracy = []
val_loss = []
val_char_accuracy = []
val_word_accuracy = []

# After whitespace splitting, a complete log line has 24 tokens; the numeric
# values sit at the fixed positions used below.
EXPECTED_TOKENS = 24

for line in data:
    parts = line.split()
    if len(parts) < EXPECTED_TOKENS:
        # Skip blank or truncated lines (e.g. from an interrupted write)
        # instead of failing with an IndexError.
        continue
    epochs.append(int(parts[1][:-1]))  # "01:" -> 1 (drop the trailing colon)
    train_loss.append(float(parts[4]))
    train_char_accuracy.append(float(parts[8]))
    train_word_accuracy.append(float(parts[12]))
    val_loss.append(float(parts[15]))
    val_char_accuracy.append(float(parts[19]))
    val_word_accuracy.append(float(parts[23]))

# Plotting
plt.figure(figsize=(12, 10))

# Loss plot (spans the top row of the 2x2 grid)
plt.subplot(2, 2, (1, 2))
plt.plot(epochs, train_loss, '-o', label='Train Loss')
plt.plot(epochs, val_loss, '-o', label='Val Loss')  # same marker style as the other curves
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()

# Accuracy plot (spans the bottom row of the 2x2 grid)
plt.subplot(2, 2, (3, 4))
plt.plot(epochs, train_char_accuracy, '-o', label='Train Char Accuracy')
plt.plot(epochs, val_char_accuracy, '-o', label='Val Char Accuracy')
plt.plot(epochs, train_word_accuracy, '-o', label='Train Word Accuracy')
plt.plot(epochs, val_word_accuracy, '-o', label='Val Word Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

# Save plot to file
plt.savefig('metrics_plot.png')
# Show a message indicating that the plot is saved
print("Plot saved as 'metrics_plot.png'")

In [None]:
# Load saved weights from ./bangla_model/checkpoint into the model.
# On Windows the same location can be written with backslashes:
# model.load_weights('.\\bangla_model\\checkpoint')
model.load_weights('./bangla_model/checkpoint')

In [None]:
# Preview predictions on at most 20 validation batches. `take(20)` stops early
# when the dataset holds fewer batches, so no StopIteration is raised.
val_single = val_data.take(20).as_numpy_iterator()

for data in val_single:
    yhat = model.predict(data[0])

    # One sequence length per sample of this batch, taken from the prediction
    # itself so that a smaller final batch is handled as well.
    input_length = np.full(len(yhat), yhat.shape[1])
    decoded = tf.keras.backend.ctc_decode(yhat, input_length, greedy=False)[0][0].numpy()
    for x in range(len(yhat)):
        print('Original:', tf.strings.reduce_join(num_to_char(data[1][x])).numpy().decode('utf-8'))
        print('Prediction:', tf.strings.reduce_join(num_to_char(decoded[x])).numpy().decode('utf-8'))
        print('~'*100)

In [None]:
# Preview predictions on at most 20 test batches. `take(20)` stops early when
# the dataset holds fewer batches, so no StopIteration is raised.
test_single = test_data.take(20).as_numpy_iterator()

for data in test_single:
    yhat = model.predict(data[0])

    # One sequence length per sample of this batch, taken from the prediction
    # itself so that a smaller final batch is handled as well.
    input_length = np.full(len(yhat), yhat.shape[1])
    decoded = tf.keras.backend.ctc_decode(yhat, input_length, greedy=False)[0][0].numpy()
    for x in range(len(yhat)):
        print('Original:', tf.strings.reduce_join(num_to_char(data[1][x])).numpy().decode('utf-8'))
        print('Prediction:', tf.strings.reduce_join(num_to_char(decoded[x])).numpy().decode('utf-8'))
        print('~'*100)