In [19]:
import os
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv3D, LSTM, Dense, Dropout, Bidirectional, MaxPool3D, Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from typing import List

def load_data(path: str): 
    path = bytes.decode(path.numpy())
    #file_name = path.split('/')[-1].split('.')[0]
    # File name splitting for windows
    file_name = path.split('\\')[-1].split('.')[0]
    # video_path = os.path.join('data','s1',f'{file_name}.mpg')
    video_path = os.path.join('complete_data','s3',f'{file_name}.mpg')
    alignment_path = os.path.join('data','alignments','s1',f'{file_name}.align')
    frames = load_video(video_path) 
    # alignments = load_alignments(alignment_path)
    alignments=[]
    return frames, alignments

def load_video(path:str) -> List[float]: 
    print(path)
    cap = cv2.VideoCapture(path)
    frames = []
    for _ in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))): 
        ret, frame = cap.read()
        frame = tf.image.rgb_to_grayscale(frame)
        frames.append(frame[190:236,80:220,:])
    cap.release()
    
    mean = tf.math.reduce_mean(frames)
    std = tf.math.reduce_std(tf.cast(frames, tf.float32))
    return tf.cast((frames - mean), tf.float32) / std

def load_alignments(path:str) -> List[str]: 
    with open(path, 'r') as f: 
        lines = f.readlines() 
    tokens = []
    for line in lines:
        line = line.split()
        if line[2] != 'sil': 
            tokens = [*tokens,' ',line[2]]
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]

vocab = [x for x in "abcdefghijklmnopqrstuvwxyz'?!123456789 "]
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
num_to_char = tf.keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), oov_token="", invert=True
)

model = Sequential()
model.add(Conv3D(128, 3, input_shape=(75,46,140,1), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))

model.add(Conv3D(256, 3, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))

model.add(Conv3D(75, 3, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool3D((1,2,2)))

model.add(TimeDistributed(Flatten()))

model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
model.add(Dropout(.5))

model.add(Bidirectional(LSTM(128, kernel_initializer='Orthogonal', return_sequences=True)))
model.add(Dropout(.5))

model.add(Dense(char_to_num.vocabulary_size()+1, kernel_initializer='he_normal', activation='softmax'))

### Compile the model

model.load_weights('models_100_e/checkpoint')



<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x25210705f10>

In [20]:
list_data = tf.data.Dataset.list_files('./complete_data/s3/*.mpg')
file_list = list_data.as_numpy_iterator()
fl=list(file_list)

In [21]:

for i in fl:
    # print(str(i.decode('utf-8')))
    # sample = load_data(tf.convert_to_tensor('.\\s2_pwaq2a.mpg'))
    sample = load_data(tf.convert_to_tensor(i))

    yhat = model.predict(tf.expand_dims(sample[0], axis=0))

    decoded = tf.keras.backend.ctc_decode(yhat, input_length=[75], greedy=True)[0][0].numpy()

    predicted = [tf.strings.reduce_join([num_to_char(word) for word in sentence]) for sentence in decoded][0].numpy().decode('utf-8')

    print(i, " : ", predicted)

complete_data\s3\bwbn3a.mpg
b'.\\complete_data\\s3\\bwbn3a.mpg'  :  lay le in o ploeain
complete_data\s3\prap3s.mpg
b'.\\complete_data\\s3\\prap3s.mpg'  :  set blue s nene sooon
complete_data\s3\sgwj3s.mpg
b'.\\complete_data\\s3\\sgwj3s.mpg'  :  place wree wit i ore psleain
complete_data\s3\pgiq2p.mpg
b'.\\complete_data\\s3\\pgiq2p.mpg'  :  lay blue in s seueve ageain
complete_data\s3\lbbk2n.mpg
b'.\\complete_data\\s3\\lbbk2n.mpg'  :  bin whee in g zore seasin
complete_data\s3\srwi2p.mpg
b'.\\complete_data\\s3\\srwi2p.mpg'  :  lay blue ih i toure segeain
complete_data\s3\prwj7s.mpg
b'.\\complete_data\\s3\\prwj7s.mpg'  :  lay blue si soe again
complete_data\s3\srwu9s.mpg
b'.\\complete_data\\s3\\srwu9s.mpg'  :  lay gle st soe seasin
complete_data\s3\bbbz6n.mpg
b'.\\complete_data\\s3\\bbbz6n.mpg'  :  place re in t seve sgean
complete_data\s3\bwwh3a.mpg
b'.\\complete_data\\s3\\bwwh3a.mpg'  :  sin blee in x sive again
complete_data\s3\pbwj2p.mpg
b'.\\complete_data\\s3\\pbwj2p.mpg'  :  lac g

KeyboardInterrupt: 

In [16]:
print('~'*100, 'PREDICTIONS')
predicted = [tf.strings.reduce_join([num_to_char(word) for word in sentence]) for sentence in decoded][0].numpy().decode('utf-8')

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PREDICTIONS


'set green it r seve sgin'

In [3]:
model.reset_states()