In [2]:
# !pip install mediapipe

In [66]:
import cv2
import mediapipe as mp
import pandas as pd
import numpy as np
import json
import tensorflow as tf
from tensorflow.keras.models import model_from_json

import video_with_landmarks
import video_to_landmark_coordinates
import preprocess_coordinates_data
from load_model import Embedding, Encoder, Decoder, LandmarkEmbedding, EncoderTransformerBlock, MultiHeadAttention, DecoderTransformerBlock
import predict_sequence
# import Gradio_inference

# Prediction index -> character lookup.
# JSON object keys always arrive as strings, so they are cast to int while the
# file is being read.
character_to_prediction = 'character_to_prediction_index.json'
with open(character_to_prediction) as json_file:
    ORD2CHAR = {int(key): value for key, value in json.load(json_file).items()}

# Settings exported to a JSON file; every entry is looked up by name below.
json_file_path = "variables.json"
with open(json_file_path) as json_file:
    variables_dict = json.load(json_file)

# Entries that are wrapped in numpy arrays
LIPS_LANDMARK_IDXS, MEANS, STDS = (
    np.array(variables_dict[name])
    for name in ('LIPS_LANDMARK_IDXS', 'MEANS', 'STDS')
)

# Entries used exactly as stored: UNITS_* / NUM_* / MLP_RATIO
UNITS_ENCODER = variables_dict['UNITS_ENCODER']
UNITS_DECODER = variables_dict['UNITS_DECODER']
NUM_BLOCKS_ENCODER = variables_dict['NUM_BLOCKS_ENCODER']
NUM_BLOCKS_DECODER = variables_dict['NUM_BLOCKS_DECODER']
NUM_HEADS = variables_dict['NUM_HEADS']
MLP_RATIO = variables_dict['MLP_RATIO']

# Entries used exactly as stored: LAYER_NORM_EPS and the *_DROPOUT* values
LAYER_NORM_EPS = variables_dict['LAYER_NORM_EPS']
EMBEDDING_DROPOUT = variables_dict['EMBEDDING_DROPOUT']
MLP_DROPOUT_RATIO = variables_dict['MLP_DROPOUT_RATIO']
MHA_DROPOUT_RATIO = variables_dict['MHA_DROPOUT_RATIO']
CLASSIFIER_DROPOUT_RATIO = variables_dict['CLASSIFIER_DROPOUT_RATIO']

# Entries used exactly as stored: sizes, tokens and phrase length
N_TARGET_FRAMES = variables_dict['N_TARGET_FRAMES']
N_UNIQUE_CHARACTERS = variables_dict['N_UNIQUE_CHARACTERS']
N_UNIQUE_CHARACTERS0 = variables_dict['N_UNIQUE_CHARACTERS0']
PAD_TOKEN = variables_dict['PAD_TOKEN']
SOS_TOKEN = variables_dict['SOS_TOKEN']
MAX_PHRASE_LENGTH = variables_dict['MAX_PHRASE_LENGTH']

# Name -> class mapping handed to model_from_json when the model is rebuilt
custom_objects = dict(
    Embedding=Embedding,
    Encoder=Encoder,
    Decoder=Decoder,
    LandmarkEmbedding=LandmarkEmbedding,
    EncoderTransformerBlock=EncoderTransformerBlock,
    MultiHeadAttention=MultiHeadAttention,
    DecoderTransformerBlock=DecoderTransformerBlock,
)




In [18]:
# 1. Load video and (optionally) process it with landmarks.
# The call to process_video_with_landmarks below is commented out, so
# `output_path` is simply the video file handed to the extraction step.
# To re-enable it, restore `original_video_path` and point `output_path` at a
# different file.
#original_video_path = "videoplayback.mp4"
output_path = "videoplayback.mp4"
# video_with_landmarks.process_video_with_landmarks(original_video_path, output_path)


In [19]:

# 2. extract landmarks
# Build the column names first, then run the extraction on the video file.
landmark_columns = video_to_landmark_coordinates.generate_column_names()
df = video_to_landmark_coordinates.video_to_landmarks(output_path, landmark_columns)
# Optional: persist the extracted frame so later runs can skip this step
# df.to_csv('landmarks.csv', index=False)


In [102]:
# 3. preprocess landmarks
# To skip the extraction step, the frame can be reloaded from a saved CSV:
# df = pd.read_csv('landmarks.csv')

# Column indices and names returned by get_idxs for each landmark group
LEFT_HAND_IDXS0, LEFT_HAND_NAMES0 = preprocess_coordinates_data.get_idxs(df, ['left_hand'], ['z'])
RIGHT_HAND_IDXS0, RIGHT_HAND_NAMES0 = preprocess_coordinates_data.get_idxs(df, ['right_hand'], ['z'])
LIPS_IDXS0, LIPS_NAMES0 = preprocess_coordinates_data.get_idxs(df, ['face'], ['z'], idxs_pos=LIPS_LANDMARK_IDXS)
COLUMNS0 = np.concatenate((LEFT_HAND_NAMES0, RIGHT_HAND_NAMES0, LIPS_NAMES0))
N_COLS0 = len(COLUMNS0)

# Select into a NEW frame instead of overwriting `df`: the full extraction
# result stays available, and re-running this cell always starts from the
# same input (get_idxs sees the complete frame every time).
df_selected = df[COLUMNS0]
# Add a leading axis of size 1 -> (1, rows, N_COLS0)
all_tracking_sequence = df_selected.values.reshape(1, -1, N_COLS0)
preprocess_layer_instance = preprocess_coordinates_data.PreprocessLayer()  # instantiate PreprocessLayer class
processed_sequence = preprocess_layer_instance(all_tracking_sequence)  # call instance with data

print(f'input sequence shape: {all_tracking_sequence.shape}')
print(f'processed sequence shape: {processed_sequence.shape}')


input sequence shape: (1, 359, 164)
processed sequence shape: (128, 164)


In [21]:
# 4. load model

# Read the serialized architecture. The context manager closes the file even
# if read() raises, which the previous manual open()/close() pair did not.
with open('model_architecture.json', 'r') as json_file:
    loaded_model_json = json_file.read()

# Rebuild the model from its JSON architecture; custom_objects supplies the
# project's own layer classes by name.
model = model_from_json(loaded_model_json, custom_objects=custom_objects)

# load weights into the new model
model.load_weights("model.h5")

# model.summary(expand_nested=True, show_trainable=True)


In [108]:

# 5. predict
# Prepend a batch axis so the input has shape (1,128,164)
sequence = np.expand_dims(processed_sequence, axis=0)

# Run the prediction and keep only the first (and only) entry of the batch
predicted_phrase_one_hot = predict_sequence.predict_phrase(sequence, model)[0]
# Reduce the last axis to the index of its largest value
predicted_phrase = tf.argmax(predicted_phrase_one_hot, axis=-1).numpy()
print(predicted_phrase)

# Decode the indices to text.
# Note: despite its name, `true_phrase` holds the decoded *prediction* here.
true_phrase = predict_sequence.outputs2phrase(predicted_phrase, ORD2CHAR)
print(true_phrase)



[54 54 54 13 49 52 50 52 50 36 49 13 34 46 44 13 33 32 61 14 61 32 49 36
 12 17 15 16 19 61 61 61]
www.rususer.com.ba/are-2014


In [106]:
#!unzip y_test.npy.zip
#!unzip X_test.npy.zip

# Load the test arrays; the targets keep only their first MAX_PHRASE_LENGTH columns
X_test = np.load('X_test.npy')
y_test = np.load('y_test.npy')[:, :MAX_PHRASE_LENGTH]

# Show the first target row, first as indices and then decoded to text
first_target = y_test[0]
print(first_target)
true_phrase = predict_sequence.outputs2phrase(first_target, ORD2CHAR)
print(true_phrase)

[19 17 20  0 32 45 35 49 52 50  0 43 46 46 47 61 59 59 59 59 59 59 59 59
 59 59 59 59 59 59 59 59]
425 andrus loop


In [89]:
#!pip install leven

In [99]:
from leven import levenshtein
from tqdm import tqdm
def get_ld_test(batch_size):
    """Score predictions on the test set with the Levenshtein distance.

    Walks the module-level ``X_test`` / ``y_test`` arrays in chunks of
    ``batch_size``, predicts a phrase for every sample with the module-level
    ``model``, decodes predictions and targets with ``outputs2phrase`` and
    records the edit distance between each pair.

    Args:
        batch_size: Number of samples per prediction call; must be >= 1.

    Returns:
        pandas.DataFrame with one row per test sample and the columns
        'phrase_true', 'phrase_pred' and 'levenshtein_distance'.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A negative step would silently produce an empty result; fail loudly.
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    records = []
    for idx in tqdm(range(0, len(X_test), batch_size)):
        batch_frames = X_test[idx:idx+batch_size]
        true_phrases = y_test[idx:idx+batch_size]

        # Predict phrases and convert to strings. The model is passed
        # explicitly, matching the single-sequence prediction call elsewhere
        # in this notebook.
        pred_phrases = predict_sequence.predict_phrase(batch_frames, model).numpy()
        pred_phrases = [predict_sequence.outputs2phrase(p, ORD2CHAR) for p in pred_phrases]

        # True phrases: ordinal to strings
        true_phrases = [predict_sequence.outputs2phrase(t, ORD2CHAR) for t in true_phrases]

        # Add Levenshtein distances
        records.extend([{
            'phrase_true': true_phrase,
            'phrase_pred': pred_phrase,
            'levenshtein_distance': levenshtein(pred_phrase, true_phrase),
        } for true_phrase, pred_phrase in zip(true_phrases, pred_phrases)])

    # Fix the columns explicitly so an empty test set still yields the
    # expected schema.
    LD_TEST_DF = pd.DataFrame(
        records,
        columns=['phrase_true', 'phrase_pred', 'levenshtein_distance'],
    )

    return LD_TEST_DF

In [100]:
# Evaluate the whole test set, 512 samples per prediction call
LD_TEST_DF = get_ld_test(batch_size=512)

# Show the first 30 rows of the result
display(LD_TEST_DF.head(30))

100%|██████████| 19/19 [04:45<00:00, 15.03s/it]


Unnamed: 0,phrase_true,phrase_pred,levenshtein_distance
0,425 andrus loop,425 nandrus loop,1
1,3130 oak stream dr,30 ream rd,10
2,620 new hampshire 6th cutoff,www.harewh.crtoff.cr,20
3,+39-4218-2683-13,+39-428-2683-1322,3
4,666-818-8052,+66-818-805222,3
5,3242 pine place road,3242 e aco road,6
6,54 malgren,4 malagree,3
7,402-210-8595,402-210-85955,1
8,2130 altrui place,7130 altipal trails,10
9,+374-6400-15,+41-41-41-41-41,11


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
