In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

def print_input_files():
    """Print the joined path of every file found while walking /kaggle/input."""
    import os

    for root, _subdirs, names in os.walk('/kaggle/input'):
        # Build each full path lazily and emit it one per line.
        for full_path in (os.path.join(root, name) for name in names):
            print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
class Dataset:
    """Training index plus a one-file cache of the most recently read parquet file."""

    def __init__(self, train_file_path, base_folder_path):
        """Read the training CSV and remember the folder the parquet paths are relative to.

        Args:
            train_file_path: Path of the CSV file loaded into ``train_data``.
            base_folder_path: Folder that is prefixed to every ``pq_path`` passed
                to ``get_relevant_data_subset``. A trailing "/" is appended
                when it is missing.
        """
        if base_folder_path.endswith("/"):
            self.base_folder_path = base_folder_path
        else:
            # Normalise so that plain string concatenation with pq_path works.
            self.base_folder_path = base_folder_path + "/"

        # DataFrame read from train_file_path.
        self.train_data = pd.read_csv(train_file_path)
        # Contents of the parquet file that was read last (None until first read).
        self.relevant_data = None
        # Relative path of the parquet file currently held in relevant_data.
        self.pq_path = None

    def get_relevant_data_subset(self, pq_path, sequence_id):
        """Return the rows labelled ``sequence_id`` from the parquet file ``pq_path``.

        The parquet file is only read when ``pq_path`` differs from the file
        that was read last; otherwise the cached frame is reused.

        Args:
            pq_path: Parquet path relative to ``self.base_folder_path``.
            sequence_id: Index label handed to ``.loc`` on the loaded frame.

        Returns:
            The result of ``relevant_data.loc[sequence_id]``.
        """
        if pq_path != self.pq_path:
            # Resolve against the folder stored on the instance rather than a
            # module-level variable, so the class works with any base folder.
            self.relevant_data = pd.read_parquet(self.base_folder_path + pq_path)
            self.pq_path = pq_path
        return self.relevant_data.loc[sequence_id]

In [3]:
# Folder holding the competition files, and the training index CSV inside it.
base_folder_path = "/kaggle/input/asl-fingerspelling/"
train_file_path = "/kaggle/input/asl-fingerspelling/train.csv"
dataset = Dataset(
    train_file_path=train_file_path,
    base_folder_path=base_folder_path,
)

In [4]:
def calculate_frame_frequency():
    """Print total frame rows, total phrase characters, and their ratio.

    Iterates over every row of the module-level ``dataset.train_data``,
    fetching the matching rows through ``dataset.get_relevant_data_subset``.
    """
    train = dataset.train_data
    frame_count = 0
    character_count = 0

    rows = zip(train['path'], train['sequence_id'], train['phrase'])
    for parquet_file, seq_id, phrase in rows:
        frame_count += len(dataset.get_relevant_data_subset(parquet_file, seq_id))
        character_count += len(phrase)

    print(frame_count)
    print(character_count)
    ratio = frame_count / character_count
    print("Average frames per character: {}".format(ratio))

In [5]:
from collections import defaultdict, Counter

# Tally every character across all training phrases, phrase by phrase.
counter = Counter(
    character
    for phrase in dataset.train_data['phrase']
    for character in phrase
)
# most_common(1) yields a single (character, count) pair; keep the character.
most_common_character = counter.most_common(1)[0][0]

In [6]:
# TODO
# 1. Load character map
# 2. Initialize weights so that the model always maps to the most common character
# 3. Build model
# 4. Convert into TFLite
# 5. Submit

In [7]:
import json

# Load the character -> prediction-index table.
with open("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)

# Inverse table: prediction index -> character.
rev_character_map = dict(zip(character_map.values(), character_map.keys()))

print(len(rev_character_map))

59


Apparently we are not given video, but rather the 3D coordinates of different points on the body, probably to hide sensitive information.

In [8]:
import tensorflow as tf
from tensorflow.keras import models, layers

def create_model():
    """Build a frozen baseline model that always favours the most common character.

    Architecture: ``inputs`` -> ``nan_to_zero`` -> ``outputs``. NaNs in the
    input are replaced by zeros, then a non-trainable Dense layer with
    all-zero weights and a one-hot bias is applied, so for finite inputs
    every output row equals the bias vector.

    Reads the module-level ``dataset``, ``character_map``,
    ``rev_character_map`` and ``most_common_character``.

    Returns:
        A compiled ``tf.keras.Model`` (optimizer ``'adam'``, loss ``'mse'``).
    """
    # Output width is the number of known characters; input width is the
    # column count of the first training sequence.
    output_size = len(rev_character_map)
    first_row = dataset.train_data.iloc[0]
    input_size = dataset.get_relevant_data_subset(
        first_row['path'], first_row['sequence_id']).shape[1]

    # Input and NaN clean-up
    input_layer = layers.Input(shape=(input_size,), name="inputs")
    nan_to_zero = layers.Lambda(
        lambda x: tf.where(tf.math.is_nan(x), tf.zeros_like(x), x), name="nan_to_zero")(input_layer)

    # Single frozen Dense layer. Only one layer named 'outputs' is created;
    # an extra Dense layer whose result was never used has been dropped.
    dense_layer = layers.Dense(output_size, trainable=False, name='outputs')

    # Zero weights + one-hot bias: the input is ignored and the bias alone
    # selects the most common character.
    weights = np.zeros((input_size, output_size))
    bias = np.zeros((output_size,))
    bias[character_map[most_common_character]] = 1

    # The layer must be built before its weights can be assigned.
    dense_layer.build((None, input_size))
    dense_layer.set_weights([weights, bias])

    # Call the layer passing the input tensor
    output_layer = dense_layer(nan_to_zero)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    # Compile and return
    model.compile(optimizer='adam', loss='mse')
    return model

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [9]:
# Build the baseline Keras model and convert it to a serialized TFLite model.
model = create_model()
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted bytes to the working directory.
with open('model.tflite', mode='wb') as f:
    f.write(tflite_model)

In [10]:
# Show the layers, output shapes and parameter counts of the baseline model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inputs (InputLayer)         [(None, 1630)]            0         
                                                                 
 nan_to_zero (Lambda)        (None, 1630)              0         
                                                                 
 outputs (Dense)             (None, 59)                96229     
                                                                 
Total params: 96,229
Trainable params: 0
Non-trainable params: 96,229
_________________________________________________________________


In [11]:
def evaluate_model(model):
    """Run ``model`` on the first training sequence and decode the prediction.

    Args:
        model: Object exposing ``get_signature_list()`` and
            ``get_signature_runner()`` (presumably a TFLite interpreter;
            verify against the caller).

    Returns:
        A string with one character per output row, taken from the argmax of
        the ``"outputs"`` tensor. Indices missing from the character map
        contribute an empty string.

    Raises:
        RuntimeError: If the ``"serving_default"`` signature is not present.
    """
    first_row = dataset.train_data.iloc[0]
    frames = dataset.get_relevant_data_subset(first_row['path'], first_row['sequence_id'])

    REQUIRED_SIGNATURE = "serving_default"
    REQUIRED_OUTPUT = "outputs"

    with open("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
        character_map = json.load(f)
    rev_character_map = {j: i for i, j in character_map.items()}

    found_signatures = list(model.get_signature_list().keys())

    if REQUIRED_SIGNATURE not in found_signatures:
        # Raise a built-in exception: KernelEvalException is not defined in
        # this notebook, so raising it would surface as a NameError instead.
        raise RuntimeError('Required input signature not found.')

    prediction_fn = model.get_signature_runner(REQUIRED_SIGNATURE)
    output = prediction_fn(inputs=frames)
    predicted_indices = np.argmax(output[REQUIRED_OUTPUT], axis=1)
    prediction_str = "".join([rev_character_map.get(s, "") for s in predicted_indices])
    return prediction_str

In [12]:
# Package the converted model.tflite into submission.zip.
!zip -r submission.zip model.tflite

  adding: model.tflite (deflated 100%)


In [13]:
# Equivalent of how the model is evaluated

def evaluate_model_src():
    """Load the saved TFLite model and decode its prediction for one sequence.

    Opens ``/kaggle/working/model.tflite`` with ``tflite_runtime``, runs its
    ``"serving_default"`` signature on the first training sequence of the
    module-level ``dataset``, and maps the argmax of each output row to a
    character.

    Returns:
        A string with one character per output row. Indices missing from the
        character map contribute an empty string.

    Raises:
        RuntimeError: If the ``"serving_default"`` signature is not present.
    """
    model_path = "/kaggle/working/model.tflite"
    first_row = dataset.train_data.iloc[0]
    frames = dataset.get_relevant_data_subset(first_row['path'], first_row['sequence_id'])

    # Imported here so the rest of the notebook does not require tflite_runtime.
    import tflite_runtime.interpreter as tflite
    interpreter = tflite.Interpreter(model_path)

    REQUIRED_SIGNATURE = "serving_default"
    REQUIRED_OUTPUT = "outputs"

    # Same character-map file the rest of this notebook reads.
    with open("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
        character_map = json.load(f)
    rev_character_map = {j: i for i, j in character_map.items()}

    found_signatures = list(interpreter.get_signature_list().keys())

    if REQUIRED_SIGNATURE not in found_signatures:
        # Raise a built-in exception: KernelEvalException is not defined in
        # this notebook, so raising it would surface as a NameError instead.
        raise RuntimeError('Required input signature not found.')

    prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)
    output = prediction_fn(inputs=frames)
    predicted_indices = np.argmax(output[REQUIRED_OUTPUT], axis=1)
    prediction_str = "".join([rev_character_map.get(s, "") for s in predicted_indices])
    return prediction_str