In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm 
import zipfile
import tensorflow as tf


In [2]:
zf = zipfile.ZipFile('asl-signs.zip') 
train_df = pd.read_csv(zf.open('train.csv'))
train = pd.read_csv(zf.open('train.csv'))

In [3]:
# Add ordinally Encoded Sign (assign number to each sign name)
train['sign_ord'] = train['sign'].astype('category').cat.codes

# Dictionaries to translate sign <-> ordinal encoded sign
SIGN2ORD = train[['sign', 'sign_ord']].set_index('sign').squeeze().to_dict()
ORD2SIGN = train[['sign_ord', 'sign']].set_index('sign_ord').squeeze().to_dict()

In [4]:
ROWS_PER_FRAME = 543  # number of landmarks per frame

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

In [5]:

def get_model_predictions(model_path, train):
    model_predictions = []
    interpreter = tf.lite.Interpreter(model_path)
    prediction_fn = interpreter.get_signature_runner("serving_default")
    
    for idx, row in tqdm(train.iterrows(), total=train.shape[0]):
        pq_path = row['path']
        pq_file = zf.open(pq_path)
        xyz_np = load_relevant_data_subset(pq_file)
        prediction = prediction_fn(inputs=xyz_np)
        output = prediction['outputs'].squeeze()  # This removes dimensions of size 1
        model_predictions.append(output)


        
    return np.array(model_predictions)

all_model_predictions = np.hstack([
    get_model_predictions("./model_119.tflite", train).reshape(train.shape[0], -1),
    get_model_predictions("./model_first.tflite", train).reshape(train.shape[0], -1),
    get_model_predictions("./model_newbie.tflite", train).reshape(train.shape[0], -1),
    get_model_predictions("./model_ConvLSTM.tflite", train).reshape(train.shape[0], -1),
])

 30%|██████████▎                        | 27931/94477 [39:34<1:34:17, 11.76it/s]


KeyboardInterrupt: 

In [10]:

#  Create New Training Set
ensemble_train_X = all_model_predictions  # Features from models' predictions
ensemble_train_y = train['sign_ord'].values  # True labels

# Save this DataFrame to a CSV file
csv_file = 'ensemble_training_data.csv'
ensemble_data.to_csv(csv_file, index=False)

# Compress the CSV file into a Zip file
zip_file = 'ensemble_training_data.zip'
with zipfile.ZipFile(zip_file, 'w') as zf:
    zf.write(csv_file, compress_type=zipfile.ZIP_DEFLATED)

print(f"Saved the dataset to {zip_file}")


NameError: name 'ensemble_data' is not defined

In [None]:
input_shape = ensemble_train_X.shape[1]  # Number of features
num_classes = len(np.unique(ensemble_train_y))  # Number of unique classes

model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(input_shape,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(ensemble_train_X, ensemble_train_y, epochs=10, batch_size=32, validation_split=0.1)

# Convert the trained model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TFLite model to a file
with open('Final.tflite', 'wb') as f:
    f.write(tflite_model)