In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'asl-signs:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F46105%2F5087314%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240423%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240423T005456Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D05de445137be291fccc16e31846b960ae442369be6b9cf76c85d5982dc27fd1c5dc4f9cfd2ded022a600781a52b00cfee68e542bb41dfda85082af356e8d746d5841f6d5f759649c931aebe0935a25026cdc36fd4352c8e285ff10ebca3f422b5928426de9bccd0205651e6553eb9c19caab051ff92390653248efe87c53bd4d13047dddce5e7e904c60b047de829dd928ea3b7008ac7bf5453ec669f5808d85d12eb32f86a61b4ee98b056fcb8ff80bdae6039103aa2fef7289547b942d309dd9e1f14c346a0d8700e6f7988911a44dbc63ea1c8a3a52d12ef8835f3946e6732bab3c4e861835a15178d6886cabbbdcf45222311e8fe7a50bb41ffb832b29ba'

KAGGLE_INPUT_PATH='kaggle/input'
KAGGLE_WORKING_PATH='kaggle/working'
KAGGLE_SYMLINK='kaggle'

os.makedirs(KAGGLE_SYMLINK)
os.makedirs(KAGGLE_INPUT_PATH, 0o777)
os.makedirs(KAGGLE_WORKING_PATH, 0o777)

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading asl-signs, 40118005859 bytes compressed
Downloaded and uncompressed: asl-signs
Data source import complete.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
from glob import glob
import pandas as pd
import torch
import torch.nn as nn

In [5]:
NUM_FRAMES=60

In [7]:
data_files = glob("kaggle/input/asl-signs/train_landmark_files/*/*.parquet", recursive=True)
print(data_files[:10])

['kaggle/input/asl-signs/train_landmark_files/4718/1160474191.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/1187990396.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/2604668083.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/2266974533.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/3835935597.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/2057287272.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/3488774387.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/3210689405.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/2905175954.parquet', 'kaggle/input/asl-signs/train_landmark_files/4718/1996140943.parquet']


In [19]:
example_pq_df = pd.read_parquet(data_files[1])
first_frame_df = example_pq_df[example_pq_df['frame'] == example_pq_df['frame'].iloc[0]]
first_frame_head_df = first_frame_df[first_frame_df['type'] == 'face']
first_frame_pose_df = first_frame_df[first_frame_df['type'] == 'pose']
first_frame_left_hand_df = first_frame_df[first_frame_df['type'] == 'left_hand']
first_frame_right_hand_df = first_frame_df[first_frame_df['type'] == 'right_hand']

print(example_pq_df.head())
print("landmarks per frame", len(first_frame_df))
print("face landmarks ", len(first_frame_head_df))
print("pose landmarks ", len(first_frame_pose_df))
print("left hand landmarks ", len(first_frame_left_hand_df))
print("right hand landmarks ", len(first_frame_right_hand_df))



   frame    row_id  type  landmark_index         x         y         z
0      9  9-face-0  face               0  0.520766  0.544308 -0.045363
1      9  9-face-1  face               1  0.509756  0.507224 -0.059449
2      9  9-face-2  face               2  0.512931  0.520984 -0.039289
3      9  9-face-3  face               3  0.503155  0.481343 -0.032631
4      9  9-face-4  face               4  0.509599  0.497557 -0.059698
landmarks per frame 543
face landmarks  468
pose landmarks  33
left hand landmarks  21
right hand landmarks  21


In [20]:
'''
Methods to either upsample or downsample frames to return NUM_FRAMES freams
'''
def interpolate_frames(pq_df, num_frames=NUM_FRAMES):
    current_frames = pq_df['frame'].unique()
    needed_frames = num_frames - len(current_frames)
    frame_interval = len(current_frames) // (needed_frames + 1)
    for i in range(1, needed_frames + 1):
        frame = i * frame_interval % len(pq_df['frame'].unique())
        end_index_of_first_half = len(pq_df[pq_df['frame'] <= frame])
        pq_df = pd.concat([pq_df[pq_df['frame'] <= frame], pq_df[pq_df['frame'] >= frame]], )
        pq_df = pq_df.reset_index(drop=True)
        pq_df.loc[pq_df.index >= end_index_of_first_half, 'frame'] += 1

    return pq_df

def extract_frames(pq_df, method='uniform' , num_frames=NUM_FRAMES):  
    if method == 'uniform':
        total_frames = len(pq_df['frame'].unique())
        step_size = total_frames // num_frames
        frame_indices_range = range(0, total_frames, step_size)
    elif method == 'end':
        unique_frames = pq_df['frame'].unique()
        frame_indices_range = unique_frames[-num_frames:]
    elif method == 'start':
        unique_frames = pq_df['frame'].unique()
        frame_indices_range = unique_frams[:num_frames]
    frame_indices = list(frame_indices_range)[-num_frames:]
    new_df = pq_df[pq_df['frame'].isin(frame_indices)]
    for index, frame in enumerate(frame_indices):
        new_df.loc[new_df['frame'] == frame, 'frame'] = index
    return new_df



In [21]:
def process_data(data_files):
    df_lists = []
    for i, data  in enumerate(data_files):
        data_df = pd.read_parquet(data)
        new_df = None
        if(i % 100 == 0):
            print(i)
        if len(data_df['frame'].unique()) < NUM_FRAMES:
            new_df = interpolate_frames(data_df)
        elif len(data_df['frame'].unique() > NUM_FRAMES):
            new_df = extract_frames(data_df)
        df_lists.append(new_df)
    return df_lists

            
            
            


In [22]:
processed_data = process_data(data_files)

0
100


KeyboardInterrupt: 

In [None]:
class NN_model(torch.nn.Module):
    def __init__(input_dims):
        super(NN_model, self).__init__()
    
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(input_dims)
        self.activation1 = nn.ReLU()
        self.linear2 = nn.Linear(output_classes)
        
        
        
    