# Downloading the dataset

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("risangbaskoro/wlasl-processed")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/risangbaskoro/wlasl-processed/versions/5


In [None]:
import pandas as pd
import numpy as np
import json
import os

# Opening the dataset

In [None]:
wlas_df = pd.read_json(path + '/WLASL_v0.3.json')

In [None]:
wlas_df.head()

Unnamed: 0,gloss,instances
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra..."
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."


# Extracting the Videos list from the dataset

In [None]:
def get_videos_ids(json_list):
    """
    function to check if the video id is available in the dataset
    and return the viedos ids of the current instance

    input: instance json list
    output: list of videos_ids

    """
    videos_list = []
    for ins in json_list:
        video_id = ins['video_id']
        if os.path.exists(f'{path}/videos/{video_id}.mp4'):
            videos_list.append(video_id)
    return videos_list

In [None]:
def get_json_features(json_list):
    """
    function to check if the video id is available in the dataset
    and return the viedos ids and url or any other featrue of the current instance

    input: instance json list
    output: list of videos_ids

    """
    videos_ids = []
    videos_urls = []
    for ins in json_list:
        video_id = ins['video_id']
        video_url = ins['url']
        if os.path.exists(f'{path}/videos/{video_id}.mp4'):
            videos_ids.append(video_id)
            videos_urls.append(video_url)
    return videos_ids, videos_urls

In [None]:
with open(path+'/WLASL_v0.3.json', 'r') as data_file:
    json_data = data_file.read()

instance_json = json.loads(json_data)

In [None]:
get_videos_ids(instance_json[0]['instances'])[0]

'69241'

In [None]:
len(get_videos_ids(instance_json[0]['instances']))

6

In [None]:
wlas_df['videos_ids'] = wlas_df['instances'].apply(get_videos_ids)

In [None]:
features_df = pd.DataFrame(columns=['gloss', 'video_id', 'url'])
for row in wlas_df.iterrows():
#     print(row[1][1])
    ids, urls = get_json_features(row[1][1])
    word = [row[1][0]] * len(ids)
    df = pd.DataFrame(list(zip(word, ids, urls)), columns = features_df.columns)
    features_df = pd.concat([features_df,df], ignore_index=True)

  ids, urls = get_json_features(row[1][1])
  word = [row[1][0]] * len(ids)


# Task Dataframe


In [None]:
features_df.index.name = 'index'
features_df

Unnamed: 0_level_0,gloss,video_id,url
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,book,69241,http://aslbricks.org/New/ASL-Videos/book.mp4
1,book,07069,https://signstock.blob.core.windows.net/signsc...
2,book,07068,https://s3-us-west-1.amazonaws.com/files.start...
3,book,07070,https://media.asldeafined.com/vocabulary/14666...
4,book,07099,http://www.aslsearch.com/signs/videos/book.mp4
...,...,...,...
11975,wheelchair,63047,https://www.signingsavvy.com/signs/mp4/5/5233.mp4
11976,wheelchair,63050,http://www.aslsearch.com/signs/videos/wheelcha...
11977,whistle,63186,https://media.spreadthesign.com/video/mp4/13/9...
11978,whistle,63188,https://www.signingsavvy.com/signs/mp4/9/9961.mp4


In [None]:
features_df.to_csv('features_df.csv', index=False)

In [None]:
import pandas as pd
import requests
from pathlib import Path

# Load the dataset
data = pd.read_csv("features_df.csv")  # Replace with the actual file name

# Create a directory to save the videos
video_dir = Path("videos")
video_dir.mkdir(exist_ok=True)

# Download videos from the dataset
for index, row in data.iterrows():
    video_path = video_dir / f"{row['video_id']}.mp4"
    if not video_path.exists():  # Skip if already downloaded
        try:
            response = requests.get(row['url'], stream=True, timeout=30)
            if response.status_code == 200:
                with open(video_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        f.write(chunk)
                print(f"Downloaded: {row['gloss']} ({row['video_id']})")
            else:
                print(f"Failed to download: {row['url']}")
        except requests.RequestException as e:
            print(f"Error downloading {row['url']}: {e}")
    else:
        print(f"Already exists: {row['gloss']} ({row['video_id']})")


Downloaded: book (69241)
Downloaded: book (7069)
Downloaded: book (7068)
Downloaded: book (7070)
Error downloading http://www.aslsearch.com/signs/videos/book.mp4: HTTPConnectionPool(host='www.aslsearch.com', port=80): Max retries exceeded with url: /signs/videos/book.mp4 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ae40195f710>: Failed to resolve 'www.aslsearch.com' ([Errno -2] Name or service not known)"))
Downloaded: book (7074)
Downloaded: drink (69302)
Failed to download: https://aslsignbank.haskins.yale.edu/dictionary/protected_media/glossvideo/ASL/DR/DRINK-119.mp4
Downloaded: drink (17710)
Error downloading http://www.aslsearch.com/signs/videos/drink-alcohol.mp4: HTTPConnectionPool(host='www.aslsearch.com', port=80): Max retries exceeded with url: /signs/videos/drink-alcohol.mp4 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ae40195e8d0>: Failed to resolve 'www.aslsearch.com' ([Errno -2] Name or service not known

KeyboardInterrupt: 

In [None]:
wlas_df.head()

Unnamed: 0,gloss,instances,videos_ids
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]"
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]"


In [None]:
for index, row in data.iterrows():
    gloss_folder = video_dir / row['gloss']
    gloss_folder.mkdir(exist_ok=True, parents=True)
    video_path = video_dir / f"{row['video_id']}.mp4"
    if video_path.exists():
        video_path.rename(gloss_folder / f"{row['video_id']}.mp4")


In [None]:
import cv2
from pathlib import Path

# Define the directory paths
video_dir = Path("videos")
frame_dir = Path("frames")
frame_dir.mkdir(exist_ok=True)

# Extract frames from each video
for gloss_folder in video_dir.iterdir():
    if gloss_folder.is_dir():  # Each folder contains videos for a gloss
        gloss_frame_dir = frame_dir / gloss_folder.name
        gloss_frame_dir.mkdir(exist_ok=True)
        for video_file in gloss_folder.glob("*.mp4"):
            cap = cv2.VideoCapture(str(video_file))
            frame_count = 0
            video_frame_dir = gloss_frame_dir / video_file.stem
            video_frame_dir.mkdir(exist_ok=True)
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_path = video_frame_dir / f"frame_{frame_count:04d}.jpg"
                cv2.imwrite(str(frame_path), frame)
                frame_count += 1
            cap.release()
            print(f"Extracted frames from {video_file.name}")


In [None]:
!pip install mediapipe

In [None]:
import mediapipe as mp
import cv2
import numpy as np

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Function to extract keypoints from a frame
def extract_keypoints(frame):
    with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks:
            keypoints = []
            for hand_landmarks in results.multi_hand_landmarks:
                for landmark in hand_landmarks.landmark:
                    keypoints.extend([landmark.x, landmark.y, landmark.z])
            return np.array(keypoints).flatten()
        else:
            return np.zeros(63)  # No hands detected

# Process frames to extract keypoints
keypoint_dir = Path("keypoints")
keypoint_dir.mkdir(exist_ok=True)

for gloss_folder in frame_dir.iterdir():
    if gloss_folder.is_dir():
        gloss_keypoint_dir = keypoint_dir / gloss_folder.name
        gloss_keypoint_dir.mkdir(exist_ok=True)
        for video_frame_dir in gloss_folder.iterdir():
            keypoints_file = gloss_keypoint_dir / f"{video_frame_dir.name}.npy"
            all_keypoints = []
            for frame_file in sorted(video_frame_dir.glob("*.jpg")):
                frame = cv2.imread(str(frame_file))
                keypoints = extract_keypoints(frame)
                all_keypoints.append(keypoints)
            np.save(keypoints_file, np.array(all_keypoints))
            print(f"Saved keypoints for {video_frame_dir.name}")


# Dataset Classes Analysis

In [None]:
wlas_df['samples_num'] = wlas_df['videos_ids'].apply(len)

In [None]:
wlas_df.head()

Unnamed: 0,gloss,instances,videos_ids,samples_num
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]",6
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177...",15
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123...",14
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057...",16
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]",7


In [None]:
print("minimum number of samples for a word:", wlas_df['samples_num'].min())
print("maximum number of samples for a word:", wlas_df['samples_num'].max())

minimum number of samples for a word: 2
maximum number of samples for a word: 16


In [None]:
words_sample_counts = wlas_df[['gloss', 'samples_num']].groupby('samples_num').agg({"gloss":['count', ', '.join]})

In [None]:
words_sample_counts

Unnamed: 0_level_0,gloss,gloss
Unnamed: 0_level_1,count,join
samples_num,Unnamed: 1_level_2,Unnamed: 2_level_2
2,14,"gloves, careless, wash face, curtain, grey, la..."
3,76,"garage, parents, boots, excuse, furniture, rep..."
4,335,"hello, newspaper, asl, that, will, cards, hate..."
5,511,"clothes, table, movie, clock, pencil, behind, ..."
6,402,"book, birthday, need, have, knife, read, name,..."
7,307,"chair, dance, eat, forget, but, jacket, paint,..."
8,151,"all, blue, hearing, wrong, color, enjoy, time,..."
9,100,"fine, finish, now, can, hat, kiss, cow, meet, ..."
10,48,"year, black, hot, like, many, orange, fish, gr..."
11,26,"deaf, no, walk, mother, woman, dog, family, ap..."


In [None]:
words_sample_counts.loc[2].values[1]

'gloves, careless, wash face, curtain, grey, lamp, look at, meaning, post, propaganda, ski, smoking, stepfather, tv'

# Training

In [None]:
import os
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.20 sounddevice-0.5.1


In [None]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.9)

In [None]:
# Download latest version
asl_path = kagglehub.dataset_download("kapillondhe/american-sign-language")

print("Path to dataset files:", asl_path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/kapillondhe/american-sign-language?dataset_version_number=1...


100%|██████████| 4.64G/4.64G [00:51<00:00, 97.1MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/kapillondhe/american-sign-language/versions/1


In [None]:
!cd

/bin/bash: line 1: cd: @asl_path: No such file or directory


In [None]:
train = pd.read_csv(f'{asl_path}/asl-signs/train.csv')

FileNotFoundError: [Errno 2] No such file or directory: '/root/.cache/kagglehub/datasets/kapillondhe/american-sign-language/versions/1/asl-signs/train.csv'

# New Dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd

In [3]:
train_path = "/content/drive/MyDrive/ASL_Set/train/"

In [4]:
train_df = pd.read_csv(f"{train_path}_annotations.csv")

In [5]:
train_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,U7_jpg.rf.0037faea78f8a89329a93006132921b3.jpg,390,390,U,38,69,347,389
1,P12_jpg.rf.0046c1c30abbbccd31716c5b2ad835b9.jpg,372,372,P,84,203,330,332
2,K4_jpg.rf.00821732715c9137b8060360770ea1d8.jpg,372,372,K,42,12,351,369
3,W6_jpg.rf.00d19bc3a49f6469e2afa3aa92f14ff4.jpg,412,412,W,22,73,377,412
4,J30_jpg.rf.00d20e595026b31773ded47509545471.jpg,382,382,J,122,204,250,334


In [6]:
train_df = train_df.assign(
    letter=train_df["filename"].str[0]
)

In [7]:
train_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,letter
0,U7_jpg.rf.0037faea78f8a89329a93006132921b3.jpg,390,390,U,38,69,347,389,U
1,P12_jpg.rf.0046c1c30abbbccd31716c5b2ad835b9.jpg,372,372,P,84,203,330,332,P
2,K4_jpg.rf.00821732715c9137b8060360770ea1d8.jpg,372,372,K,42,12,351,369,K
3,W6_jpg.rf.00d19bc3a49f6469e2afa3aa92f14ff4.jpg,412,412,W,22,73,377,412,W
4,J30_jpg.rf.00d20e595026b31773ded47509545471.jpg,382,382,J,122,204,250,334,J


In [None]:
labels = train_df["class"].unique()

labels

In [8]:
# For running inference on the TF-Hub module.
import tensorflow as tf

import tensorflow_hub as hub

# For downloading the image.
import matplotlib.pyplot as plt
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

# Print Tensorflow version
print(tf.__version__)

# Check available GPU devices.
print("The following GPU devices are available: %s" % tf.test.gpu_device_name())

2.17.1
The following GPU devices are available: /device:GPU:0


In [9]:
def display_image(image):
  fig = plt.figure(figsize=(20, 15))
  plt.grid(False)
  plt.imshow(image)

def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
  """Adds a bounding box to an image."""
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                ymin * im_height, ymax * im_height)
  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = top + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    bbox = font.getbbox(display_str)
    text_width, text_height = bbox[2], bbox[3]
    margin = np.ceil(0.05 * text_height)
    draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                    (left + text_width, text_bottom)],
                   fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill="black",
              font=font)
    text_bottom -= text_height - 2 * margin


def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):
  """Overlay labeled boxes on an image with formatted scores and label names."""
  colors = list(ImageColor.colormap.values())

  try:
    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf",
                              25)
  except IOError:
    print("Font not found, using default font.")
    font = ImageFont.load_default()

  for i in range(min(boxes.shape[0], max_boxes)):
    if scores[i] >= min_score:
      ymin, xmin, ymax, xmax = tuple(boxes[i])
      display_str = "{}: {}%".format(class_names[i].decode("ascii"),
                                     int(100 * scores[i]))
      color = colors[hash(class_names[i]) % len(colors)]
      image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
      draw_bounding_box_on_image(
          image_pil,
          ymin,
          xmin,
          ymax,
          xmax,
          color,
          font,
          display_str_list=[display_str])
      np.copyto(image, np.array(image_pil))
  return image

In [10]:
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"

detector = hub.load(module_handle).signatures['default']

In [9]:
def load_img(path):
  img = tf.io.read_file(path)
  img = tf.image.decode_jpeg(img, channels=3)
  return img

In [None]:
def run_detector(detector, path):
  img = load_img(path)

  converted_img  = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
  start_time = time.time()
  result = detector(converted_img)
  end_time = time.time()

  result = {key:value.numpy() for key,value in result.items()}

  print("Found %d objects." % len(result["detection_scores"]))
  print("Inference time: ", end_time-start_time)

  image_with_boxes = draw_boxes(
      img.numpy(), result["detection_boxes"],
      result["detection_class_entities"], result["detection_scores"])

  display_image(image_with_boxes)

In [None]:
sample_img = train_path+train_df.iloc[2]["filename"]

run_detector(detector, sample_img)

In [12]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.20 sounddevice-0.5.1


In [13]:
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Labeling the data

In [21]:
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [None]:
## 1st: switch to a gpu enabled runtime (from menu)

!git clone https://github.com/opencv/opencv
!git clone https://github.com/opencv/opencv_contrib
!mkdir /content/build
%cd /content/build

!cmake -DOPENCV_EXTRA_MODULES_PATH=/content/opencv_contrib/modules  \
       -DBUILD_SHARED_LIBS=OFF \
       -DBUILD_TESTS=OFF \
       -DBUILD_PERF_TESTS=OFF \
       -DBUILD_EXAMPLES=OFF \
       -DWITH_OPENEXR=OFF \
       -DWITH_CUDA=ON \
       -DWITH_CUBLAS=ON \
       -DWITH_CUDNN=ON \
       -DOPENCV_DNN_CUDA=ON \
       /content/opencv

!make -j8

## the new cv2.so is now in /content/build/lib/python3/
## you are not allowed a normal install on colab, (not root)
## but you can copy it into your work folder and restart the runtime, so it forgets about the old version !

import cv2
cv2.__version__

!ls -l

## save for later use:
!cp  /content/build/lib/python3/cv2.cpython-37m-x86_64-linux-gnu.so   "/content/drive/My Drive/cv2_cuda"

## next time, load it into your work folder:
## dont forget to restart the runtime, so it forgets about the old version !
!cp "/content/drive/My Drive/cv2_cuda/cv2.cpython-37m-x86_64-linux-gnu.so" .

Cloning into 'opencv'...
remote: Enumerating objects: 340647, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (21/21), done.[K
remote: Total 340647 (delta 17), reused 8 (delta 8), pack-reused 340618 (from 4)[K
Receiving objects: 100% (340647/340647), 530.15 MiB | 39.79 MiB/s, done.
Resolving deltas: 100% (237592/237592), done.
Updating files: 100% (7590/7590), done.
Cloning into 'opencv_contrib'...
remote: Enumerating objects: 43126, done.[K
remote: Counting objects: 100% (4013/4013), done.[K
remote: Compressing objects: 100% (1400/1400), done.[K
remote: Total 43126 (delta 2949), reused 2613 (delta 2613), pack-reused 39113 (from 2)[K
Receiving objects: 100% (43126/43126), 152.12 MiB | 45.53 MiB/s, done.
Resolving deltas: 100% (26618/26618), done.
/content/build
  Compatibility with CMake < 3.10 will be removed from a future version of
  CMake.

  Update the VERSION argument <min> value.  Or, use the <min>...<max> syntax
  to tell CMake 

In [None]:
## the new cv2.so is now in /content/build/lib/python3/
## you are not allowed a normal install on colab, (not root)
## but you can copy it into your work folder and restart the runtime, so it forgets about the old version !

import cv2
cv2.__version__

!ls -l

## save for later use:
!cp  /content/build/lib/python3/cv2.cpython-37m-x86_64-linux-gnu.so   "/content/drive/My Drive/cv2_cuda"

## next time, load it into your work folder:
## dont forget to restart the runtime, so it forgets about the old version !
!cp "/content/drive/My Drive/cv2_cuda/cv2.cpython-37m-x86_64-linux-gnu.so" .

In [15]:
data_ = []
labels_ = []

complete = pd.DataFrame({"data": [], "label": []})

In [14]:
train_df["letter"]

Unnamed: 0,letter
0,U
1,P
2,K
3,W
4,J
...,...
1507,C
1508,A
1509,A
1510,W


In [60]:
for idx, row in train_df.iterrows():
  print(row["filename"])

U7_jpg.rf.0037faea78f8a89329a93006132921b3.jpg
P12_jpg.rf.0046c1c30abbbccd31716c5b2ad835b9.jpg
K4_jpg.rf.00821732715c9137b8060360770ea1d8.jpg
W6_jpg.rf.00d19bc3a49f6469e2afa3aa92f14ff4.jpg
J30_jpg.rf.00d20e595026b31773ded47509545471.jpg
M27_jpg.rf.010d328cdc61a634aec1540863be23b9.jpg
N23_jpg.rf.01428a442131e7dcbdb4453df83877e0.jpg
X13_jpg.rf.015a5271f775ab06dd3af7cb59264404.jpg
S28_jpg.rf.015ec81f1c531610f515d4135ae7a335.jpg
D23_jpg.rf.0180ca2b1fbc4ac99d39ff957e9666ea.jpg
D6_jpg.rf.01867634b70d3f9ea5adbb09d9643830.jpg
Z15_jpg.rf.01dae2ff4bb13d57fb3555e92eeb4778.jpg
F28_jpg.rf.01de9888a0d78bebc1fbb6b841fdef6e.jpg
O1_jpg.rf.01eb8369467a4e02535411288eef34f7.jpg
J2_jpg.rf.01f65b3db913fe28c20d99cf89124c9f.jpg
T6_jpg.rf.02bb707b9ac0fb76c97517ccf4f47ab4.jpg
J34_jpg.rf.0263901188a9de96c9bca11274a772ab.jpg
E21_jpg.rf.02bd3a1ec0e99694816e883b8de32942.jpg
H10_jpg.rf.02fd419c4bf7c570ee19d8813124842d.jpg
S25_jpg.rf.02bfc4e839ba3b795c16dc1a21f958dc.jpg
T22_jpg.rf.02e1fc73db34427a44a8888dc77003c6.jpg

In [16]:
for idx, row in train_df.iterrows():
  data_aux = []

  img = cv2.imread(train_path+row["filename"])
  img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  results = hands.process(img_rgb)

  if results.multi_hand_landmarks:
    for hand_landmarks in results.multi_hand_landmarks:
      for i in range(len(hand_landmarks.landmark)):
        x,y = hand_landmarks.landmark[i].x, hand_landmarks.landmark[i].y
        data_aux.append(x)
        data_aux.append(y)

    data_.append(data_aux)
    labels_.append(row["letter"])

#   plt.figure()
#   plt.imshow(img_rgb)

# plt.show()

KeyboardInterrupt: 

In [None]:
labels_

In [40]:
pd.DataFrame({'data': data_, 'label': labels_})

Unnamed: 0,data,label
0,"[0.42192527651786804, 0.9444795250892639, 0.34...",0 U 1 P 2 K 3 W 4 ...
1,"[0.7108935117721558, 0.8370058536529541, 0.728...",0 U 1 P 2 K 3 W 4 ...
2,"[0.34697702527046204, 0.9418112635612488, 0.24...",0 U 1 P 2 K 3 W 4 ...
3,"[0.3915402293205261, 0.7744323015213013, 0.472...",0 U 1 P 2 K 3 W 4 ...
4,"[0.4427641034126282, 0.6336749792098999, 0.359...",0 U 1 P 2 K 3 W 4 ...
...,...,...
1248,"[0.4725436866283417, 0.7554713487625122, 0.456...",0 U 1 P 2 K 3 W 4 ...
1249,"[0.5238912105560303, 0.502618134021759, 0.4015...",0 U 1 P 2 K 3 W 4 ...
1250,"[0.7002522349357605, 0.7012301683425903, 0.484...",0 U 1 P 2 K 3 W 4 ...
1251,"[0.6125355362892151, 0.830099880695343, 0.6829...",0 U 1 P 2 K 3 W 4 ...


In [None]:
data_

In [None]:
labels_