# Attempt at training videos.

## Downloading the dataset

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("risangbaskoro/wlasl-processed")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/risangbaskoro/wlasl-processed/versions/5


In [None]:
import pandas as pd
import numpy as np
import json
import os

## Opening the dataset

In [None]:
wlas_df = pd.read_json(path + '/WLASL_v0.3.json')

In [None]:
wlas_df.head()

Unnamed: 0,gloss,instances
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra..."
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_..."


## Extracting the Videos list from the dataset

In [None]:
def get_videos_ids(json_list):
    """
    function to check if the video id is available in the dataset
    and return the viedos ids of the current instance

    input: instance json list
    output: list of videos_ids

    """
    videos_list = []
    for ins in json_list:
        video_id = ins['video_id']
        if os.path.exists(f'{path}/videos/{video_id}.mp4'):
            videos_list.append(video_id)
    return videos_list

In [None]:
def get_json_features(json_list):
    """
    function to check if the video id is available in the dataset
    and return the viedos ids and url or any other featrue of the current instance

    input: instance json list
    output: list of videos_ids

    """
    videos_ids = []
    videos_urls = []
    for ins in json_list:
        video_id = ins['video_id']
        video_url = ins['url']
        if os.path.exists(f'{path}/videos/{video_id}.mp4'):
            videos_ids.append(video_id)
            videos_urls.append(video_url)
    return videos_ids, videos_urls

In [None]:
with open(path+'/WLASL_v0.3.json', 'r') as data_file:
    json_data = data_file.read()

instance_json = json.loads(json_data)

In [None]:
get_videos_ids(instance_json[0]['instances'])[0]

'69241'

In [None]:
len(get_videos_ids(instance_json[0]['instances']))

6

In [None]:
wlas_df['videos_ids'] = wlas_df['instances'].apply(get_videos_ids)

In [None]:
features_df = pd.DataFrame(columns=['gloss', 'video_id', 'url'])
for row in wlas_df.iterrows():
#     print(row[1][1])
    ids, urls = get_json_features(row[1][1])
    word = [row[1][0]] * len(ids)
    df = pd.DataFrame(list(zip(word, ids, urls)), columns = features_df.columns)
    features_df = pd.concat([features_df,df], ignore_index=True)

  ids, urls = get_json_features(row[1][1])
  word = [row[1][0]] * len(ids)


## Task Dataframe


In [None]:
features_df.index.name = 'index'
features_df

Unnamed: 0_level_0,gloss,video_id,url
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,book,69241,http://aslbricks.org/New/ASL-Videos/book.mp4
1,book,07069,https://signstock.blob.core.windows.net/signsc...
2,book,07068,https://s3-us-west-1.amazonaws.com/files.start...
3,book,07070,https://media.asldeafined.com/vocabulary/14666...
4,book,07099,http://www.aslsearch.com/signs/videos/book.mp4
...,...,...,...
11975,wheelchair,63047,https://www.signingsavvy.com/signs/mp4/5/5233.mp4
11976,wheelchair,63050,http://www.aslsearch.com/signs/videos/wheelcha...
11977,whistle,63186,https://media.spreadthesign.com/video/mp4/13/9...
11978,whistle,63188,https://www.signingsavvy.com/signs/mp4/9/9961.mp4


In [None]:
features_df.to_csv('features_df.csv', index=False)

In [None]:
import pandas as pd
import requests
from pathlib import Path

# Load the dataset
data = pd.read_csv("features_df.csv")  # Replace with the actual file name

# Create a directory to save the videos
video_dir = Path("videos")
video_dir.mkdir(exist_ok=True)

# Download videos from the dataset
for index, row in data.iterrows():
    video_path = video_dir / f"{row['video_id']}.mp4"
    if not video_path.exists():  # Skip if already downloaded
        try:
            response = requests.get(row['url'], stream=True, timeout=30)
            if response.status_code == 200:
                with open(video_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        f.write(chunk)
                print(f"Downloaded: {row['gloss']} ({row['video_id']})")
            else:
                print(f"Failed to download: {row['url']}")
        except requests.RequestException as e:
            print(f"Error downloading {row['url']}: {e}")
    else:
        print(f"Already exists: {row['gloss']} ({row['video_id']})")


Downloaded: book (69241)
Downloaded: book (7069)
Downloaded: book (7068)
Downloaded: book (7070)
Error downloading http://www.aslsearch.com/signs/videos/book.mp4: HTTPConnectionPool(host='www.aslsearch.com', port=80): Max retries exceeded with url: /signs/videos/book.mp4 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ae40195f710>: Failed to resolve 'www.aslsearch.com' ([Errno -2] Name or service not known)"))
Downloaded: book (7074)
Downloaded: drink (69302)
Failed to download: https://aslsignbank.haskins.yale.edu/dictionary/protected_media/glossvideo/ASL/DR/DRINK-119.mp4
Downloaded: drink (17710)
Error downloading http://www.aslsearch.com/signs/videos/drink-alcohol.mp4: HTTPConnectionPool(host='www.aslsearch.com', port=80): Max retries exceeded with url: /signs/videos/drink-alcohol.mp4 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7ae40195e8d0>: Failed to resolve 'www.aslsearch.com' ([Errno -2] Name or service not known

KeyboardInterrupt: 

In [None]:
wlas_df.head()

Unnamed: 0,gloss,instances,videos_ids
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]"
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]"


In [None]:
for index, row in data.iterrows():
    gloss_folder = video_dir / row['gloss']
    gloss_folder.mkdir(exist_ok=True, parents=True)
    video_path = video_dir / f"{row['video_id']}.mp4"
    if video_path.exists():
        video_path.rename(gloss_folder / f"{row['video_id']}.mp4")


In [None]:
import cv2
from pathlib import Path

# Define the directory paths
video_dir = Path("videos")
frame_dir = Path("frames")
frame_dir.mkdir(exist_ok=True)

# Extract frames from each video
for gloss_folder in video_dir.iterdir():
    if gloss_folder.is_dir():  # Each folder contains videos for a gloss
        gloss_frame_dir = frame_dir / gloss_folder.name
        gloss_frame_dir.mkdir(exist_ok=True)
        for video_file in gloss_folder.glob("*.mp4"):
            cap = cv2.VideoCapture(str(video_file))
            frame_count = 0
            video_frame_dir = gloss_frame_dir / video_file.stem
            video_frame_dir.mkdir(exist_ok=True)
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_path = video_frame_dir / f"frame_{frame_count:04d}.jpg"
                cv2.imwrite(str(frame_path), frame)
                frame_count += 1
            cap.release()
            print(f"Extracted frames from {video_file.name}")


In [None]:
!pip install mediapipe

In [None]:
import mediapipe as mp
import cv2
import numpy as np

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Function to extract keypoints from a frame
def extract_keypoints(frame):
    with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks:
            keypoints = []
            for hand_landmarks in results.multi_hand_landmarks:
                for landmark in hand_landmarks.landmark:
                    keypoints.extend([landmark.x, landmark.y, landmark.z])
            return np.array(keypoints).flatten()
        else:
            return np.zeros(63)  # No hands detected

# Process frames to extract keypoints
keypoint_dir = Path("keypoints")
keypoint_dir.mkdir(exist_ok=True)

for gloss_folder in frame_dir.iterdir():
    if gloss_folder.is_dir():
        gloss_keypoint_dir = keypoint_dir / gloss_folder.name
        gloss_keypoint_dir.mkdir(exist_ok=True)
        for video_frame_dir in gloss_folder.iterdir():
            keypoints_file = gloss_keypoint_dir / f"{video_frame_dir.name}.npy"
            all_keypoints = []
            for frame_file in sorted(video_frame_dir.glob("*.jpg")):
                frame = cv2.imread(str(frame_file))
                keypoints = extract_keypoints(frame)
                all_keypoints.append(keypoints)
            np.save(keypoints_file, np.array(all_keypoints))
            print(f"Saved keypoints for {video_frame_dir.name}")


## Dataset Classes Analysis

In [None]:
wlas_df['samples_num'] = wlas_df['videos_ids'].apply(len)

In [None]:
wlas_df.head()

Unnamed: 0,gloss,instances,videos_ids,samples_num
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]",6
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177...",15
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123...",14
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057...",16
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]",7


In [None]:
print("minimum number of samples for a word:", wlas_df['samples_num'].min())
print("maximum number of samples for a word:", wlas_df['samples_num'].max())

minimum number of samples for a word: 2
maximum number of samples for a word: 16


In [None]:
words_sample_counts = wlas_df[['gloss', 'samples_num']].groupby('samples_num').agg({"gloss":['count', ', '.join]})

In [None]:
words_sample_counts

Unnamed: 0_level_0,gloss,gloss
Unnamed: 0_level_1,count,join
samples_num,Unnamed: 1_level_2,Unnamed: 2_level_2
2,14,"gloves, careless, wash face, curtain, grey, la..."
3,76,"garage, parents, boots, excuse, furniture, rep..."
4,335,"hello, newspaper, asl, that, will, cards, hate..."
5,511,"clothes, table, movie, clock, pencil, behind, ..."
6,402,"book, birthday, need, have, knife, read, name,..."
7,307,"chair, dance, eat, forget, but, jacket, paint,..."
8,151,"all, blue, hearing, wrong, color, enjoy, time,..."
9,100,"fine, finish, now, can, hat, kiss, cow, meet, ..."
10,48,"year, black, hot, like, many, orange, fish, gr..."
11,26,"deaf, no, walk, mother, woman, dog, family, ap..."


In [None]:
words_sample_counts.loc[2].values[1]

'gloves, careless, wash face, curtain, grey, lamp, look at, meaning, post, propaganda, ski, smoking, stepfather, tv'

## Training

In [2]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.20 sounddevice-0.5.1


In [3]:
import os
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.9)

In [6]:
import kagglehub

In [8]:
# Download latest version
asl_path = kagglehub.dataset_download("datamunge/sign-language-mnist")

print("Path to dataset files:", asl_path)

Path to dataset files: /root/.cache/kagglehub/datasets/datamunge/sign-language-mnist/versions/1


In [9]:
train = pd.read_csv(f'{asl_path}/sign_mnist_train.csv')

In [10]:
train

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,3,107,118,127,134,139,143,146,150,153,...,207,207,207,207,206,206,206,204,203,202
1,6,155,157,156,156,156,157,156,158,158,...,69,149,128,87,94,163,175,103,135,149
2,2,187,188,188,187,187,186,187,188,187,...,202,201,200,199,198,199,198,195,194,195
3,2,211,211,212,212,211,210,211,210,210,...,235,234,233,231,230,226,225,222,229,163
4,13,164,167,170,172,176,179,180,184,185,...,92,105,105,108,133,163,157,163,164,179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27450,13,189,189,190,190,192,193,193,193,193,...,132,165,99,77,52,200,234,200,222,225
27451,23,151,154,157,158,160,161,163,164,166,...,198,198,198,198,198,196,195,195,195,194
27452,18,174,174,174,174,174,175,175,174,173,...,121,196,209,208,206,204,203,202,200,200
27453,17,177,181,184,185,187,189,190,191,191,...,119,56,27,58,102,79,47,64,87,93


# Old Code

In [None]:
# For running inference on the TF-Hub module.
import tensorflow as tf

import tensorflow_hub as hub

# For downloading the image.
import matplotlib.pyplot as plt
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

# Print Tensorflow version
print(tf.__version__)

# Check available GPU devices.
print("The following GPU devices are available: %s" % tf.test.gpu_device_name())

2.17.1
The following GPU devices are available: /device:GPU:0


In [None]:
def display_image(image):
  fig = plt.figure(figsize=(20, 15))
  plt.grid(False)
  plt.imshow(image)

def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
  """Adds a bounding box to an image."""
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                ymin * im_height, ymax * im_height)
  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = top + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    bbox = font.getbbox(display_str)
    text_width, text_height = bbox[2], bbox[3]
    margin = np.ceil(0.05 * text_height)
    draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                    (left + text_width, text_bottom)],
                   fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill="black",
              font=font)
    text_bottom -= text_height - 2 * margin


def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):
  """Overlay labeled boxes on an image with formatted scores and label names."""
  colors = list(ImageColor.colormap.values())

  try:
    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf",
                              25)
  except IOError:
    print("Font not found, using default font.")
    font = ImageFont.load_default()

  for i in range(min(boxes.shape[0], max_boxes)):
    if scores[i] >= min_score:
      ymin, xmin, ymax, xmax = tuple(boxes[i])
      display_str = "{}: {}%".format(class_names[i].decode("ascii"),
                                     int(100 * scores[i]))
      color = colors[hash(class_names[i]) % len(colors)]
      image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
      draw_bounding_box_on_image(
          image_pil,
          ymin,
          xmin,
          ymax,
          xmax,
          color,
          font,
          display_str_list=[display_str])
      np.copyto(image, np.array(image_pil))
  return image

In [None]:
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"

detector = hub.load(module_handle).signatures['default']

In [None]:
def load_img(path):
  img = tf.io.read_file(path)
  img = tf.image.decode_jpeg(img, channels=3)
  return img

In [None]:
def run_detector(detector, path):
  img = load_img(path)

  converted_img  = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
  start_time = time.time()
  result = detector(converted_img)
  end_time = time.time()

  result = {key:value.numpy() for key,value in result.items()}

  print("Found %d objects." % len(result["detection_scores"]))
  print("Inference time: ", end_time-start_time)

  image_with_boxes = draw_boxes(
      img.numpy(), result["detection_boxes"],
      result["detection_class_entities"], result["detection_scores"])

  display_image(image_with_boxes)

In [None]:
sample_img = train_path+train_df.iloc[2]["filename"]

run_detector(detector, sample_img)

In [None]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.20-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.20 sounddevice-0.5.1


# WHat's up

# Working with a Kaggle Dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Opening the dataset

In [12]:
import pandas as pd

In [61]:
train_path = "/content/SwampHacks_X/ASL_Set/train/"

In [18]:
train_df = pd.read_csv(f"{train_path}_annotations.csv")

In [19]:
train_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,U7_jpg.rf.0037faea78f8a89329a93006132921b3.jpg,390,390,U,38,69,347,389
1,P12_jpg.rf.0046c1c30abbbccd31716c5b2ad835b9.jpg,372,372,P,84,203,330,332
2,K4_jpg.rf.00821732715c9137b8060360770ea1d8.jpg,372,372,K,42,12,351,369
3,W6_jpg.rf.00d19bc3a49f6469e2afa3aa92f14ff4.jpg,412,412,W,22,73,377,412
4,J30_jpg.rf.00d20e595026b31773ded47509545471.jpg,382,382,J,122,204,250,334


In [20]:
labels = train_df["class"].unique()

labels

array(['U', 'P', 'K', 'W', 'J', 'M', 'N', 'X', 'S', 'D', 'Z', 'F', 'O',
       'T', 'E', 'H', 'C', 'I', 'L', 'B', 'Q', 'V', 'G', 'Y', 'A', 'R'],
      dtype=object)

## Labeling the data

In [None]:
print(cv2.getBuildInformation())


  Version control:               4.10.0-dirty

  Extra modules:
    Location (extra):            /io/opencv_contrib/modules
    Version control (extra):     4.10.0

  Platform:
    Timestamp:                   2024-06-17T17:56:43Z
    Host:                        Linux 5.15.0-1064-azure x86_64
    CMake:                       3.29.5
    CMake generator:             Unix Makefiles
    CMake build tool:            /bin/gmake
    Configuration:               Release

  CPU/HW features:
    Baseline:                    SSE SSE2 SSE3
      requested:                 SSE3
    Dispatched code generation:  SSE4_1 SSE4_2 FP16 AVX AVX2 AVX512_SKX
      requested:                 SSE4_1 SSE4_2 AVX FP16 AVX2 AVX512_SKX
      SSE4_1 (16 files):         + SSSE3 SSE4_1
      SSE4_2 (1 files):          + SSSE3 SSE4_1 POPCNT SSE4_2
      FP16 (0 files):            + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX
      AVX (8 files):             + SSSE3 SSE4_1 POPCNT SSE4_2 AVX
      AVX2 (36 files):           + 

In [21]:
!pip install mediapipe



## Processing the data

In [22]:
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

In [None]:
y_predict = model.predict(x_test)

score = accuracy_score(y_predict, y_test)

print(f"{round(score*100,2)}% of samples were classified correctly.")

84.0% of samples were classified correctly.


In [None]:
f = open("/content/model.p", "wb")
pickle.dump({"model": model}, f)
f.close()

In [96]:
model.fit(data_tr, labels_tr)

ValueError: setting an array element with a sequence.

In [43]:
img = cv2.imread(train_path+train_df.iloc[0]["filename"])
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

results = hands.process(img_rgb)

if results.multi_hand_landmarks:
  for hand_landmarks in results.multi_hand_landmarks:
          mp_drawing.draw_landmarks(
              img,
              hand_landmarks,
              mp_hands.HAND_CONNECTIONS,
              mp_drawing_styles.get_default_hand_landmarks_style(),
              mp_drawing_styles.get_default_hand_connections_style()
          )

    # for i in range(len(hand_landmarks.landmark)):
    #   print(hand_landmarks)


error: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [97]:
def process_data(df, path):

  data_ = []
  labels_ = []

  for idx, row in df.iterrows():
    data_aux = []

    img = cv2.imread(path+row["filename"])
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    results = hands.process(img_rgb)

    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        for i in range(len(hand_landmarks.landmark)):
          x = hand_landmarks.landmark[i].x
          y = hand_landmarks.landmark[i].y
          data_aux.append(x)
          data_aux.append(y)

      data_.append(data_aux)
      labels_.append(row["class"])

  return {"data": data_, "labels": labels_}


In [98]:
train_data = process_data(train_df, train_path)

## Training the Cleaned up Data

In [31]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [32]:
import numpy as np

In [101]:
np.asarray(train_data["data"]).astype(float)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (1253,) + inhomogeneous part.

In [93]:
data_tr = np.asarray(train_data["data"].values)
labels_tr = np.asarray(train_data["labels"].values)

In [74]:
train_data

Unnamed: 0,data,labels
0,"[0.42192527651786804, 0.9444795250892639, 0.34...",U
1,"[0.7108935117721558, 0.8370058536529541, 0.728...",K
2,"[0.34697702527046204, 0.9418112635612488, 0.24...",W
3,"[0.3915402293205261, 0.7744323015213013, 0.472...",M
4,"[0.4427641034126282, 0.6336749792098999, 0.359...",N
...,...,...
1248,"[0.4725436866283417, 0.7554713487625122, 0.456...",C
1249,"[0.5238912105560303, 0.502618134021759, 0.4015...",A
1250,"[0.7002522349357605, 0.7012301683425903, 0.484...",A
1251,"[0.6125355362892151, 0.830099880695343, 0.6829...",W


In [75]:
# x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

model = RandomForestClassifier()

## Read in the testing data

In [76]:
test_path = "/content/SwampHacks_X/ASL_Set/test/"

In [78]:
test_df = pd.read_csv(f"{test_path}_annotations.csv")

In [79]:
test_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,J9_jpg.rf.213a9e356777a13d336e4988c33a93e1.jpg,416,416,J,88,128,194,251
1,Q7_jpg.rf.07fdf6c096cd2a9be72b4de4a627935d.jpg,416,416,Q,37,153,326,309
2,Z16_jpg.rf.309328aaeb31736f8a93a570d6d4f140.jpg,416,416,Z,49,252,216,368
3,R5_jpg.rf.1e94c77f430ee342744dc9fce202c449.jpg,416,416,R,132,96,386,415
4,Z18_jpg.rf.2cffbd9beaeb50a7d03751c3ce738e81.jpg,416,416,Z,122,227,250,313


In [80]:
testing_dict = process_data(test_df, test_path)

In [81]:
testing_dict.head()

Unnamed: 0,data,labels
0,"[0.21435925364494324, 0.40757396817207336, 0.2...",J
1,"[0.14998582005500793, 0.5611433982849121, 0.31...",Q
2,"[0.34963786602020264, 0.7798722982406616, 0.40...",Z
3,"[0.775671124458313, 0.9154149889945984, 0.6181...",R
4,"[0.44985294342041016, 0.6765767931938171, 0.50...",Z


## Fitting the model

In [1]:
data_tr

NameError: name 'data_tr' is not defined

In [103]:
!pip install numpy==1.23.1

Collecting numpy==1.23.1
  Using cached numpy-1.23.1.tar.gz (10.7 MB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: numpy
  Building wheel for numpy (pyproject.toml) ... [?25l[?25hdone
  Created wheel for numpy: filename=numpy-1.23.1-cp311-cp311-linux_x86_64.whl size=19731115 sha256=be745eb2c68efcf04dc08b09c2238f32064d5a47630e625e75c1c6c441fb2099
  Stored in directory: /root/.cache/pip/wheels/54/67/ec/c3e57b4b51328fb39dd4d63906b0d3bd37a312508e5922682f
Successfully built numpy
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.1
    Uninstalling numpy-1.26.1:
      Successfully uninstalled numpy-1.26.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following depend

## Testing the model

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
data_clean = np.asarray(data)
labels_clean = np.asarray(labels)

In [None]:
x_train, x_test, y_train, y_test = train_test_split(data_clean, labels_clean, test_size=0.2, shuffle=True, stratify=labels_clean)

model = RandomForestClassifier()

In [None]:
model.fit(x_train, y_train)

y_predict = model.predict(x_test)
score = accuracy_score(y_predict, y_test)

print(f"{round(score*100,2)}% of samples were classified correctly.")

In [None]:
import pickle

f = open("/content/SwampHacks_X/AI_BIGDATA/model5a.p", "wb")
pickle.dump({"model": model}, f)
f.close()

# Leapfrog Data

## Reading in the csv

In [None]:
import pandas as pd
import numpy as np

In [None]:
dirty_data = pd.read_csv('/content/SwampHacks_X/LeapMotion_setup/LeapSDK_CLion/samples/output/log_abc1.csv').dropna(axis=1)

In [None]:
dirty_data.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 232 columns):
 #    Column               Dtype  
---   ------               -----  
 0    key                  int64  
 1    letter               object 
 2    nHands               int64  
 3    palmX                float64
 4    palmY                float64
 5    palmZ                float64
 6    palm_dirX            float64
 7    palm_dirY            float64
 8    palm_dirZ            float64
 9    palm_orientX         float64
 10   palm_orientY         float64
 11   palm_orientZ         float64
 12   palm_orientW         float64
 13   arm_prev_jointX      float64
 14   arm_prev_jointY      float64
 15   arm_prev_jointZ      float64
 16   arm_next_jointX      float64
 17   arm_next_jointY      float64
 18   arm_next_jointZ      float64
 19   arm_rotationX        float64
 20   arm_rotationY        float64
 21   arm_rotationZ        float64
 22   arm_rotationW        float64
 23   pinch_dist 

In [None]:
dirty_data.letter.unique()

array(['A', 'B', 'C'], dtype=object)

In [None]:
dirty_data.columns

Index(['key', 'letter', 'nHands', 'palmX', 'palmY', 'palmZ', 'palm_dirX',
       'palm_dirY', 'palm_dirZ', 'palm_orientX',
       ...
       'pinky3_prev_jointX', 'pinky3_prev_jointY', 'pinky3_prev_jointZ',
       'pinky3_nex_jointX', 'pinky3_nex_jointY', 'pinky3_nex_jointZ',
       'pinky3_rotationX', 'pinky3_rotationY', 'pinky3_rotationZ',
       'pinky3_rotationW'],
      dtype='object', length=232)

In [None]:
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('rotation')]

In [None]:
# dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('0')]

In [None]:
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('extended')]

In [None]:
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('Z')]

In [None]:
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('W')]

In [None]:
# dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('arm')]

In [None]:
dirty_data.columns[9:]

Index(['thumb1_prev_jointX', 'thumb1_prev_jointY', 'thumb1_nex_jointX',
       'thumb1_nex_jointY', 'thumb2_prev_jointX', 'thumb2_prev_jointY',
       'thumb2_nex_jointX', 'thumb2_nex_jointY', 'thumb3_prev_jointX',
       'thumb3_prev_jointY', 'thumb3_nex_jointX', 'thumb3_nex_jointY',
       'index1_prev_jointX', 'index1_prev_jointY', 'index1_nex_jointX',
       'index1_nex_jointY', 'index2_prev_jointX', 'index2_prev_jointY',
       'index2_nex_jointX', 'index2_nex_jointY', 'index3_prev_jointX',
       'index3_prev_jointY', 'index3_nex_jointX', 'index3_nex_jointY',
       'middle1_prev_jointX', 'middle1_prev_jointY', 'middle1_nex_jointX',
       'middle1_nex_jointY', 'middle2_prev_jointX', 'middle2_prev_jointY',
       'middle2_nex_jointX', 'middle2_nex_jointY', 'middle3_prev_jointX',
       'middle3_prev_jointY', 'middle3_nex_jointX', 'middle3_nex_jointY',
       'ring1_prev_jointX', 'ring1_prev_jointY', 'ring1_nex_jointX',
       'ring1_nex_jointY', 'ring2_prev_jointX', 'ring2_prev_j

In [None]:
dirty_data = dirty_data.drop(axis=1, columns=["pinch_dist", "pinch_str", "grab_angle", "grab_strength"])

In [None]:
# dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains("1_prev")]
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains("2_prev")]
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains("3_prev")]
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains("0")]

In [None]:
dirty_data.drop(axis=1, columns=["palm_orientX", "palm_orientY"], inplace=True)

In [None]:
dirty_data.drop(axis=1, columns=["palm_dirX", "palm_dirY"], inplace=True)

In [None]:
dirty_data.drop(axis=1, columns=["palmX", "palmY"], inplace=True)

In [None]:
dirty_data.drop(axis=1, columns=["arm_prev_jointY", "arm_prev_jointX"], inplace=True)

In [None]:
dirty_data.columns

Index(['key', 'letter', 'nHands', 'arm_next_jointX', 'arm_next_jointY',
       'thumb1_prev_jointX', 'thumb1_prev_jointY', 'thumb1_nex_jointX',
       'thumb1_nex_jointY', 'thumb2_nex_jointX', 'thumb2_nex_jointY',
       'thumb3_nex_jointX', 'thumb3_nex_jointY', 'index1_prev_jointX',
       'index1_prev_jointY', 'index1_nex_jointX', 'index1_nex_jointY',
       'index2_nex_jointX', 'index2_nex_jointY', 'index3_nex_jointX',
       'index3_nex_jointY', 'middle1_prev_jointX', 'middle1_prev_jointY',
       'middle1_nex_jointX', 'middle1_nex_jointY', 'middle2_nex_jointX',
       'middle2_nex_jointY', 'middle3_nex_jointX', 'middle3_nex_jointY',
       'ring1_prev_jointX', 'ring1_prev_jointY', 'ring1_nex_jointX',
       'ring1_nex_jointY', 'ring2_nex_jointX', 'ring2_nex_jointY',
       'ring3_nex_jointX', 'ring3_nex_jointY', 'pinky1_prev_jointX',
       'pinky1_prev_jointY', 'pinky1_nex_jointX', 'pinky1_nex_jointY',
       'pinky2_nex_jointX', 'pinky2_nex_jointY', 'pinky3_nex_jointX',
       '

In [None]:
dirty_data.shape[1]-3

42

In [None]:
dirty_data.drop(axis=1, columns=["palm_dirX", "palm_dirY", "palmX", "palmY"], inplace=True)

KeyError: "['palm_dirX', 'palm_dirY', 'palmX', 'palmY'] not found in axis"

In [None]:
dirty_data.iloc[0].loc['palmX':]

Unnamed: 0,0
palmX,139.662186
palmY,216.195877
palm_dirX,-0.31307
palm_dirY,0.211538
palm_orientX,0.073216
palm_orientY,0.178055
thumb0_prev_jointX,140.240311
thumb0_prev_jointY,211.605881
thumb0_nex_jointX,140.240311
thumb0_nex_jointY,211.605881


In [None]:
dirty_data = dirty_data.drop(axis=1, columns=["pinch_dist","pinch_str", "grab_angle", "grab_strength"])

In [None]:
dirty_data = dirty_data.loc[:, ~dirty_data.columns.str.contains('arm')]

## Cleaning the data with labeling

In [None]:
pd.set_option('display.max_rows', None)

labels = []
data = []

In [None]:
dirty_data.iloc[0]

Unnamed: 0,0
key,375418
letter,A
nHands,1
arm_next_jointX,165.687149
arm_next_jointY,210.718704
thumb1_prev_jointX,140.240311
thumb1_prev_jointY,211.605881
thumb1_nex_jointX,106.971756
thumb1_nex_jointY,223.20401
thumb2_nex_jointX,89.952545


In [None]:
'''
  dataframe should have data with array of coordinates for each 20 bones of a finger.

  label -> letter

  data -> array of 20 floats
'''
labels.append(dirty_data.iloc[0]["letter"])

In [None]:
for idx, row in dirty_data.iterrows():
  labels.append(row.loc["letter"])
  data.append(row.loc["arm_next_jointX":].values)

In [None]:
len(data)

3000

In [None]:
labels

['A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A',
 'A'

## Training with a RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
data_clean = np.asarray(data)
labels_clean = np.asarray(labels)

In [None]:
x_train, x_test, y_train, y_test = train_test_split(data_clean, labels_clean, test_size=0.2, shuffle=True, stratify=labels_clean)

model = RandomForestClassifier()

In [None]:
model.fit(x_train, y_train)

y_predict = model.predict(x_test)

In [None]:
score = accuracy_score(y_predict, y_test)

print(f"{round(score*100,2)}% of samples were classified correctly.")

100.0% of samples were classified correctly.


## Exporting the model

In [None]:
import pickle

f = open("/content/model4v2.p", "wb")
pickle.dump({"model": model}, f)
f.close()