In [66]:
! pip install tensorflow numpy opencv-python keras pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [67]:
import os
import cv2
import numpy as np
import tensorflow as tf  # Import TensorFlow
import pandas as pd

from tensorflow import keras  # Import Keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf

class VideoProcessor:
  def __init__(self, video_path, label_path, sequence_length=21):
    self.IMAGE_HEIGHT = 224
    self.IMAGE_WIDTH = 224
    self.sequence_length = sequence_length
    self.video_path = video_path
    self.label_path = label_path
    self.frames = []
    self.labels = {}
    
    self.load_labels()
    
  def load_labels(self):
    """Load labels from a csv file"""
    df = pd.read_csv(self.label_path)
    # map a file name with the label
    self.labels = dict(zip(df['filename'], df['class']))
    
  def one_hot_encoding(self, y):
    """One-hot encoding of the labels"""
    # Get unique sorted labels
    sorted_labels = sorted(set(y))  # unique sorted labels
    print(sorted_labels)
    
    # Map labels to indices
    label_to_index = {label: idx for idx, label in enumerate(sorted_labels)}
    
    # Convert labels to indices
    indices = [label_to_index[label] for label in y]
    
    # One-hot encode the labels
    
    y_one_hot = tf.one_hot(indices, depth=len(sorted_labels))
    
    return y_one_hot
  
  def load_single_image(self, file):
    img = cv2.imread(file)
    print(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    img = cv2.resize(img, (self.IMAGE_HEIGHT, self.IMAGE_WIDTH))
    img = tf.keras.applications.resnet50.preprocess_input(img)

    return img

  def load_frames(self):
    '''
      load from the file (images)
      resize the images to 224x224
      normalize the rgb to 0-1
      return the frames
    '''
    X, y = [], []
        
    video_frame_files = sorted([
        f for f in os.listdir(self.video_path) 
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ])

    for files in video_frame_files:
      # for now skip the invert images
      if files.startswith('invert'):
        continue
      img = cv2.imread(os.path.join(self.video_path, files))
      
      if img is None:
        print("Warning: Could not read image file", files)
        
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
      img = cv2.resize(img, (self.IMAGE_HEIGHT, self.IMAGE_WIDTH))
      img = tf.keras.applications.resnet50.preprocess_input(img) 
      
      # Append the image frame to the list
      X.append(np.array(img))
      print("File: ", files)
      if self.labels.get(files, -1) == -1:
        print("Warning: Could not find label for image file", files)
      y.append(self.labels.get(files, -1))
        # Append the label corresponding to the file
      
    # One-hot encode the labels after collecting all the images
    y = self.one_hot_encoding(np.array(y))
    
    # Return the frames and corresponding labels
    return np.array(X), y


In [70]:
class FeatureExtractor:
    def __init__(self, image_height, image_width):
        self.image_height = image_height
        self.image_width = image_width
        self.CHANNEL = 3
    
    def flatten_frames(self, frames):
        '''
        Flatten the images into individual frames
        '''
        # Reshape frames to process through CNN: (1722, 224, 224, 3)
        frames_reshaped = frames.reshape(-1, self.image_height, self.image_width, self.CHANNEL)
        return frames_reshaped

    def extract_features(self, frames):
        num_samples, h, w, c = frames.shape  # Shape of X (1722, 224, 224, 3)
        frames = self.flatten_frames(frames)  # Flatten to (1722, 224, 224, 3)
    
        '''
        Extract features using ResNet50 pre-trained CNN model with transfer learning
        '''
        base_model = tf.keras.applications.ResNet50(weights='imagenet', 
                                                    include_top=False, 
                                                    input_shape=(self.image_height, self.image_width, self.CHANNEL))
    
        # Freeze the base model layers
        base_model.trainable = False
    
        # Apply Global Average Pooling
        x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
        
        # Create a new model with ResNet50 as base and Global Average Pooling
        cnn_model = tf.keras.Model(inputs=base_model.input, outputs=x)
    
        # Extract image features (Shape will be: (1722, 2048) after pooling)
        features = cnn_model.predict(frames)
    
        # The features are already flattened for use in the Dense layers
        return features
    
    def extract_single_image_feature(self, image):
        """
        Extract features from a single image using ResNet50.
        """
    
        # Ensure the image has the correct shape (224, 224, 3)
        if image.shape != (self.image_height, self.image_width, self.CHANNEL):
            raise ValueError(f"Expected image shape ({self.image_height}, {self.image_width}, {self.CHANNEL}), but got {image.shape}")
    
        # Expand dimensions to match batch format (1, 224, 224, 3)
        image = np.expand_dims(image, axis=0)
    
        '''
        Extract features using ResNet50 pre-trained CNN model with transfer learning
        '''
        base_model = tf.keras.applications.ResNet50(weights='imagenet', 
                                                    include_top=False, 
                                                    input_shape=(self.image_height, self.image_width, self.CHANNEL))
    
        # Freeze the base model layers
        base_model.trainable = False
    
        # Apply Global Average Pooling
        x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
    
        # Create a new model with ResNet50 as base and Global Average Pooling
        cnn_model = tf.keras.Model(inputs=base_model.input, outputs=x)
    
        # Extract image features (Shape will be: (1, 2048) after pooling)
        features = cnn_model.predict(image)
    
        # Remove batch dimension (1, 2048) → (2048,)
        return features



In [71]:
video_processor = VideoProcessor('Amharic_Sign_Language/train', 'Amharic_Sign_Language/train_labels.csv')
X, y = video_processor.load_frames()

['che', 'gne', 'ha', 'he', 'hhe', 'ke', 'le', 'me', 'ne', 'qe']


In [72]:
print(len(X))
print(len(y))
print("X shape before flattening:", X.shape)
print("Y shape before flattening:", y.shape)

30
30
X shape before flattening: (30, 224, 224, 3)
Y shape before flattening: (30, 10)


In [73]:
feature_extractor = FeatureExtractor(video_processor.IMAGE_HEIGHT, video_processor.IMAGE_WIDTH)
features = feature_extractor.extract_features(X)
print("Feature shape", features.shape)
print("Labels shape", y.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
Feature shape (30, 2048)
Labels shape (30, 10)


In [74]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input

class CNNModel:
    def __init__(self, num_classes, feature_size):
        self.num_classes = num_classes
        self.feature_size = feature_size

    def build_model(self):
        model = Sequential([
            Dense(512, activation='relu', input_shape=(self.feature_size,)),
            Dropout(0.5),
            Dense(256, activation='relu'),
            Dropout(0.3),
            Dense(128, activation='relu'),
            Dense(self.num_classes, activation='softmax')
        ])


        # Compile the model
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

# Create the model with the correct input shape
cnn_model = CNNModel(10, features.shape[1])
model = cnn_model.build_model()

# Fit the model
model.fit(features, y, epochs=20, batch_size=16)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 67ms/step - accuracy: 0.1319 - loss: 2.6409
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.2389 - loss: 2.0245
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.3056 - loss: 2.0500
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.5000 - loss: 1.5150
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.4375 - loss: 1.5029
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.5653 - loss: 1.3135
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.6722 - loss: 0.9793
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.7833 - loss: 0.6707
Epoch 9/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62

<keras.src.callbacks.history.History at 0x7ffacd0265c0>

In [75]:
# validation step 
video_processor = VideoProcessor('Amharic_Sign_Language/test', 'Amharic_Sign_Language/test_labels.csv')
X, y = video_processor.load_frames()

['che', 'gne', 'ha', 'he', 'hhe', 'ke', 'le', 'me', 'ne', 'qe']


In [76]:
feature_extractor = FeatureExtractor(video_processor.IMAGE_HEIGHT, video_processor.IMAGE_WIDTH)
features = feature_extractor.extract_features(X)
print("Feature shape", features.shape)
print("Labels shape", y.shape)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 4s/step
Feature shape (439, 2048)
Labels shape (439, 10)


In [77]:
loss, accuracy = model.evaluate(features, y, batch_size=16)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 1.0000 - loss: 0.0180
Test Accuracy: 100.00%


In [78]:
y_pred = model.predict(features)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y, axis=1)

misclassified = np.where(y_pred_classes != y_true_classes)[0]
print(f"Number of misclassified samples: {len(misclassified)}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Number of misclassified samples: 0


In [79]:
train_filenames = set(pd.read_csv("Amharic_Sign_Language/train_labels.csv")["filename"])
test_filenames = set(pd.read_csv("Amharic_Sign_Language/test_labels.csv")["filename"])
print("Common files:", train_filenames.intersection(test_filenames))

Common files: set()


In [80]:
print(model.summary())


None


In [81]:
! pip install opencv-python mediapipe


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
! wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

[m[m[m[m

In [82]:
# Import Libraries
import cv2
import mediapipe as mp
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [7]:
img = cv2.imread('./qe-test.jpg')

[ WARN:0@126.758] global loadsave.cpp:268 findDecoder imread_('./qe-test.jpg'): can't open/read file: check file path/integrity


In [83]:
video_processor = VideoProcessor('Amharic_Sign_Language/validation', 'Amharic_Sign_Language/test_labels.csv')
X, y = video_processor.load_frames()

[-1]


In [84]:
feature_extractor = FeatureExtractor(video_processor.IMAGE_HEIGHT, video_processor.IMAGE_WIDTH)
features = feature_extractor.extract_features(X)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


In [85]:
model.predict(features)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


array([[0.01506186, 0.09297454, 0.14822005, 0.03323105, 0.01668024,
        0.00318067, 0.04579481, 0.6383235 , 0.00080061, 0.00573265],
       [0.2015174 , 0.2953879 , 0.01305674, 0.40613452, 0.00110205,
        0.01389759, 0.00177737, 0.05499005, 0.00272427, 0.00941203]],
      dtype=float32)