In [1]:
import numpy as np
import pandas as pd
import cv2
import os
import ipyplot
import matplotlib.pyplot as plt

In [6]:
data_path = os.path.join("../Data")
train_data_path = os.path.join(data_path, "asl_alphabet_train/asl_alphabet_train")
test_data_path = os.path.join(data_path, "asl_alphabet_test/asl_alphabet_test")

In [3]:
import mediapipe as mp

In [19]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
mp_draw = mp.solutions.download_utils

In [53]:
img_size = 32
array_value = list()
count = -1

for class_names in os.listdir(test_data_path):
    count += 1
    array_value.append(np.zeros((200,200)))
    class_path = os.path.join(test_data_path, class_names)
    for image_name in os.listdir(class_path):
        img = cv2.imread(os.path.join(class_path, image_name), cv2.IMREAD_GRAYSCALE)
        
        img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        results = hands.process(cv2.cvtColor(img_rgb, cv2.COLOR_BGR2RGB))
        
        if results.multi_hand_landmarks:
            h, w, _ = img_rgb.shape
            
            for hand_landmarks in results.multi_hand_landmarks:
                for lm in hand_landmarks.landmark:
                    x = int(lm.x * w)
                    y = int(lm.y * h)
                    
                    array_value[count][y][x] = 1
                    cv2.circle(img_rgb, (x, y), 1, (0, 255, 0), -1)
            
        
print(np.sum(array_value[1]))

0.0


In [None]:
def load_images(data_path):
    images = []
    labels = []
    for class_name in os.listdir(data_path):
        class_path = os.path.join(data_path, class_name)
        for image_name in os.listdir(class_path):
            img = cv2.imread(os.path.join(class_path, image_name), cv2.IMREAD_GRAYSCALE)
            plt.imshow(img)
            img = cv2.resize(img, (img_size, img_size))
            img = img / 255.0
            images.append(img)
            labels.append(class_name)
    return np.array(images), np.array(labels)

X, y = load_images(train_data_path)

KeyboardInterrupt: 

In [7]:
X.shape

(78000, 32, 32)

In [8]:
y.shape

(78000,)

In [9]:
def conv2d(image, kernel):
    h, w = image.shape
    kh, kw = kernel.shape
    output = np.zeros((h - kh + 1, w - kw + 1))

    for i in range(h - kh + 1):
        for j in range(w - kw + 1):
            output[i, j] = np.sum(image[i:i+kh, j:j+kw] * kernel)

    return output

In [10]:
def max_pooling(image, size=2, stride=2):
    h, w = image.shape
    output_h = (h - size) // stride + 1
    output_w = (w - size) // stride + 1
    pooled = np.zeros((output_h, output_w))

    for i in range(0, h - size + 1, stride):
        for j in range(0, w - size + 1, stride):
            pooled[i//stride, j//stride] = np.max(image[i:i+size, j:j+size])

    return pooled

In [11]:
def relu(x):
    return np.maximum(0, x)

In [12]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum()

In [13]:
def fully_connected_layer(flattened_input, weights, biases):
    return softmax(np.dot(flattened_input, weights) + biases)

In [14]:
def forward_propagation(image, conv_filter, fc_weights, fc_biases):
    conv_output = conv2d(image, conv_filter)
    conv_output = relu(conv_output)

    pooled_output = max_pooling(conv_output)

    flattened = pooled_output.flatten()

    output = fully_connected_layer(flattened, fc_weights, fc_biases)

    return output

In [15]:
def train(X_train, y_train, learning_rate=0.01, epochs=10):
    conv_filter = np.random.randn(3, 3)
    fc_weights = np.random.randn(64, 26)
    fc_biases = np.random.randn(26)
    
    print("conv_filter: ", conv_filter)
    print("fc_weights: ", fc_weights)
    print("fc_biases: ", fc_biases)

    for epoch in range(epochs):
        total_loss = 0
        for i in range(len(X_train)):
            output = forward_propagation(X_train[i], conv_filter, fc_weights, fc_biases)
            loss = -np.log(output[y_train[i]])
            total_loss += loss

            fc_grad = output
            fc_grad[y_train[i]] -= 1
            fc_weights -= learning_rate * np.outer(fc_grad, X_train[i].flatten())
            fc_biases -= learning_rate * fc_grad

        print(f"Epoch {epoch+1}, Loss: {total_loss}")

train(X, y)

conv_filter:  [[-2.01014735  1.08001725  0.88481771]
 [ 2.43104581 -0.62850344 -0.61230199]
 [-0.78293833  0.18697731  2.20280523]]
fc_weights:  [[-2.14598832  0.05075141 -0.74488799 ... -0.75301928  1.27031607
  -0.71764105]
 [ 0.9304432   0.34991896  0.40202498 ... -0.02415371  0.55419188
  -0.9353328 ]
 [-2.16179733 -0.2186046  -0.33608412 ...  1.23067231 -1.46921082
   0.36264965]
 ...
 [ 0.59939567  0.83651017 -0.5421115  ...  0.9141237   0.47452981
  -0.50173117]
 [ 0.78187692  1.13244268  2.64217854 ... -0.22204995  0.57783624
  -2.61830318]
 [ 0.38281497  2.47241814  0.10218848 ... -0.30364843 -1.0711663
  -1.32767867]]
fc_biases:  [-0.23546092  1.19163136 -0.95239302  0.81613642  1.83626114  0.26098409
 -0.01271884  0.09700358 -0.64710917 -0.77557437 -0.05571881 -0.14608024
 -0.8819245  -1.56672708  0.55238811 -1.39075298  0.50819919 -1.12743502
  0.06908873  1.1110826  -0.93041405 -0.67314399  0.88235817  1.42337148
 -0.23363341  0.39639979]


ValueError: shapes (225,) and (64,26) not aligned: 225 (dim 0) != 64 (dim 0)