<a href="https://colab.research.google.com/github/0rina/0rina/blob/main/aryna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install mediapipe
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import mediapipe as mp
import os
import random

folder = '/content/drive/My Drive/praca_inzynierska/dataset/SLtest/outputVideo2/'

def load_data():
    X_train = np.load(folder+r"X_train.npy", allow_pickle=True)
    Y_train = np.load(folder+r"Y_train.npy")
    X_test = np.load(folder+r"X_test.npy", allow_pickle=True)
    Y_test = np.load(folder+r"Y_test.npy")

    unique_labels = sorted(set(Y_train))
    label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}

    Y_train = np.array([label_to_idx[label] for label in Y_train])
    Y_test = np.array([label_to_idx[label] for label in Y_test])

    Y_train = to_categorical(Y_train, num_classes=len(unique_labels))
    Y_test = to_categorical(Y_test, num_classes=len(unique_labels))
    print("num_classes: ", len(unique_labels))
    return np.array(X_train), np.array(Y_train), np.array(X_test), np.array(Y_test), unique_labels

def build_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv3D(32, (3, 3, 3), activation='relu', padding='same', input_shape=input_shape)) #padding - krawędzi zerami
    #model.add(BatchNormalization())
    model.add(MaxPooling3D((2, 2, 2)))
    model.add(Conv3D(64, (3, 3, 3), activation='relu', padding='same'))
    #model.add(BatchNormalization())
    model.add(MaxPooling3D((2, 2, 2)))
    model.add(Flatten()) #jednowymiarowe
    model.add(Dense(128, activation='relu'))
    #model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_model(model, X_train, Y_train, X_test, Y_test, batch_size=32, epochs=20):
    history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, Y_test))
    model.save("sign_language_model.h5")
    return history

def test_model(model, X_test, Y_test, unique_labels, video_names):

    #predykcja
    predictions = model.predict(X_test)
    predicted_labels = np.argmax(predictions, axis=1)
    true_labels = np.argmax(Y_test, axis=1)

    #mapowanie etykiet na nazwy
    idx_to_label = {idx: label for idx, label in enumerate(unique_labels)}

    for i in range(len(video_names)):
        video_name = video_names[i]
        predicted_label = idx_to_label[predicted_labels[i]]
        true_label = idx_to_label[true_labels[i]]
        print(f"Video: {video_name}, Predicted: {predicted_label}, True: {true_label}")





Collecting mediapipe
  Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.0-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.9/35.9 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.0-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


v=np.arange(26)
frame_interval = len(v)//25
print (frame_interval)
frames =[i for i in v if i % frame_interval==0]
len(frames)

In [3]:
import os

def process_video_fixed_frames(video_path, frame_size=(64, 64), num_frames=20):


    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps    = cap.get(cv2.CAP_PROP_FPS)
    frame_interval = frame_count // num_frames
    print ("Frame interval:", frame_interval)

    print (video_path, " fps: ", fps, "frames:", frame_count)
    frames = []

    number = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if number % frame_interval==0:
                frame_resized = cv2.resize(frame, frame_size)
                frame_normalized = frame_resized / 255.0
                frames.append(frame_normalized)
                number += 1

    finally:
        cap.release()

    # Dopasowanie do stałej liczby ramek
    if len(frames) < num_frames:
        while len(frames) < num_frames:
            frames.append(frames[-1])
    elif len(frames) > num_frames:
        frames = frames[:num_frames]

    return np.array(frames)

def process_videos_and_split(folder_path, label, frame_size=(64, 64), num_frames=20):

    video_files = [f for f in os.listdir(folder_path) if f.endswith(('.mp4', '.avi', '.mov'))]

    # Check if video_files is empty
    if not video_files:
        print(f"No video files found in {folder_path}")
        return [], [], [], [] # Return empty lists if no video files are found

    test_video1 = random.choice(video_files)
    test_video2 = random.choice(video_files)

    X_train, Y_train = [], []
    X_test, Y_test = [], []

    for video_file in video_files:
        video_path = os.path.join(folder_path, video_file)
        video_data = process_video_fixed_frames(video_path, frame_size, num_frames)

        if video_file == test_video1 or video_file == test_video2:
            X_test.append(video_data)
            Y_test.append(label)
        else:
            X_train.append(video_data)
            Y_train.append(label)

    return X_train, Y_train, X_test, Y_test

def prepare_dataset(root_folder, frame_size=(64, 64), num_frames=20):

    X_train, Y_train = [], []
    X_test, Y_test = [], []

    for word_folder in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, word_folder)
        if os.path.isdir(folder_path):
            label = word_folder
            x_train, y_train, x_test, y_test = process_videos_and_split(folder_path, label, frame_size, num_frames)

            X_train.extend(x_train)
            Y_train.extend(y_train)
            X_test.extend(x_test)
            Y_test.extend(y_test)

    # Konwersja na numpy arrays
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)
    X_test = np.array(X_test)
    Y_test = np.array(Y_test)

    print (X_train.shape)
    print (Y_train.shape)
    print (X_test.shape)
    print (Y_test.shape)


    np.save(output_base_folder + "X_train.npy", X_train)
    np.save(output_base_folder + "Y_train.npy", Y_train)
    np.save(output_base_folder + "X_test.npy", X_test)
    np.save(output_base_folder + "Y_test.npy", Y_test)

    print(f"Zapisano pliki: X_train.npy, Y_train.npy, X_test.npy, Y_test.npy")

#root_folder = r"C:\Users\Arina\Desktop\inzynierka\dataset\SLtest"
#output_base_folder = r"C:\Users\Arina\Desktop\inzynierka\dataset\outputVideo"

root_folder = '/content/drive/My Drive/praca_inzynierska/dataset/SLtest'
output_base_folder = '/content/drive/My Drive/praca_inzynierska/dataset/SLtest/outputVideo2'

prepare_dataset(root_folder)

Frame interval: 1
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/00583.mp4  fps:  29.97002997002997 frames: 38
Frame interval: 1
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/00585.mp4  fps:  29.97002997002997 frames: 38
Frame interval: 3
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/65006.mp4  fps:  23.976075485966692 frames: 62
Frame interval: 1
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/00586.mp4  fps:  29.97002997002997 frames: 39
Frame interval: 2
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/00584.mp4  fps:  29.97002997002997 frames: 42
Frame interval: 4
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/accent_1.mp4  fps:  29.97 frames: 96
Frame interval: 1
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/accent_5.mp4  fps:  29.97002997002997 frames: 38
Frame interval: 4
/content/drive/My Drive/praca_inzynierska/dataset/SLtest/accent/accent_3.mp4  fps:  29.9700

In [5]:
    X_train, Y_train, X_test, Y_test, unique_labels = load_data()

    #parametry modelu
    input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3], X_train.shape[4])
    num_classes = len(unique_labels)

    model = build_model(input_shape, num_classes)

    history = train_model(model, X_train, Y_train, X_test, Y_test, batch_size=32, epochs=30)
    model = load_model("sign_language_model.h5")

    video_names = [f"video_{i}.mp4" for i in range(len(X_test))]

    test_model(model, X_test, Y_test, unique_labels, video_names)

num_classes:  10
Epoch 1/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 823ms/step - accuracy: 0.1862 - loss: 4.0569 - val_accuracy: 0.1579 - val_loss: 2.3057
Epoch 2/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step - accuracy: 0.1649 - loss: 2.2977 - val_accuracy: 0.1579 - val_loss: 2.2963
Epoch 3/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 99ms/step - accuracy: 0.1654 - loss: 2.2483 - val_accuracy: 0.1579 - val_loss: 2.4284
Epoch 4/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - accuracy: 0.3770 - loss: 2.0893 - val_accuracy: 0.1579 - val_loss: 2.4206
Epoch 5/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step - accuracy: 0.4041 - loss: 1.9872 - val_accuracy: 0.1579 - val_loss: 2.3745
Epoch 6/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step - accuracy: 0.3557 - loss: 1.9259 - val_accuracy: 0.1579 - val_loss: 2.7995
Epoch 7/30
[1m6/6[0m [32m



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263ms/step
Video: video_0.mp4, Predicted: accent, True: accent
Video: video_1.mp4, Predicted: about, True: accent
Video: video_2.mp4, Predicted: accept, True: accept
Video: video_3.mp4, Predicted: accept, True: accept
Video: video_4.mp4, Predicted: about, True: a lot
Video: video_5.mp4, Predicted: about, True: a lot
Video: video_6.mp4, Predicted: a, True: a
Video: video_7.mp4, Predicted: a, True: a
Video: video_8.mp4, Predicted: about, True: above
Video: video_9.mp4, Predicted: accident, True: above
Video: video_10.mp4, Predicted: accident, True: abdomen
Video: video_11.mp4, Predicted: accident, True: abdomen
Video: video_12.mp4, Predicted: accent, True: about
Video: video_13.mp4, Predicted: about, True: about
Video: video_14.mp4, Predicted: about, True: accident
Video: video_15.mp4, Predicted: accident, True: accident
Video: video_16.mp4, Predicted: accomplish, True: accomplish
Video: video_17.mp4, Predicted: a lot, True: a