# [Sign language dactyl recognition based on machine learning algorithms](https://www.researchgate.net/publication/354349413_Sign_language_dactyl_recognition_based_on_machine_learning_algorithms)

In [1]:
import numpy as np 
import pandas as pd 
import glob
import os
import cv2
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image
import keras
from sklearn.preprocessing import LabelEncoder


In [2]:
TEST_DATA_PATH = '/Users/kamilla/Desktop/masters_diploma/SOTA_MODELS/SOTA_3/data_SOTA_3/green_images/green_test_01_06_mp_2'
TRAIN_DATA_PATH = '/Users/kamilla/Desktop/masters_diploma/SOTA_MODELS/SOTA_3/data_SOTA_3/green_images/green_train_01_06_mp_2'
#VAL_DATA_PATH = '/Users/kamilla/Desktop/masters_diploma/SOTA_MODELS/SOTA_3/data_SOTA_3/temp_val'

# Создадим датафрейм, где будем сохранять путь до картинки и ее лейбл

dactyl = ["А", "Б", "В", "Г", "Е",
          "Ж", "И", "К", "Л", "М",
          "Н", "О", "П", "Р", "С",
          "Т", "У", "Ф", "Х", "Ч",
          "Ш", "Ы", "Э", "Ю", "Я"]

def get_metadata(data_path):
    list_path = []
    list_labels = []
    for label in dactyl:
        label_path = os.path.join(data_path, label, "*")
        image_files = glob.glob(label_path)

        sign_label = [label] * len(image_files)

        list_path.extend(image_files)
        list_labels.extend(sign_label)

    metadata = pd.DataFrame({
        "image_path": list_path,
        "label": list_labels
    })
    return metadata

train_df = get_metadata(TRAIN_DATA_PATH)
print(f"DataFrame for train is ready. Processed {len(train_df)}")
test_df = get_metadata(TEST_DATA_PATH)
print(f"DataFrame for test is ready. Processed {len(test_df)}")
#val_df = get_metadata(VAL_DATA_PATH)
#print(f"DataFrame for val is ready. Processed {len(val_df)}")

DataFrame for train is ready. Processed 2339
DataFrame for test is ready. Processed 259


In [3]:
y_train, x_train = train_df['label'], train_df['image_path']
y_test, x_test = test_df['label'], test_df['image_path']
#y_val, x_val = val_df['label'], val_df['image_path']


In [4]:
le = LabelEncoder()
le.fit(y_train)
y_train_encoded = le.transform(y_train)
y_train_cat = keras.utils.to_categorical(y_train_encoded)

y_test_encoded = le.transform(y_test)
y_test_cat = keras.utils.to_categorical(y_test_encoded)

#y_val_encoded = le.transform(y_val_prep)
#y_val_cat = keras.utils.to_categorical(y_val_encoded)

In [5]:
def load_and_preprocess_image(image_path, target_size=(28,28)):
    image = cv2.imread(image_path)
    image = cv2.resize(image, target_size)
    image_array = np.array(image.flatten())
    return image_array

x_train_images = []
for image_path in x_train:
    image_array = load_and_preprocess_image(image_path)
    x_train_images.append(image_array)
x_train_images = np.array(x_train_images)

x_test_images = []
for image_path in x_test:
    image_array = load_and_preprocess_image(image_path)
    x_test_images.append(image_array)
x_test_images = np.array(x_test_images)


# SVM 

In [6]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# train the model on train set 
model_SVM = SVC() 
model_SVM.fit(x_train_images, y_train_encoded) 

# print prediction results 
#predictions = model.predict(x_test_images) 
#print(classification_report(y_test_encoded, predictions)) 

In [7]:
predictions = model_SVM.predict(x_test_images) 
print(classification_report(y_test_encoded, predictions))

              precision    recall  f1-score   support

           0       0.69      1.00      0.81        11
           1       0.82      0.82      0.82        11
           2       1.00      1.00      1.00        11
           3       0.88      0.78      0.82         9
           4       0.89      0.80      0.84        10
           5       1.00      0.90      0.95        10
           6       0.90      0.75      0.82        12
           7       0.75      0.55      0.63        11
           8       0.38      0.50      0.43        10
           9       0.71      0.50      0.59        10
          10       0.59      0.91      0.71        11
          11       1.00      0.50      0.67         8
          12       0.45      0.50      0.48        10
          13       0.50      0.60      0.55        10
          14       1.00      0.70      0.82        10
          15       0.54      0.64      0.58        11
          16       0.89      0.80      0.84        10
          17       1.00    

In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_test_encoded, predictions, average='weighted')
recall = recall_score(y_test_encoded, predictions, average='weighted')
f1 = f1_score(y_test_encoded, predictions, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.7925099426149845
Recall: 0.7567567567567568
F1-score: 0.7606897152009934


In [12]:
# Попробуем поменять параметры 
from sklearn.model_selection import GridSearchCV 
  
# defining parameter range 
param_grid = {'C': [0.0001, 0.001, 0.01, 0.1],  
              'gamma': [0.0001, 0.001, 0.01, 0.1], 
              'kernel': ['poly', 'linear']}  
  
grid_SVM = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3) 
  
# fitting the model for grid search 
grid_SVM.fit(x_train_images, y_train_encoded) 

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5] END C=0.0001, gamma=0.0001, kernel=poly;, score=0.374 total time=  37.0s
[CV 2/5] END C=0.0001, gamma=0.0001, kernel=poly;, score=0.380 total time=  29.5s
[CV 3/5] END C=0.0001, gamma=0.0001, kernel=poly;, score=0.368 total time=  27.7s
[CV 4/5] END C=0.0001, gamma=0.0001, kernel=poly;, score=0.331 total time=  24.8s
[CV 5/5] END C=0.0001, gamma=0.0001, kernel=poly;, score=0.409 total time=  26.7s
[CV 1/5] END C=0.0001, gamma=0.0001, kernel=linear;, score=0.744 total time=  16.4s
[CV 2/5] END C=0.0001, gamma=0.0001, kernel=linear;, score=0.752 total time=  12.3s
[CV 3/5] END C=0.0001, gamma=0.0001, kernel=linear;, score=0.722 total time=  11.9s
[CV 4/5] END C=0.0001, gamma=0.0001, kernel=linear;, score=0.774 total time=  11.8s
[CV 5/5] END C=0.0001, gamma=0.0001, kernel=linear;, score=0.752 total time=  11.6s
[CV 1/5] END C=0.0001, gamma=0.001, kernel=poly;, score=0.382 total time=  13.7s
[CV 2/5] END C=0.0001, gamm

[CV 1/5] END ...C=0.01, gamma=0.01, kernel=poly;, score=0.382 total time=  13.8s
[CV 2/5] END ...C=0.01, gamma=0.01, kernel=poly;, score=0.378 total time=  13.8s
[CV 3/5] END ...C=0.01, gamma=0.01, kernel=poly;, score=0.380 total time=  14.1s
[CV 4/5] END ...C=0.01, gamma=0.01, kernel=poly;, score=0.338 total time=  13.8s
[CV 5/5] END ...C=0.01, gamma=0.01, kernel=poly;, score=0.398 total time=  13.8s
[CV 1/5] END .C=0.01, gamma=0.01, kernel=linear;, score=0.744 total time=  11.6s
[CV 2/5] END .C=0.01, gamma=0.01, kernel=linear;, score=0.752 total time=  11.7s
[CV 3/5] END .C=0.01, gamma=0.01, kernel=linear;, score=0.722 total time=  11.5s
[CV 4/5] END .C=0.01, gamma=0.01, kernel=linear;, score=0.774 total time=  11.7s
[CV 5/5] END .C=0.01, gamma=0.01, kernel=linear;, score=0.752 total time=  11.6s
[CV 1/5] END ....C=0.01, gamma=0.1, kernel=poly;, score=0.382 total time=  13.8s
[CV 2/5] END ....C=0.01, gamma=0.1, kernel=poly;, score=0.378 total time=  13.9s
[CV 3/5] END ....C=0.01, gam

In [13]:
# print best parameter after tuning 
print(grid_SVM.best_params_) 
# print how our model looks after hyper-parameter tuning 
print(grid_SVM.best_estimator_) 
grid_predictions_SVM = grid_SVM.predict(x_test_images)   
# print classification report 
print(classification_report(y_test_encoded, grid_predictions_SVM)) 

{'C': 0.0001, 'gamma': 0.0001, 'kernel': 'linear'}
SVC(C=0.0001, gamma=0.0001, kernel='linear')
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        11
           1       0.73      0.73      0.73        11
           2       0.79      1.00      0.88        11
           3       0.90      1.00      0.95         9
           4       0.88      0.70      0.78        10
           5       1.00      0.80      0.89        10
           6       0.80      0.67      0.73        12
           7       0.45      0.45      0.45        11
           8       0.40      0.60      0.48        10
           9       0.78      0.70      0.74        10
          10       0.64      0.82      0.72        11
          11       0.86      0.75      0.80         8
          12       0.50      0.50      0.50        10
          13       0.70      0.70      0.70        10
          14       0.91      1.00      0.95        10
          15       0.56      0.45      

# RandomForest

In [14]:
RANDOM_STATE = 128

In [15]:
from sklearn.ensemble import RandomForestClassifier


model_RFC = RandomForestClassifier(random_state=RANDOM_STATE) 
model_RFC.fit(x_train_images, y_train_encoded) 

# print prediction results 
predictions_RFC = model_RFC.predict(x_test_images) 
print(classification_report(y_test_encoded, predictions_RFC)) 

              precision    recall  f1-score   support

           0       0.61      1.00      0.76        11
           1       0.71      0.91      0.80        11
           2       0.91      0.91      0.91        11
           3       0.89      0.89      0.89         9
           4       1.00      0.90      0.95        10
           5       1.00      0.90      0.95        10
           6       0.82      0.75      0.78        12
           7       0.57      0.73      0.64        11
           8       0.64      0.70      0.67        10
           9       0.88      0.70      0.78        10
          10       0.67      0.91      0.77        11
          11       1.00      0.38      0.55         8
          12       0.62      0.50      0.56        10
          13       1.00      0.40      0.57        10
          14       0.89      0.80      0.84        10
          15       0.64      0.64      0.64        11
          16       0.77      1.00      0.87        10
          17       0.91    

In [16]:
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_test_encoded, predictions_RFC, average='weighted')
recall = recall_score(y_test_encoded, predictions_RFC, average='weighted')
f1 = f1_score(y_test_encoded, predictions_RFC, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.8289557503843218
Recall: 0.803088803088803
F1-score: 0.7984903373070568


In [17]:
param_grid = { 
    'n_estimators': [100,300,400],
    'max_features': ['sqrt'],
    'max_depth' : [1,2,5,10,30],
    'criterion' :['gini', 'entropy']
}

grid_RFC = GridSearchCV(RandomForestClassifier(), param_grid, refit = True, verbose = 3) 
grid_RFC.fit(x_train_images, y_train_encoded) 
 

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.254 total time=   0.2s
[CV 2/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.256 total time=   0.2s
[CV 3/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.246 total time=   0.2s
[CV 4/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.267 total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.223 total time=   0.2s
[CV 1/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=300;, score=0.286 total time=   0.6s
[CV 2/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=300;, score=0.256 total time=   0.6s
[CV 3/5] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=300;, score=0.209 total time=   0.6s
[CV 4/5] END criterion=gini, max_depth=1, max_feat

[CV 4/5] END criterion=gini, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.842 total time=   7.2s
[CV 5/5] END criterion=gini, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.812 total time=   7.2s
[CV 1/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.252 total time=   0.3s
[CV 2/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.276 total time=   0.3s
[CV 3/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.218 total time=   0.3s
[CV 4/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.248 total time=   0.3s
[CV 5/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.238 total time=   0.3s
[CV 1/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=300;, score=0.276 total time=   0.9s
[CV 2/5] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=300;, score=0.2

[CV 1/5] END criterion=entropy, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.788 total time=  10.6s
[CV 2/5] END criterion=entropy, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.814 total time=  10.6s
[CV 3/5] END criterion=entropy, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.803 total time=  10.6s
[CV 4/5] END criterion=entropy, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.840 total time=  10.8s
[CV 5/5] END criterion=entropy, max_depth=30, max_features=sqrt, n_estimators=400;, score=0.833 total time=  10.5s


In [18]:
# print best parameter after tuning 
print(grid_RFC.best_params_) 
# print how our model looks after hyper-parameter tuning 
print(grid_RFC.best_estimator_) 
grid_predictions_RFC = grid_RFC.best_estimator_.predict(x_test_images)   
# print classification report 
print(classification_report(y_test_encoded, grid_predictions_RFC)) 

{'criterion': 'entropy', 'max_depth': 30, 'max_features': 'sqrt', 'n_estimators': 400}
RandomForestClassifier(criterion='entropy', max_depth=30, n_estimators=400)
              precision    recall  f1-score   support

           0       0.62      0.91      0.74        11
           1       0.75      0.82      0.78        11
           2       1.00      0.91      0.95        11
           3       1.00      1.00      1.00         9
           4       0.89      0.80      0.84        10
           5       1.00      0.90      0.95        10
           6       0.77      0.83      0.80        12
           7       0.73      0.73      0.73        11
           8       0.55      0.60      0.57        10
           9       0.67      0.40      0.50        10
          10       0.73      1.00      0.85        11
          11       1.00      0.50      0.67         8
          12       1.00      0.50      0.67        10
          13       0.86      0.60      0.71        10
          14       0.91   

# XGBoost

In [19]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV 
from sklearn.metrics import classification_report


xgb_model = xgb.XGBClassifier(objective="multi:softprob", random_state=42)
xgb_model.fit(x_train_images, y_train_encoded)

y_pred_XGB = xgb_model.predict(x_test_images)

print(classification_report(y_test_encoded, y_pred_XGB))


              precision    recall  f1-score   support

           0       1.00      0.82      0.90        11
           1       0.79      1.00      0.88        11
           2       0.85      1.00      0.92        11
           3       0.90      1.00      0.95         9
           4       0.89      0.80      0.84        10
           5       0.90      0.90      0.90        10
           6       0.90      0.75      0.82        12
           7       0.60      0.55      0.57        11
           8       0.55      0.60      0.57        10
           9       0.88      0.70      0.78        10
          10       0.79      1.00      0.88        11
          11       0.75      0.38      0.50         8
          12       0.62      0.50      0.56        10
          13       0.62      0.80      0.70        10
          14       0.91      1.00      0.95        10
          15       0.89      0.73      0.80        11
          16       1.00      0.90      0.95        10
          17       0.82    

In [20]:
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_test_encoded, y_pred_XGB, average='weighted')
recall = recall_score(y_test_encoded, y_pred_XGB, average='weighted')
f1 = f1_score(y_test_encoded, y_pred_XGB, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)



Precision: 0.8094014736871878
Recall: 0.803088803088803
F1-score: 0.7990863220509586


In [None]:
params = {
    'n_estimators': [100,300],  # Количество деревьев в ансамбле
    'max_depth': [3, 9],             # Максимальная глубина дерева
    'learning_rate': [0.1, 0.01, 0.001],# Скорость обучения
}

grid_XGB = GridSearchCV(xgb.XGBClassifier(objective="multi:softprob"), params, refit=True, verbose=2) 
grid_XGB.fit(x_train_images, y_train_encoded)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=  41.5s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=  43.7s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=  40.5s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=  41.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=  41.1s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time= 1.9min
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time= 1.8min
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time= 1.7min
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time= 1.8min
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time= 1.8min
[CV] END ...learning_rate=0.1, max_depth=9, n_estimators=100; total time= 1.1min
[CV] END ...learning_rate=0.1, max_depth=9, n_es

In [None]:
# print best parameter after tuning 
print(grid_XGB.best_params_) 
# print how our model looks after hyper-parameter tuning 
print(grid_XGB.best_estimator_) 
grid_predictions_XGB = grid_XGB.best_estimator_.predict(x_test_images)   
# print classification report 
print(classification_report(y_test_encoded, grid_predictions_XGB))