In [9]:
import numpy as np
import pandas as pd
import cv2
import mediapipe as mp
from sklearn.ensemble import RandomForestClassifier
import optuna
from optuna.samplers import GridSampler
from sklearn.metrics import accuracy_score,log_loss
from sklearn.model_selection import train_test_split,cross_validate

import time
import pyttsx3


import warnings
warnings.filterwarnings("ignore")

In [10]:
hands = mp.solutions.hands
draw = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles

In [16]:
sign_df=pd.read_csv(r"hand_sign_dataset.csv")

In [17]:
fv=sign_df.iloc[:,:-1]
cv=sign_df.iloc[:,-1]

In [18]:
final_data = []
for d in fv.values:
    md = d.reshape(42,3)
    center = md-md[0]
    distance = np.linalg.norm(md[12]-md[0])
    fpd = center/distance
    final_data.append(fpd.flatten())

fv_final=pd.DataFrame(final_data)

In [19]:
X_train,X_test,y_train,y_test = train_test_split(fv_final,cv,test_size=0.2,stratify=cv,random_state=40)

In [20]:
def objective(trial):
  # define hyperparameter
    n_estimators=trial.suggest_int('n_estimators',3,40)
    max_depth=trial.suggest_int('max_depth',3,30)
    min_samples_split=trial.suggest_int('min_samples_split',2,15)
    min_samples_leaf=trial.suggest_int('min_samples_leaf',1,20)
    
    rf=RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)


  #training by using k-fold acc
    values=cross_validate(rf,X_train,y_train,cv=3,scoring='accuracy',return_train_score=True)
    cv_acc=values['test_score'].mean()
    train_acc=values['train_score'].mean()
    
    #return additional parameter
    trial.set_user_attr('train_acc',train_acc)
    
    return cv_acc

In [21]:
study=optuna.create_study(direction="maximize")

[I 2025-08-02 17:43:27,933] A new study created in memory with name: no-name-d3ef343a-2e89-474d-aaf3-3587a6921040


In [22]:
study.optimize(objective,n_trials=100)

[I 2025-08-02 17:43:35,419] Trial 0 finished with value: 0.9998611111111111 and parameters: {'n_estimators': 28, 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 9}. Best is trial 0 with value: 0.9998611111111111.
[I 2025-08-02 17:43:43,994] Trial 1 finished with value: 0.9998611111111111 and parameters: {'n_estimators': 36, 'max_depth': 15, 'min_samples_split': 6, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.9998611111111111.
[I 2025-08-02 17:43:47,658] Trial 2 finished with value: 0.9998611111111111 and parameters: {'n_estimators': 14, 'max_depth': 20, 'min_samples_split': 14, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.9998611111111111.
[I 2025-08-02 17:43:54,283] Trial 3 finished with value: 0.9995833333333334 and parameters: {'n_estimators': 33, 'max_depth': 8, 'min_samples_split': 14, 'min_samples_leaf': 15}. Best is trial 0 with value: 0.9998611111111111.
[I 2025-08-02 17:43:59,523] Trial 4 finished with value: 0.9998611111111111 and parameters: {'n

In [25]:
study.best_params

{'n_estimators': 36,
 'max_depth': 22,
 'min_samples_split': 3,
 'min_samples_leaf': 6}

In [26]:
rf = RandomForestClassifier(n_estimators=36,
                                    max_depth=2,
                                    min_samples_split=3,
                                    min_samples_leaf=6)

rf.fit(X_train,y_train)

In [27]:
y_pred=rf.predict(X_test)

In [28]:
accuracy_score(y_test,y_pred)

0.6294444444444445

In [2]:
# import pickle
# with open('sign_model3_new.pkl', 'wb') as f:
#     rf=pickle.dump(rf,f)

In [6]:
import pickle
with open('sign_model3_new.pkl', 'rb') as f:
    rf=pickle.load(f)

In [7]:
def speak(text):
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()
    engine.stop()

In [11]:
fm_model = hands.Hands(static_image_mode=True,
                       min_detection_confidence=0.9,
                       min_tracking_confidence=0.9,
                       max_num_hands=2)

vid = cv2.VideoCapture(1)
pred = None
sentence = ""
last_update_time = time.time()
delay_between_predictions = 1

status_msg = ""
start_prediction = False

while True:
    b, f = vid.read()
    # f = cv2.flip(f, 1)
    if not b:
        break

    rgb = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
    result = fm_model.process(rgb)

    if result.multi_hand_landmarks:
        # Draw landmarks for all detected hands
        for hand_landmarks in result.multi_hand_landmarks:
            mp.solutions.drawing_utils.draw_landmarks(
                image=f,
                landmark_list=hand_landmarks,
                connections=hands.HAND_CONNECTIONS,
                landmark_drawing_spec=mp_styles.get_default_hand_landmarks_style(),
                connection_drawing_spec=mp_styles.get_default_hand_connections_style()
            )

        all_hands = []
        for hand_landmarks in result.multi_hand_landmarks:
            hand = []
            for lm in hand_landmarks.landmark:
                hand.extend([lm.x, lm.y, lm.z])
            all_hands.append(hand)

        if len(all_hands) == 2:  # Ensure 2 hands detected
            combined_hand = all_hands[0] + all_hands[1]  # concatenate both hands
            d = np.array(combined_hand)
            md = d.reshape(42, 3)  # 42 landmarks (2x21) with 3 coords each
            center = md - md[0]
            distance = np.linalg.norm(md[12] - md[0])
            if distance != 0:
                fpd = center / distance
                fv_final = pd.DataFrame([fpd.flatten()])  # single row for prediction

                prediction = rf.predict(fv_final)
                pred = prediction[0]

                current_time = time.time()

                if pred == "START":
                    start_prediction = True
                    status_msg = "Started collecting data"

                elif pred == "STOP":
                    start_prediction = False
                    status_msg = "Stopped collecting data"
                    speak(sentence)

                elif start_prediction and (current_time - last_update_time > delay_between_predictions):
                    if pred == "SPACE":
                        sentence += " "
                    elif pred == "FULLSTOP":
                        sentence += "."
                    elif pred == "BACKSPACE":
                        sentence = sentence[:-1]
                    else:
                        sentence += pred

                    last_update_time = current_time
        else:
            pred = None  # Not exactly 2 hands detected, skip prediction

    else:
        pred = None

    cv2.putText(f, f"Sentence: {sentence}", (30, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 5)

    if status_msg:
        cv2.putText(f, status_msg, (30, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 3)

    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

    cv2.imshow("frame", f)

vid.release()
cv2.destroyAllWindows()
