In [3]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections


def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 
    

def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])


In [4]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array(['I', 'go', 'cinema', 'yesterday','return', 'friend', 'death','son', 'hate', 'father'])
# actions = np.array(['son', 'hate', 'father'])


# Thirty videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 30

# Folder start
start_folder = 30

In [5]:
import pandas as pd
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Define model-building function

model = Sequential()
model.add(GRU(
    units=128,
    return_sequences=True, input_shape=(30, 1662)))
model.add(Dropout(0.2))
model.add(GRU(
    units=128))
model.add(Dense(96, activation='relu'))
model.add(Dense(len(actions), activation='softmax'))

# Compile model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)



  super().__init__(**kwargs)


In [6]:
model.summary()

In [7]:
model.load_weights('10gru.h5')

In [8]:
from scipy import stats

colors = [(245,117,16), (117,245,16), (16,117,245), ()]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [9]:
import cv2
import numpy as np
import google.generativeai as genai
import mediapipe as mp
import time
import os
import re

# Configure Google Gemini API
genai.configure(api_key="AIzaSyC0Lk1hGsHnFzT6QE6yACy7Uc9BIU4cTSw")
model_gemini = genai.GenerativeModel("gemini-1.5-pro-latest")


# Load conversation dynamically
conversation_file = "messages.txt"
conversation_update_interval = 2
last_update_time = 0

def load_conversation(file_path):
    extracted_sentences = []
    pattern = r'\d+\. normal_user:\s*(.*)'
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                match = re.match(pattern, line.strip())
                if match:
                    extracted_sentences.append(match.group(1))
        return extracted_sentences
    except FileNotFoundError:
        return []

    
# Save generated LLM sentences with indexing
def save_llm_response(response, filename="messages.txt"):
    current_index = 1
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as file:
            current_index = len(file.readlines()) + 1

    with open(filename, "a", encoding="utf-8") as file:
        file.write(f"{current_index}. sign_user:{response}\n")

# Generate meaningful sentences using Gemini AI
def generate_sentence_gemini(input_tokens, previous_sentence):
    prompt = f"""
    This is a real-time conversation about a murder case.
    - Normal person said: "{previous_sentence}"
    - Sign language tokens: {' '.join(input_tokens)}
    
    Convert these tokens into a grammatically correct response as if the sign user is answering.
    """
    response = model_gemini.generate_content(prompt)
    return response.text.strip()

# Save conversation log
def save_conversation_log(logs, filename="conversation_log.txt"):
    with open(filename, "w", encoding="utf-8") as file:
        file.writelines("\n".join(logs))


# Initialize variables
cap = cv2.VideoCapture(0)
sequence = []
sentence = []
predictions = []
saved_confidence_scores = []
threshold = 0.7
conversation_index = 0
conversation_log = []

conversation_file = "messages.txt"
conversation = load_conversation(conversation_file)
last_update_time = time.time()
update_interval = 2

with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if time.time() - last_update_time >= conversation_update_interval:
            conversation = load_conversation("messages.txt")
            last_update_time = time.time()

        if conversation_index < len(conversation):
            normal_sentence = conversation[conversation_index]
        else:
            normal_sentence = "Waiting for new messages..."

        image, results = mediapipe_detection(frame, holistic)

        if results.left_hand_landmarks or results.right_hand_landmarks:
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                predicted_word = actions[np.argmax(res)]
                predictions.append(np.argmax(res))
                confidence_score = res[np.argmax(res)]

                if np.unique(predictions[-10:])[0] == np.argmax(res):
                    if confidence_score > threshold:
                        if len(sentence) == 0 or (predicted_word != sentence[-1]):
                            sentence.append(predicted_word)
                            saved_confidence_scores.append(confidence_score)
        else:
                    
            sequence.clear()
            print("Hands out of frame: Resetting keypoints and predictions.")

        cv2.rectangle(image, (0, 0), (640, 50), (245, 117, 16), -1)
        cv2.putText(image, f"Normal: {normal_sentence}", (3, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.putText(image, f"Sign: {' '.join(sentence)}", (3, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
         
        # Draw background for confidence scores
        cv2.rectangle(image, (0, 60), (80, 600), (50, 50, 50), -1)
        
        # Display saved confidence scores below the text
        if saved_confidence_scores:
            for i, (word, score) in enumerate(zip(sentence, saved_confidence_scores)):
                cv2.putText(image, f"{word}: {score:.2f}", (10, 80 + (i * 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1, cv2.LINE_AA)

        # Show the output
       
        cv2.imshow('Real-Time Sign Language Conversation', image)

        key = cv2.waitKey(10) & 0xFF
        if key == ord('s'):
            if sentence:
                meaningful_response = generate_sentence_gemini(sentence, normal_sentence)
                conversation_log.append(f"Normal: {normal_sentence}")
                conversation_log.append(f"Sign: {meaningful_response}")
                save_llm_response(meaningful_response)

            sentence.clear()
            saved_confidence_scores.clear()
            conversation_index += 1

        elif key == ord('r') and sentence:
            sentence.pop()
            saved_confidence_scores.pop()

        elif key == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
save_conversation_log(conversation_log)
print("Conversation log saved.")


  from .autonotebook import tqdm as notebook_tqdm


Hands out of frame: Resetting keypoints and predictions.




Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting keypoints and predictions.
Hands out of frame: Resetting k