In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False  # Image is no longer writeable
    results = model.process(image)  # Make prediction
    image.flags.writeable = True  # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
    return image, results

def draw_landmarks(image, results):
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)  # Draw pose connections
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)  # Left hand
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)  # Right hand
    # Draw face landmarks (use mp.solutions.face_mesh instead)
    if results.face_landmarks:
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp.solutions.face_mesh.FACEMESH_CONTOURS)

def draw_styled_landmarks(image, results):
    # Draw face connections
    if results.face_landmarks:
        mp_drawing.draw_landmarks(
            image, results.face_landmarks, mp.solutions.face_mesh.FACEMESH_CONTOURS,
            mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
            mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
        )
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2))
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

cap = cv2.VideoCapture(0)

# Set the resolution (width x height)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1000)  # Set width q
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)  # Set height 

# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [3]:
# step 3
def extract_keypoints(results):
    """
    Extracts keypoint values from MediaPipe results for pose, face, and hands.
    Returns a single flattened array of keypoints.
    """
    # Extract pose landmarks
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() \
        if results.pose_landmarks else np.zeros(33 * 4)

    # Extract face landmarks
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() \
        if results.face_landmarks else np.zeros(468 * 3)

    # Extract left hand landmarks
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() \
        if results.left_hand_landmarks else np.zeros(21 * 3)

    # Extract right hand landmarks
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() \
        if results.right_hand_landmarks else np.zeros(21 * 3)

    # Concatenate all keypoints into a single array
    return np.concatenate([pose, face, lh, rh])

# Example usage
result_test = extract_keypoints(results)
print("Shape of extracted keypoints:", result_test.shape)


Shape of extracted keypoints: (1662,)


In [4]:
# step 4
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data')

# Actions that we try to detect
actions = np.array(['yes','no','burger','KFC''coke', 'Nescafe'])

# Number of videos worth of data
no_sequences = 40

# Videos are going to be 30 frames in length
sequence_length = 30

# Ensure base directory exists
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

# Create directories for each action and sequence
for action in actions: 
    for sequence in range(no_sequences):
        path = os.path.join(DATA_PATH, action, str(sequence))
        try: 
            os.makedirs(path)
            print(f"Created folder: {path}")
        except FileExistsError:
            print(f"Folder already exists: {path}")


Folder already exists: MP_Data\coke\0
Folder already exists: MP_Data\coke\1
Folder already exists: MP_Data\coke\2
Folder already exists: MP_Data\coke\3
Folder already exists: MP_Data\coke\4
Folder already exists: MP_Data\coke\5
Folder already exists: MP_Data\coke\6
Folder already exists: MP_Data\coke\7
Folder already exists: MP_Data\coke\8
Folder already exists: MP_Data\coke\9
Folder already exists: MP_Data\coke\10
Folder already exists: MP_Data\coke\11
Folder already exists: MP_Data\coke\12
Folder already exists: MP_Data\coke\13
Folder already exists: MP_Data\coke\14
Folder already exists: MP_Data\coke\15
Folder already exists: MP_Data\coke\16
Folder already exists: MP_Data\coke\17
Folder already exists: MP_Data\coke\18
Folder already exists: MP_Data\coke\19
Folder already exists: MP_Data\coke\20
Folder already exists: MP_Data\coke\21
Folder already exists: MP_Data\coke\22
Folder already exists: MP_Data\coke\23
Folder already exists: MP_Data\coke\24
Folder already exists: MP_Data\coke

In [33]:
#step 5, collect data, no need to run since alr collected
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    # NEW LOOP
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(no_sequences):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)
#                 print(results)

                # Draw landmarks
                draw_styled_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(2000)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

In [5]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

actions = ["burger", "yes", "no", "Nescafe","KFC","coke"]

# Map actions to numerical labels
label_map = {label: num for num, label in enumerate(actions)}

# Prepare sequences and labels
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

# Convert to numpy arrays
X = np.array(sequences)
y = to_categorical(labels).astype(int)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

print(f"Training data shape: {X_train.shape}, {y_train.shape}")
print(f"Testing data shape: {X_test.shape}, {y_test.shape}")


Training data shape: (228, 30, 1662), (228, 6)
Testing data shape: (12, 30, 1662), (12, 6)


In [18]:
# training LSTM, no need to run this again
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import time

# Create a TensorBoard callback
log_dir = os.path.join('Logs')
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

tb_callback = TensorBoard(log_dir=log_dir)

# Define the model
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, X_train.shape[2]),kernel_regularizer=l2(0.01)),
    LSTM(128, return_sequences=True, activation='relu',kernel_regularizer=l2(0.01)),
    LSTM(64, return_sequences=False, activation='relu',kernel_regularizer=l2(0.01)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(6, activation='softmax')  # Output layer for classification
])

# Compile the model
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.0001)

# Train the model
model.fit(
    X_train, y_train,
    epochs=300,  # Use a reasonable number of epochs
    validation_data=(X_test, y_test),
    callbacks=[tb_callback]
)

model.save('test(YN)_action_model.h5')

Epoch 1/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 294ms/step - categorical_accuracy: 0.1385 - loss: 8.6497 - val_categorical_accuracy: 0.0000e+00 - val_loss: 7.3672
Epoch 2/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 113ms/step - categorical_accuracy: 0.1917 - loss: 7.0620 - val_categorical_accuracy: 0.0833 - val_loss: 6.5136
Epoch 3/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 116ms/step - categorical_accuracy: 0.1825 - loss: 6.0651 - val_categorical_accuracy: 0.0833 - val_loss: 5.3450
Epoch 4/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 104ms/step - categorical_accuracy: 0.1357 - loss: 5.1005 - val_categorical_accuracy: 0.5833 - val_loss: 4.6295
Epoch 5/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 104ms/step - categorical_accuracy: 0.2071 - loss: 4.5412 - val_categorical_accuracy: 0.0833 - val_loss: 4.2550
Epoch 6/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s



In [19]:
model.evaluate(X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - categorical_accuracy: 0.8333 - loss: 1.6765


[1.676526427268982, 0.8333333134651184]

In [6]:
#reloading model
from tensorflow.keras.models import load_model
model = load_model('test(YN)_action_model.h5')
#You can now use this model to evaluate or predict without retraining.




In [20]:
#switch X_train to X_test and y_train to y_test if you want
from sklearn.metrics import multilabel_confusion_matrix
yhat = model.predict(X_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
multilabel_confusion_matrix(ytrue, yhat)





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 998ms/step


array([[[11,  0],
        [ 0,  1]],

       [[ 9,  1],
        [ 0,  2]],

       [[ 5,  0],
        [ 2,  5]],

       [[ 9,  1],
        [ 0,  2]]], dtype=int64)

In [21]:
# yes, no control and send output to Gui
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Load trained model
model = load_model('test(YN)_action_model.h5')

# Mediapipe initialization
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Actions and variables
actions = ["burger", "yes", "no", "KFC","Nescafe","coke"]
sequence = []
sentence = []
threshold = 0.8

# Visualization helper
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0, 60 + num * 40), (int(prob * 100), 90 + num * 40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85 + num * 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

# Detection loop
cap = cv2.VideoCapture(0)
action_images = {
    "burger": "C:/Users/YUANJIE/OneDrive - International Islamic University Malaysia/Documents/machinevision/uma_cv_2024-main/uma_cv_2024-main/hand sign detection/burgermv.jpg",
    "coke": "C:/Users/YUANJIE/OneDrive - International Islamic University Malaysia/Documents/machinevision/uma_cv_2024-main/uma_cv_2024-main/hand sign detection/cokemv.jpg",
    "Nescafe": "C:/Users/YUANJIE/OneDrive - International Islamic University Malaysia/Documents/machinevision/uma_cv_2024-main/uma_cv_2024-main/hand sign detection/nescafemvv.jpg",
    "KFC": "C:/Users/YUANJIE/OneDrive - International Islamic University Malaysia/Documents/machinevision/uma_cv_2024-main/uma_cv_2024-main/hand sign detection/kfcmvv.jpg",
    
}
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245), (102, 145, 147),(23,200,87),(142,160,178),(231,40,120),(50,245,30)]

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        image, results = mediapipe_detection(frame, holistic)

        draw_styled_landmarks(image, results)

        # Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]

        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            detected_action = actions[np.argmax(res)]
            print(f"Detected action: {detected_action}")

            if detected_action == "yes" and res[np.argmax(res)] > threshold:
                print("Order completed. Exiting...")
                break
            elif detected_action == "no" and res[np.argmax(res)] > threshold:
                if sentence:
                    removed = sentence.pop()
                    print(f"Removed: {removed}")
            elif detected_action not in ["yes", "no"] and res[np.argmax(res)] > threshold:
                if not sentence or detected_action != sentence[-1]:
                    sentence.append(detected_action)
                    print(f"Added: {detected_action}")

            if len(sentence) > 5:
                sentence = sentence[-5:]

            # Save order to file
            with open('order_list.txt', 'w') as f:
                for item in sentence:
                    f.write(f"{item}\n")

            # Visualize probabilities
            image = prob_viz(res, actions, image, colors)

            # Display the image corresponding to the top action
            if detected_action in action_images:
                action_image = cv2.imread(action_images[detected_action])
                if action_image is not None:
                    action_image = cv2.resize(action_image, (100, 100))  # Resize to fit the corner
                    image[50:150, -110:-10] = action_image  # Position the image at top-right corner

        # Display current sentence
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 806ms/step
Detected action: Nescafe
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
Detected action: Nescafe
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Detected action: Nescafe
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
Detected action: Nescafe
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
Detected action: Nescafe
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
Detected action: burger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
Detected action: burger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
Detected action: burger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
Detected action: burger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
Detected action: burger
[1m1/1[0m [32m━━━━━━━━━━━━━━

In [51]:
import tkinter as tk

def update_order_display():
    try:
        with open('order_list.txt', 'r') as f:
            items = f.readlines()
        order_var.set("\n".join(item.strip() for item in items))
    except FileNotFoundError:
        order_var.set("No items detected yet.")

    root.after(1000, update_order_display)

# Tkinter GUI setup
root = tk.Tk()
root.title("Order List")
root.geometry("300x300")

order_var = tk.StringVar()
label = tk.Label(root, textvariable=order_var, font=("Helvetica", 14), justify="left")
label.pack(pady=20)

update_order_display()
root.mainloop()


In [None]:
# ROBOFLOW data
import cv2
from ultralytics import YOLO
import numpy as np
import time
import random

# Load the YOLO model
model = YOLO("projectmv.pt")

# Actions and variables
sentence = []
general_threshold = 0.6
yes_no_threshold = 0.8  # Updated threshold for "yes" and "no" actions

# Action images (update these paths as needed)
action_images = {
    "Burger": "C:/Users/hp/Downloads/uma_cv_2024-main/uma_cv_2024-main/Projectmv/burgermv.jpg",
    "coke": "C:/Users/hp/Downloads/uma_cv_2024-main/uma_cv_2024-main/Projectmv/cokemv.jpg",
    "Nescafe": "C:/Users/hp/Downloads/uma_cv_2024-main/uma_cv_2024-main/Projectmv/nescafemvv.jpg",
    "KFC": "C:/Users/hp/Downloads/uma_cv_2024-main/uma_cv_2024-main/Projectmv/kfcmvv.jpg",
    "Curry": "C:/Users/hp/Downloads/uma_cv_2024-main/uma_cv_2024-main/Projectmv/currymv.jpg",
}

# Function to generate random colors
def generate_colors(n):
    return [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(n)]

# Visualization helper function
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        color = colors[num] if num < len(colors) else (0, 0, 0)  # Default to black if colors are exhausted
        cv2.rectangle(output_frame, (0, 60 + num * 40), (int(prob * 100), 90 + num * 40), color, -1)
        cv2.putText(output_frame, actions[num], (0, 85 + num * 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

# Open video capture
cap = cv2.VideoCapture(0)

# Set video capture resolution (resize window)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)  # Width 1024px
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # Height 768px

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame, stream=True)

    for result in results:
        boxes = result.boxes

        # Check if boxes are empty or not
        if len(boxes) > 0:
            for bbox in boxes:
                x1, y1, x2, y2 = bbox.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                cls_idx = int(bbox.cls[0])
                cls_name = model.names[cls_idx]
                confidence = bbox.conf[0]

                print(f"Detected: {cls_name} with confidence: {confidence}")  # Debugging line

                # Draw bounding box and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 4)
                cv2.putText(frame, cls_name, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

                # Handle detected actions
                if cls_name in ["yes", "no"] and confidence > yes_no_threshold:
                    if cls_name == "yes":
                        print("Order completed. Saving and exiting...")
                        with open("order_list.txt", "w") as file:
                            for item in sentence:
                                file.write(f"{item}\n")
                        cap.release()
                        cv2.destroyAllWindows()
                        exit()
                    elif cls_name == "no" and sentence:
                        print("Detected 'no'. Immediately removing the last item...")
                        removed = sentence.pop()
                        print(f"Removed: {removed}")
                elif cls_name not in ["yes", "no"] and confidence > general_threshold:
                    if not sentence or cls_name != sentence[-1]:
                        sentence.append(cls_name)

                # Display action images if detected
                if cls_name in action_images:
                    print(f"Displaying image for: {cls_name}")  # Debugging line
                    action_image = cv2.imread(action_images[cls_name])
                    if action_image is not None:
                        action_image = cv2.resize(action_image, (100, 100))  # Resize to fit the corner
                        frame[50:150, -110:-10] = action_image  # Position the image at top-right corner
                    else:
                        print(f"Image for {cls_name} not found!")  # Debugging line

    # Generate a list of random colors based on sentence length
    colors = generate_colors(len(sentence))

    # Display sentence on the frame
    cv2.rectangle(frame, (0, 0), (1024, 40), (0, 0, 0), -1)  # Adjust rectangle for sentence display
    cv2.putText(frame, " ".join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    # Visualize the action probabilities (if needed)
    frame = prob_viz([1] * len(sentence), sentence, frame, colors)

    # Show the frame with detection and action images
    cv2.imshow("YOLO Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 (no detections), 354.5ms
Speed: 10.9ms preprocess, 354.5ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 329.9ms
Speed: 4.0ms preprocess, 329.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 309.1ms
Speed: 4.0ms preprocess, 309.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 307.1ms
Speed: 2.0ms preprocess, 307.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 262.9ms
Speed: 2.6ms preprocess, 262.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 262.2ms
Speed: 3.0ms preprocess, 262.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 251.3ms
Speed: 2.4ms preprocess, 251.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 253.3ms
Speed: 3.0ms prep

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/hp/Downloads/uma_cv_2024-main/uma_cv_2024-main/Projectmv/kfcmvv.jpg'

: 

In [None]:
import tkinter as tk

def update_order_display():
    try:
        with open('order_list.txt', 'r') as f:
            items = f.readlines()
        order_var.set("\n".join(item.strip() for item in items))
    except FileNotFoundError:
        order_var.set("No items detected yet.")

    root.after(1000, update_order_display)

# Tkinter GUI setup
root = tk.Tk()
root.title("Order List")
root.geometry("300x300")

order_var = tk.StringVar()
label = tk.Label(root, textvariable=order_var, font=("Helvetica", 14), justify="left")
label.pack(pady=20)

update_order_display()
root.mainloop()

In [12]:
# basic code to show performance
colors = [(245,117,16), (117,245,16), (16,117,245), (102,145,147)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

# 1. New detection variables
sequence = []
sentence = []
threshold = 0.9

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
#         sequence.insert(0,keypoints)
#         sequence = sequence[:30]
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            
            
        #3. Viz logic
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
                else:
                    sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti