In [1]:
import tensorflow as tf
import numpy as np
import cv2
import mediapipe as mp


In [2]:
# MediaPipe handles: the hand-tracking solution and the helper that draws its landmarks
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Landmark ids of the five fingertips in MediaPipe's 21-point hand model
# (thumb, index, middle, ring, pinky); used below to index the landmark list
tipIDs = [4, 8, 12, 16, 20]

# Pre-trained Keras model, read from model.h5 in the current working directory
model = tf.keras.models.load_model('model.h5')



In [3]:
# Haar-cascade face detector, loaded from a pretrained XML file in the working directory
face_detect = cv2.CascadeClassifier('face.xml')
if face_detect.empty():
    # OpenCV does not raise when the cascade file is missing or invalid; it hands back
    # an empty classifier that only fails later, inside the frame loop. Fail here instead.
    raise OSError("Could not load the face cascade from 'face.xml'")

# Lookup tables that turn the model's predicted class index (position in the list)
# into a readable label for each of its outputs
persons_index = ['Jimmy', 'Maya', 'Moataz', 'Mona']
emotions_index = ['Neutral', 'Happy']
genders_index = ['Male', 'Female']

In [4]:
# Text file that always holds the most recently written command
MESSAGE_FILE = 'message.txt'


def write_message(message):
    """Replace the contents of MESSAGE_FILE with `message`.

    The file is opened in 'w' mode, so it is created when missing and any
    previous content is discarded.
    """
    with open(MESSAGE_FILE, 'w') as handle:
        handle.write(message)

In [5]:
import os

def play_music(filename):
    """Open a song from the local "music" folder with the system's default player.

    Args:
        filename: Name of the file inside the "music" folder (e.g. "Tak.mp3").

    Returns:
        None. When the file does not exist (or is not a regular file) a message
        is printed and nothing is launched.

    The player is started without waiting for it to finish: `os.startfile` on
    Windows, the `open` command on macOS and `xdg-open` elsewhere.
    """
    # Local imports keep this cell self-contained; only `os` is imported by the notebook.
    import subprocess
    import sys

    # Songs live in a folder called "music" relative to the working directory
    music_folder = "music"
    file_path = os.path.join(music_folder, filename)

    # isfile (not exists) so that a directory name is not handed to the player
    if not os.path.isfile(file_path):
        print("File not found:", filename)
        return

    if hasattr(os, "startfile"):
        # Windows only: open with the program associated with the file type
        os.startfile(file_path)
    elif sys.platform == "darwin":
        subprocess.Popen(["open", file_path])
    else:
        subprocess.Popen(["xdg-open", file_path])

# Person label -> file name of the song (inside the music folder) played for that person
songs_mapping = dict(
    Jimmy="elgharam.mp3",
    Maya="Tak.mp3",
    Moataz="ekhwaty.mp3",
    Mona="ghazala.mp3",
)

In [14]:
# Number of raised fingers -> command written to the message file.
# Built once here instead of being rebuilt on every frame.
mapping = {
    0: "STOP",
    1: "STOP",
    2: "TURN ONE",
    3: "LED ON",
    4: "BUZZER ON",
    5: "FORWARD"
}


def _hand_landmarks_to_pixels(hand, frame_shape):
    """Return [[id, x_px, y_px], ...] for every landmark of one detected hand.

    MediaPipe reports landmark coordinates relative to the frame size, so they
    are scaled by the frame's width and height to get pixel positions.
    """
    frame_h, frame_w = frame_shape[:2]
    return [[idx, int(lm.x * frame_w), int(lm.y * frame_h)]
            for idx, lm in enumerate(hand.landmark)]


def _count_raised_fingers(lm_list):
    """Count how many fingers look extended; 0 when no hand was found.

    `lm_list` is the output of `_hand_landmarks_to_pixels`, indexed by landmark id.
    """
    if not lm_list:
        return 0

    fingers = []
    # Thumb: counted when the tip (id 4) has a larger x than the joint before it (id 3).
    # NOTE(review): this x-comparison depends on which hand is shown and on whether
    # the camera image is mirrored - confirm it matches the intended setup.
    fingers.append(1 if lm_list[tipIDs[0]][1] > lm_list[tipIDs[0] - 1][1] else 0)
    # Other four fingers: counted when the tip is higher in the image (smaller y)
    # than the joint two ids below it.
    for tip in tipIDs[1:]:
        fingers.append(1 if lm_list[tip][2] < lm_list[tip - 2][2] else 0)
    return fingers.count(1)


cap = cv2.VideoCapture(0)  # Open the default camera

# True while the next "Happy + exactly one finger" frame is allowed to start a song.
# Without this the player would be re-launched on every single frame.
music_armed = True

try:
    # The context manager releases the hand-tracking graph when the loop ends
    with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while True:
            ret, image = cap.read()  # Grab one frame from the camera
            if not ret or image is None:
                # Camera missing, busy or disconnected: stop instead of crashing in cvtColor
                print("Could not read a frame from the camera.")
                break

            # Predictions are only valid for the frame they were computed on; reset them
            # so a face that has left the picture cannot trigger anything.
            predicted_face_class = None
            predicted_emotion_class = None

            # The hands model expects RGB input; mark the frame read-only while it is processed
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # back to BGR for OpenCV drawing

            lmList = []  # [id, x, y] of each landmark of the tracked hand
            if results.multi_hand_landmarks:
                # Only the first detected hand is counted, and the same hand is drawn
                my_hand = results.multi_hand_landmarks[0]
                lmList = _hand_landmarks_to_pixels(my_hand, image.shape)
                mp_draw.draw_landmarks(image, my_hand, mp_hands.HAND_CONNECTIONS)

            total = _count_raised_fingers(lmList)

            faces = face_detect.detectMultiScale(image, 1.3, 5)  # detect faces in the frame
            if len(faces) > 0:
                # Use the first detected face only
                x, y, face_w, face_h = faces[0]
                face = image[y:y + face_h, x:x + face_w]
                # NOTE(review): the crop is BGR and not normalised; confirm this matches
                # how the model was trained.
                frame_resized = cv2.resize(face, (224, 224))

                # The model predicts on batches, so add a leading batch axis of size 1
                frame_resized = np.expand_dims(frame_resized, axis=0)

                # verbose=0 keeps Keras from printing a progress bar for every frame
                predictions = model.predict(frame_resized, verbose=0)

                # The model has three outputs: identity, emotion and gender
                face_prediction, emotion_prediction, gender_prediction = predictions

                # Index of the highest score in each output
                predicted_face_class = np.argmax(face_prediction)
                predicted_emotion_class = np.argmax(emotion_prediction)
                predicted_gender_class = np.argmax(gender_prediction)

                # Overlay the predicted labels and the finger count on the frame
                text = f"Face Class: {persons_index[predicted_face_class]} | Emotion Class: {emotions_index[predicted_emotion_class]}"
                cv2.putText(image, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

                gender_text = f"Gender Class: {genders_index[predicted_gender_class]}"
                cv2.putText(image, gender_text, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

                cv2.putText(image, str(total), (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

            cv2.imshow("MJ", image)  # show the annotated frame
            k = cv2.waitKey(1) & 0xFF  # mask to the low byte so the key code compares reliably
            if k == ord('q'):  # quit when q is pressed
                break

            # Publish the command that corresponds to the current finger count
            message = mapping[int(total)]
            write_message(message)

            # A song is requested when a happy face and exactly one raised finger
            # are both seen in this frame
            wants_music = (
                predicted_emotion_class is not None
                and emotions_index[predicted_emotion_class] == "Happy"
                and total == 1
            )
            if wants_music and music_armed:
                try:
                    play_music(songs_mapping[persons_index[predicted_face_class]])
                except Exception as err:
                    # Playback is best-effort: report the problem and keep the video loop running
                    print("Could not play music:", err)
            # Re-arm only after the request has gone away, so each request plays once
            music_armed = not wants_music
finally:
    # Always free the camera and close the window, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

