In [1]:
import cv2
import mediapipe as mp
import time
import math
class poseDetector():
    """Thin wrapper around MediaPipe Pose with drawing and geometry helpers.

    Typical per-frame use is ``findPose(img)`` followed by
    ``getPosition(img)``; ``findAngle`` then works on the landmark list
    stored by ``getPosition``.
    """

    # Latest MediaPipe result. Stays None until findPose() has processed a
    # frame, so getPosition() can be called safely before that.
    results = None

    def __init__(self,
                 static_image_mode=False,
                 model_complexity=0,
                 smooth_landmarks=True,
                 enable_segmentation=False,
                 smooth_segmentation=True,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        """Create the MediaPipe Pose solution.

        All arguments are forwarded unchanged to ``mp.solutions.pose.Pose``;
        the defaults are the values this class previously hard-coded.
        """
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(
            static_image_mode=static_image_mode,
            model_complexity=model_complexity,
            smooth_landmarks=smooth_landmarks,
            enable_segmentation=enable_segmentation,
            smooth_segmentation=smooth_segmentation,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark list produced by the most recent getPosition() call.
        self.lmList = []

    def findPose(self, img, draw=True):
        """Run pose estimation on a BGR frame and optionally draw the skeleton.

        Args:
            img: BGR image; it is converted to RGB before being processed.
            draw: When True and a pose was found, the landmarks and their
                connections are drawn onto ``img`` in place.

        Returns:
            The same ``img`` object that was passed in.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)
        if self.results.pose_landmarks and draw:
            self.mpDraw.draw_landmarks(img, self.results.pose_landmarks,
                                       self.mpPose.POSE_CONNECTIONS)
        return img

    def getPosition(self, img, draw=True):
        """Convert the latest pose landmarks to pixel coordinates.

        Args:
            img: Image whose ``shape`` supplies the height and width used for
                scaling; also the drawing target when ``draw`` is True.
            draw: When True, a filled green circle is drawn on each landmark.

        Returns:
            A list of ``[id, x, y]`` entries (also stored in ``self.lmList``).
            The list is empty when no frame has been processed yet or no
            pose was found. Coordinates are not clamped to the image bounds.
        """
        self.lmList = []
        if self.results is None or not self.results.pose_landmarks:
            return self.lmList

        # Only height and width are needed, so 2-D (single channel) images
        # work too; read them once instead of once per landmark.
        h, w = img.shape[:2]
        for idx, lm in enumerate(self.results.pose_landmarks.landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            self.lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 5, (0, 255, 0), cv2.FILLED)
        return self.lmList

    def findDistance(self, p1, p2, img=None, color=(255, 0, 255), scale=5):
        """Measure the straight-line distance between two points.

        Args:
            p1: ``(x, y)`` of the first point.
            p2: ``(x, y)`` of the second point.
            img: Optional image; when given, both points, their midpoint and
                the connecting line are drawn onto it.
            color: Color used for the drawing.
            scale: Controls the line thickness (``max(1, scale // 3)``).

        Returns:
            ``(length, info, img)`` where ``info`` is
            ``(x1, y1, x2, y2, cx, cy)`` with ``(cx, cy)`` the floor-divided
            midpoint, and ``img`` is the argument that was passed in.
        """
        x1, y1 = p1
        x2, y2 = p2
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)

        if img is not None:
            cv2.circle(img, (x1, y1), 10, color, cv2.FILLED)
            cv2.circle(img, (x2, y2), 10, color, cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), color, max(1, scale // 3))
            cv2.circle(img, (cx, cy), 10, color, cv2.FILLED)

        return length, info, img

    def findAngle(self, img, p1, p2, p3, draw=True):
        """Return the angle at landmark ``p2`` between ``p1`` and ``p3``.

        The three arguments are indices into ``self.lmList``, so
        ``getPosition`` must have produced a list containing them;
        otherwise the index lookup raises ``IndexError``.

        Args:
            img: Image to annotate when ``draw`` is True.
            p1, p2, p3: Landmark indices; ``p2`` is the vertex.
            draw: When True, the two segments, the three points and the
                integer angle value are drawn onto ``img``.

        Returns:
            The angle in degrees, measured from the ``p2->p1`` direction to
            the ``p2->p3`` direction and shifted into the range [0, 360).
        """
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        x3, y3 = self.lmList[p3][1:]

        # Difference of the two segment headings, both taken from the vertex.
        angle = math.degrees(math.atan2(y3 - y2, x3 - x2)
                             - math.atan2(y1 - y2, x1 - x2))
        if angle < 0:
            angle = angle + 360

        if draw:
            cv2.line(img, (x1, y1), (x2, y2), (255, 255, 255), 3)
            cv2.line(img, (x3, y3), (x2, y2), (255, 255, 255), 3)
            for point in ((x1, y1), (x2, y2), (x3, y3)):
                # Filled dot plus an outline ring of the same radius.
                cv2.circle(img, point, 5, (255, 0, 0), cv2.FILLED)
                cv2.circle(img, point, 5, (255, 0, 0), 2)
            cv2.putText(img, str(int(angle)), (x2 - 20, y2 + 50),
                        cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)
        return angle
def main():
    """Run a live webcam demo: draw the pose, print landmarks and show FPS.

    The loop ends when the camera stops delivering frames or when ``q`` is
    pressed in the preview window. The camera and the window are released
    on every exit path, including a kernel interrupt.
    """
    cap = cv2.VideoCapture(0)
    pTime = 0
    detector = poseDetector()
    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame was delivered, so there is nothing to resize.
                break
            img = cv2.resize(img, (900, 600))
            detector.findPose(img)
            lmList = detector.getPosition(img)
            print(lmList)

            cTime = time.time()
            elapsed = cTime - pTime
            # Two reads inside one clock tick would otherwise divide by zero.
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime

            cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 0, 255), 3)
            cv2.imshow("Image", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
# Start the webcam demo only when this code is executed directly.
if "__main__" == __name__:
    main()

[]
[[0, 614, 58], [1, 581, 121], [2, 575, 123], [3, 569, 125], [4, 599, 126], [5, 606, 132], [6, 615, 138], [7, 578, 150], [8, 646, 169], [9, 636, 31], [10, 657, 38], [11, 616, -21], [12, 895, 58], [13, 611, -208], [14, 1090, -22], [15, 632, -224], [16, 945, 64], [17, 630, -234], [18, 905, 71], [19, 634, -236], [20, 885, 104], [21, 647, -231], [22, 900, 99], [23, 882, -776], [24, 1083, -768], [25, 662, -623], [26, 901, -709], [27, 563, -866], [28, 801, -960], [29, 575, -921], [30, 813, -1005], [31, 467, -862], [32, 691, -993]]
[[0, 570, 105], [1, 536, 168], [2, 529, 173], [3, 521, 177], [4, 549, 167], [5, 552, 170], [6, 557, 174], [7, 491, 202], [8, 540, 201], [9, 561, 82], [10, 577, 80], [11, 423, 18], [12, 656, 71], [13, 492, -249], [14, 904, -91], [15, 592, -444], [16, 917, -198], [17, 596, -510], [18, 906, -243], [19, 610, -516], [20, 885, -232], [21, 616, -493], [22, 896, -219], [23, 582, -748], [24, 757, -722], [25, 709, -921], [26, 887, -966], [27, 682, -1562], [28, 926, -1556],

KeyboardInterrupt: 

In [None]:
import time
import pyautogui
import pyttsx3
import cv2
import mediapipe as mp
import PoseEstimationModule as pem

# Live pose + face gesture demo: reads webcam frames, labels a hand/arm
# gesture from pose landmarks, speaks it when it changes, and overlays a
# "Happy"/"Sad" label derived from face-mesh lip landmarks.
# Press "q" in the preview window (or interrupt the kernel) to stop.

# Initialize video capture and pose detector
cap = cv2.VideoCapture(0)
detector = pem.poseDetector()

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(max_num_faces=1)

# One text-to-speech engine for the whole session instead of one per utterance
engine = pyttsx3.init()

# Initialize counter and stack for gesture recognition
counter = 0   # pose frames seen since the last speech check (wraps at 10)
st = ""       # label shown for the current frame
stack = []    # history of labels recorded at speech checks; last entry is compared

try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame was delivered, so there is nothing to process.
            break
        img = cv2.resize(img, (900, 650))
        img = cv2.flip(img, 1)

        # Take the RGB copy for face mesh *before* anything is drawn, so the
        # face model does not see pose markers or the label box.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Start every frame without a label so nothing stale is displayed
        # when neither a pose nor a facial gesture is found.
        st = ""

        # Detect pose and get landmark positions
        detector.findPose(img, draw=True)
        lmList = detector.getPosition(img, draw=True)

        if lmList:
            # NOTE(review): in MediaPipe's pose numbering 18 and 19 look like
            # a pinky and an index-finger landmark on opposite hands —
            # confirm against the landmark map that this pairing is intended.
            left, right = lmList[18][1:], lmList[19][1:]

            # Distance between the two hand points
            l1, _, _ = detector.findDistance(left, right)

            # Determine the gesture. Rules are evaluated in order and a later
            # match overrides an earlier one. Thresholds are pixels in the
            # 900x650 frame.
            if l1 < 100:
                st = "Namaste"
            if right[1] < 200:
                st = "Hi"
            if right[1] < 200 and left[1] < 200:
                st = "Victory"
            if left[1] < 100 and right[1] < 100:
                st = "Hands Up"
            if left[0] < 150:
                st = "Left"
            if right[0] > 750:
                st = "Right"
            if abs(left[0] - right[0]) < 50 and abs(left[1] - right[1]) < 50:
                st = "Clapping"
            head_top = lmList[0][1:]
            if abs(right[0] - head_top[0]) < 50 and abs(right[1] - head_top[1]) < 50:
                st = "Slap"
            elif abs(left[0] - head_top[0]) < 50 and abs(left[1] - head_top[1]) < 50:
                st = "Slap"
            # NOTE(review): forehead uses the same landmark (0) as head_top,
            # so the right-hand "Slap" match above is always replaced by
            # "Salute" — confirm which landmark each gesture should use.
            forehead = lmList[0][1:]
            if abs(right[0] - forehead[0]) < 50 and abs(right[1] - forehead[1]) < 50:
                st = "Salute"

            # Voice output and stacking unique gestures: checked on every
            # 10th pose frame, and only when the label differs from the last
            # recorded one.
            if counter == 10:
                counter = 0
            if counter == 0 and (not stack or st != stack[-1]):
                if st:
                    # runAndWait() blocks until speech has finished.
                    engine.say(st)
                    engine.runAndWait()
                stack.append(st)

            counter += 1

        # Detect face mesh on the clean frame
        results = face_mesh.process(img_rgb)

        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                mp.solutions.drawing_utils.draw_landmarks(
                    img, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS,
                    mp.solutions.drawing_utils.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1),
                    mp.solutions.drawing_utils.DrawingSpec(color=(0, 0, 255), thickness=1)
                )

                # Analyze facial gestures
                landmarks = face_landmarks.landmark

                # Example landmarks for mouth corners and lips
                left_mouth_corner = landmarks[61]
                right_mouth_corner = landmarks[291]
                top_lip = landmarks[13]
                bottom_lip = landmarks[14]

                # Happy (smile) detection: both corners lie vertically
                # between the two lip points.
                if (top_lip.y < left_mouth_corner.y and top_lip.y < right_mouth_corner.y and
                        bottom_lip.y > left_mouth_corner.y and bottom_lip.y > right_mouth_corner.y):
                    st = "Happy"

                # Sad detection
                # NOTE(review): this requires the top-lip point to be below
                # the bottom-lip point, which looks unreachable for a normal
                # face — confirm the intended rule.
                if (top_lip.y > left_mouth_corner.y and top_lip.y > right_mouth_corner.y and
                        bottom_lip.y < left_mouth_corner.y and bottom_lip.y < right_mouth_corner.y):
                    st = "Sad"

        # Draw the final label once, after all detectors have run, so a
        # shorter label never leaves part of a longer one visible.
        if st:
            (w, h), _ = cv2.getTextSize(st, cv2.FONT_HERSHEY_PLAIN, 5, 3)
            cv2.rectangle(img, (20, 20), (20 + w, 70 + h), (0, 0, 0), cv2.FILLED)
            cv2.putText(img, st, (20, 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 0, 255), 3)

        # Display the image
        cv2.imshow("New Project", img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Free the camera and close the preview even when the kernel is interrupted.
    cap.release()
    face_mesh.close()
    cv2.destroyAllWindows()

In [None]:
import time
import pyautogui
import pyttsx3
import cv2
import mediapipe as mp
import PoseEstimationModule as pem

# Live face gesture demo: reads webcam frames, draws the pose and face
# contours, and labels/speaks "Smile" or "Sleep" based on face-mesh lip and
# eyelid landmarks. Press "q" in the preview window (or interrupt the
# kernel) to stop.

# Initialize video capture and pose detector
cap = cv2.VideoCapture(0)
detector = pem.poseDetector()

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(max_num_faces=1)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Initialize counter and stack for gesture recognition
counter = 0   # pose frames seen since the last speech check (wraps at 10)
st = ""       # label shown for the current frame
stack = []    # history of labels recorded at speech checks; last entry is compared

# Facial gesture found on the previous frame ("" when there was none). Used
# to speak a gesture once when it appears instead of on every frame.
last_face_gesture = ""

try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame was delivered, so there is nothing to process.
            break
        img = cv2.resize(img, (900, 650))
        img = cv2.flip(img, 1)
        hCam, wCam, _ = img.shape

        # Take the RGB copy for face mesh *before* anything is drawn, so the
        # face model does not see pose markers.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Start every frame without a label so a gesture from an earlier
        # frame is never shown or spoken again by accident.
        st = ""

        # Detect pose and draw its landmarks
        detector.findPose(img, draw=True)
        lmList = detector.getPosition(img, draw=True)

        if lmList:
            # Voice output and stacking unique gestures.
            # NOTE(review): no pose gesture is classified in this cell, so st
            # is always empty here and this only records the empty state.
            if counter == 10:
                counter = 0
            if counter == 0 and (not stack or st != stack[-1]):
                if st:
                    engine.say(st)
                    engine.runAndWait()
                stack.append(st)

            counter += 1

        # Detect face mesh on the clean frame
        results = face_mesh.process(img_rgb)

        face_gesture = ""
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                mp.solutions.drawing_utils.draw_landmarks(
                    img, face_landmarks, mp_face_mesh.FACEMESH_CONTOURS,
                    mp.solutions.drawing_utils.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1),
                    mp.solutions.drawing_utils.DrawingSpec(color=(0, 0, 255), thickness=1)
                )

                # Analyze facial gestures
                landmarks = face_landmarks.landmark

                # Mouth size in pixels: corners 61/291 horizontally, lip
                # points 13/14 vertically. Each coordinate is truncated to an
                # int before subtracting.
                mouth_width = int(landmarks[291].x * wCam) - int(landmarks[61].x * wCam)
                lip_height = int(landmarks[14].y * hCam) - int(landmarks[13].y * hCam)

                # Detect smile: wide mouth with lips nearly closed.
                # NOTE(review): thresholds are absolute pixels, so they
                # presumably depend on how close the face is to the camera.
                if mouth_width > 60 and lip_height < 20:
                    face_gesture = "Smile"

                # Eye opening in pixels (landmarks 159/145 and 386/374)
                left_eye_height = int(landmarks[145].y * hCam) - int(landmarks[159].y * hCam)
                right_eye_height = int(landmarks[374].y * hCam) - int(landmarks[386].y * hCam)

                # Detect sleep gesture if both eyes are closed; this takes
                # precedence over a smile found above.
                if left_eye_height < 5 and right_eye_height < 5:
                    face_gesture = "Sleep"

        if face_gesture:
            st = face_gesture
            # Speak only when the gesture has just appeared or changed;
            # runAndWait() blocks, so repeating it every frame would freeze
            # the video for as long as the expression is held.
            if face_gesture != last_face_gesture:
                engine.say(face_gesture)
                engine.runAndWait()
        last_face_gesture = face_gesture

        # Display text for the detected gesture
        if st:
            (w, h), _ = cv2.getTextSize(st, cv2.FONT_HERSHEY_PLAIN, 5, 3)
            cv2.rectangle(img, (20, 20), (20 + w, 70 + h), (0, 0, 0), cv2.FILLED)
            cv2.putText(img, st, (20, 70), cv2.FONT_HERSHEY_PLAIN, 5, (255, 0, 255), 3)

        # Display the image
        cv2.imshow("New Project", img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Free the camera and close the preview even when the kernel is interrupted.
    cap.release()
    face_mesh.close()
    cv2.destroyAllWindows()