In [8]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load the pose-estimation weights once at module level; the same model
# object is called on every frame of the video loop further down.
model = YOLO('yolov8n-pose.pt')
# Keypoint indices in the 17-point COCO layout used by the YOLO pose models.
_NOSE = 0          # used here as the "head" position
_LEFT_WRIST = 9
_RIGHT_WRIST = 10


def detect_close_wrists_to_head(results, distance_threshold=100):
    """
    Detect when a person's wrist is close to the head of another person.

    Every ordered pair of distinct people is examined, so a hit is reported
    regardless of which of the two people was detected first.

    Args:
        results: YOLO pose estimation results; only ``results[0]`` is used.
        distance_threshold: Maximum wrist-to-nose distance, in pixels, that
            counts as a close interaction.

    Returns:
        close_interactions: A list of ``(wrist_owner, head_owner)`` index
            tuples, one for each ordered pair in which either wrist of
            ``wrist_owner`` lies within ``distance_threshold`` of the nose
            of ``head_owner``.
    """
    if not results:
        return []

    # Expected shape: (num_people, num_keypoints, 2).
    keypoints = results[0].keypoints.xy.cpu().numpy()

    # With no detections the array may have no people or no keypoints at
    # all; indexing the wrist rows would fail, so bail out early.
    highest_index = max(_NOSE, _LEFT_WRIST, _RIGHT_WRIST)
    if keypoints.ndim != 3 or keypoints.shape[1] <= highest_index:
        return []

    close_interactions = []
    for i, person in enumerate(keypoints):
        # Keypoints the model could not localise are reported as (0, 0).
        # Skip them, otherwise two "missing" points look like a perfect hit.
        wrists = [
            wrist
            for wrist in (person[_RIGHT_WRIST], person[_LEFT_WRIST])
            if wrist.any()
        ]
        if not wrists:
            continue

        for j, other in enumerate(keypoints):
            if i == j:
                continue  # a person's own head does not count

            head = other[_NOSE]
            if not head.any():
                continue  # head of person j was not detected

            if any(np.linalg.norm(wrist - head) < distance_threshold
                   for wrist in wrists):
                close_interactions.append((i, j))

    return close_interactions

# ---------------------------------------------------------------------------
# Run pose estimation on every frame of a video, flag frames in which a wrist
# of one person is close to another person's head, and write the annotated
# frames to a new video file while previewing them in a window.
# ---------------------------------------------------------------------------
video_path = 'fight_2.mp4'  # Replace with your video path
output_path = "result_2.avi"

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    raise IOError(f"Could not open video: {video_path}")

# Keep the frame rate as a float so rates such as 29.97 are not truncated;
# fall back to 30 when the backend cannot report it (returns 0).
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

try:
    if not out.isOpened():
        raise IOError(f"Could not open video writer: {output_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read error

        results = model(frame, conf=0.5)  # Run pose estimation

        close_interactions = detect_close_wrists_to_head(results)

        # Draw a bounding box around every detected person. Reading the
        # `xyxy` view does not depend on how many extra columns (confidence,
        # class, ...) the raw box tensor carries.
        for x1, y1, x2, y2 in results[0].boxes.xyxy.tolist():
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 255, 0), 2)

        # Report each close interaction using the per-frame detection
        # indices; the box class id is the same for every person and cannot
        # tell people apart.
        frame_number = cap.get(cv2.CAP_PROP_POS_FRAMES)
        for person1_index, person2_index in close_interactions:
            print(f"Frame: {frame_number}, "
                  f"Person {person1_index}'s wrist is close to "
                  f"Person {person2_index}'s head!")

        # The overlay is identical for every interaction, so draw it once.
        if close_interactions:
            cv2.putText(frame, 'HIT', (250, 250), cv2.FONT_HERSHEY_SIMPLEX,
                        10, (0, 255, 0), 2)

        out.write(frame)

        cv2.imshow("Output", frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always release both handles: the writer must be released for the
    # output file to be finalised, even when the loop exits on an error.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 384x640 4 persons, 9.5ms
Speed: 1.3ms preprocess, 9.5ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 16.8ms
Speed: 4.9ms preprocess, 16.8ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9.9ms
Speed: 2.4ms preprocess, 9.9ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 16.4ms
Speed: 5.5ms preprocess, 16.4ms inference, 5.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 14.7ms
Speed: 3.1ms preprocess, 14.7ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 40.2ms
Speed: 4.4ms preprocess, 40.2ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 10.5ms
Speed: 3.4ms preprocess, 10.5ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11.6ms
Speed: 2.9ms preprocess, 11.6ms inference, 2.7ms postprocess per image at shape (1, 3