In [None]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision.models import resnet50

In [None]:
# Load pre-trained ResNet-50 model
# NOTE(review): the network is used as-is, with its final classification layer
# still attached, yet extract_features() treats the output as an embedding.
# Presumably pooled features (head removed) were intended — confirm.
model = resnet50(pretrained=True)
# Switch to evaluation mode for inference. As the last expression of the cell,
# this also displays the model summary.
model.eval()

In [None]:
# Preprocessing pipeline applied to each PIL image before it is fed to the model:
# resize to 224x224, convert to a tensor, then normalize each channel.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
preprocess = transforms.Compose(_preprocess_steps)

# Function to extract features using ResNet-50
def extract_features(image):
    """Run one image crop through the module-level model.

    Args:
        image: Array in OpenCV channel order (it is converted BGR -> RGB
            before use), or None.

    Returns:
        The model output for the crop as a NumPy array with the batch
        dimension squeezed away, or None when ``image`` is None.
    """
    if image is None:
        return None

    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The model expects a batch, so add a leading batch dimension of size 1.
    batch = preprocess(Image.fromarray(rgb_pixels)).unsqueeze(0)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(batch)
    return output.squeeze().numpy()

# Function to compare feature embeddings
def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two 1-D embeddings.

    Args:
        embedding1: First embedding vector.
        embedding2: Second embedding vector of the same length.

    Returns:
        The dot product divided by the product of the two Euclidean norms.
        If either vector has zero norm the similarity is undefined; 0.0 is
        returned so that threshold comparisons treat it as "no match"
        instead of dividing by zero and producing NaN.
    """
    denominator = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
    if denominator == 0:
        return 0.0
    return np.dot(embedding1, embedding2) / denominator


In [None]:

# Load YOLO model and weights
net = cv2.dnn.readNet("/content/yolov3.weights", "/content/darknet/cfg/yolov3.cfg")

# Load coco class labels (one label per line)
with open("/content/darknet/data/coco.names", "r") as f:
    classes = f.read().strip().split("\n")

# Get output layer indices.
# Depending on the OpenCV version these come back either as a flat array or as
# an N x 1 array; flattening makes each `idx` a plain integer in both cases.
output_layer_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()

# Get output layer names. The indices are 1-based, hence the `- 1`.
# The name list is fetched once instead of once per index.
layer_names = net.getLayerNames()
output_layers = [layer_names[idx - 1] for idx in output_layer_indices]

# Function to detect persons using YOLO
def detect_persons(frame, conf_threshold=0.5, nms_threshold=0.4):
    """Detect people in one frame with the module-level YOLO network.

    Args:
        frame: Image array whose first two dimensions are height and width.
            It is passed to ``cv2.dnn.blobFromImage`` with the red/blue swap
            enabled, so it is presumably a BGR frame as read by OpenCV.
        conf_threshold: Minimum class score for a detection to be kept; also
            used as the score threshold of non-maximum suppression.
        nms_threshold: Overlap threshold passed to non-maximum suppression.

    Returns:
        A list of ``[x, y, w, h]`` boxes in pixel coordinates, clipped to the
        frame so that ``frame[y:y+h, x:x+w]`` is always a valid, non-empty
        crop. Boxes lying entirely outside the frame are dropped.
    """
    height, width = frame.shape[:2]

    # Resize frame to YOLO input size and scale pixel values by 0.00392 (about 1/255)
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

    # Pass blob through the network
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Process detections. Each row is laid out as
    # [center_x, center_y, w, h, <one value>, class scores...], with the box
    # expressed as fractions of the frame size.
    boxes = []
    confidences = []
    for out in outs:
        class_scores = out[:, 5:]
        person_scores = class_scores[:, 0]
        # Keep rows whose best class is 0 ('person' in the COCO dataset) and
        # whose score for that class clears the threshold.
        is_person = (class_scores.argmax(axis=1) == 0) & (person_scores > conf_threshold)
        for detection, confidence in zip(out[is_person], person_scores[is_person]):
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))

    if not boxes:
        return []

    # Apply non-maximum suppression to remove overlapping boxes
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    # Clip the surviving boxes to the frame. A box that pokes out of the frame
    # has a negative x or y, which NumPy slicing would read as "count from the
    # end" and turn into an empty or wrong crop.
    detected_boxes = []
    for i in np.asarray(indexes).flatten():
        x, y, w, h = boxes[i]
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + w, width), min(y + h, height)
        if right > left and bottom > top:
            detected_boxes.append([left, top, right - left, bottom - top])
    return detected_boxes

# Function to draw bounding boxes around detected persons with IDs
def draw_boxes_with_ids(image, boxes, ids):
    """Annotate ``image`` in place with one rectangle and one label per person.

    Args:
        image: Image array to draw on.
        boxes: Iterable of ``(x, y, w, h)`` boxes in pixel coordinates.
        ids: Iterable of labels, paired with ``boxes`` by position; extra
            entries on either side are ignored.
    """
    for (left, top, box_w, box_h), label in zip(boxes, ids):
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        # Green outline around the person
        cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)
        # White label just above the top-left corner
        label_origin = (left, top - 5)
        cv2.putText(image, str(label), label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)


In [None]:

# Re-identify people across two videos and write an annotated copy of each.
#
# Each video is read exactly once. For every frame the people are detected,
# each one is embedded and compared with the people already known, and the
# labelled frame is written straight to the output file. IDs are therefore
# decided per detection and per frame, not looked up later by box coordinates
# (identical coordinates in different frames do not imply the same person).

VIDEO1_PATH = "/content/5.mp4"
VIDEO2_PATH = "/content/4.mp4"
SIMILARITY_THRESHOLD = 0.8  # Adjust threshold as needed
FALLBACK_FPS = 30           # used when the source does not report a frame rate
UNKNOWN_PERSON_ID = 0       # label drawn for a detection that has no ID


def _crop_box(frame, box):
    """Return the pixels of ``frame`` inside ``box``, or None if none remain.

    The box is clipped to the frame first: a negative start index would
    otherwise be read by NumPy as "count from the end" and give an empty or
    wrong crop.
    """
    x, y, w, h = box
    frame_height, frame_width = frame.shape[:2]
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + w, frame_width), min(y + h, frame_height)
    if right <= left or bottom <= top:
        return None
    return frame[top:bottom, left:right]


def _detect_and_embed(frame):
    """Return ``(boxes, embeddings)`` for the people in ``frame``.

    ``embeddings[i]`` belongs to ``boxes[i]`` and is None when the box has
    no pixels inside the frame.
    """
    boxes = detect_persons(frame)
    embeddings = []
    for box in boxes:
        person_image = _crop_box(frame, box)
        embeddings.append(None if person_image is None else extract_features(person_image))
    return boxes, embeddings


def _best_match_id(embedding, gallery, taken_ids):
    """Return the ID of the known person most similar to ``embedding``.

    ``gallery`` is a list of ``(embedding, person_id)`` pairs. Only entries
    whose cosine similarity exceeds SIMILARITY_THRESHOLD qualify, and IDs in
    ``taken_ids`` are skipped because one person cannot appear twice in the
    same frame. Returns None when nothing qualifies.
    """
    best_id = None
    best_similarity = SIMILARITY_THRESHOLD
    for known_embedding, known_id in gallery:
        if known_id in taken_ids:
            continue
        similarity = cosine_similarity(embedding, known_embedding)
        if similarity > best_similarity:
            best_id, best_similarity = known_id, similarity
    return best_id


def _source_fps(capture):
    """Return the frame rate reported by ``capture``, or FALLBACK_FPS if unusable."""
    fps = capture.get(cv2.CAP_PROP_FPS)
    return fps if fps > 0 else FALLBACK_FPS


# Initialize variables for storing person IDs.
# person_ids_video1 / person_ids_video2 record the last ID assigned to each
# box tuple; they are kept as a record only and are not used for drawing.
person_ids_video1 = {}
person_ids_video2 = {}
next_person_id_video1 = 1

# Reference data from the first video: people are enrolled only while fewer
# than num_reference_frames reference crops have been collected.
reference_frame_data_video1 = []   # (embedding, box) of every reference crop
num_reference_frames = 5
person_embeddings_video1 = {}      # embedding bytes -> person ID
gallery_video1 = []                # (embedding, person ID), one entry per person

# ---- Video 1: build the reference gallery and annotate ----
video1 = cv2.VideoCapture(VIDEO1_PATH)
frame_width_video1 = int(video1.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height_video1 = int(video1.get(cv2.CAP_PROP_FRAME_HEIGHT))
out_video1 = cv2.VideoWriter('output_video1.mp4', cv2.VideoWriter_fourcc(*'MP4V'), _source_fps(video1), (frame_width_video1, frame_height_video1))
try:
    while True:
        ret, frame = video1.read()
        if not ret:
            break

        # Decided once per frame so every person in a reference frame is enrolled.
        enrolling = len(reference_frame_data_video1) < num_reference_frames
        detected_boxes, embeddings = _detect_and_embed(frame)

        person_ids = []
        for box, person_embedding in zip(detected_boxes, embeddings):
            person_id = UNKNOWN_PERSON_ID
            if person_embedding is not None:
                matched_id = _best_match_id(person_embedding, gallery_video1, person_ids)
                if matched_id is not None:
                    # Same person as an earlier crop: reuse the ID.
                    person_id = matched_id
                elif enrolling:
                    person_id = next_person_id_video1
                    next_person_id_video1 += 1
                    gallery_video1.append((person_embedding, person_id))
                if enrolling:
                    reference_frame_data_video1.append((person_embedding, box))
                    person_embeddings_video1[person_embedding.tobytes()] = person_id
            if person_id != UNKNOWN_PERSON_ID:
                person_ids_video1[tuple(box)] = person_id
            person_ids.append(person_id)

        draw_boxes_with_ids(frame, detected_boxes, person_ids)
        out_video1.write(frame)  # Write the processed frame to the output video
finally:
    video1.release()
    out_video1.release()

# ---- Video 2: match against video 1, remember newcomers, annotate ----
# New IDs continue after the last video-1 ID so a label never means two people.
next_person_id_video2 = next_person_id_video1
# Starts as a copy of the video-1 gallery; people first seen in video 2 are
# appended so they keep one ID across frames.
gallery_video2 = list(gallery_video1)

video2 = cv2.VideoCapture(VIDEO2_PATH)
frame_width_video2 = int(video2.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height_video2 = int(video2.get(cv2.CAP_PROP_FRAME_HEIGHT))
out_video2 = cv2.VideoWriter('output_video2.mp4', cv2.VideoWriter_fourcc(*'MP4V'), _source_fps(video2), (frame_width_video2, frame_height_video2))
try:
    while True:
        ret, frame = video2.read()
        if not ret:
            break

        detected_boxes, embeddings = _detect_and_embed(frame)

        person_ids = []
        for box, person_embedding in zip(detected_boxes, embeddings):
            person_id = UNKNOWN_PERSON_ID
            if person_embedding is not None:
                person_id = _best_match_id(person_embedding, gallery_video2, person_ids)
                if person_id is None:
                    # No matching embedding found, assign a new ID for this person
                    person_id = next_person_id_video2
                    next_person_id_video2 += 1
                    gallery_video2.append((person_embedding, person_id))
                person_ids_video2[tuple(box)] = person_id
            person_ids.append(person_id)

        draw_boxes_with_ids(frame, detected_boxes, person_ids)
        out_video2.write(frame)  # Write the processed frame to the output video
finally:
    video2.release()
    out_video2.release()
# No GUI window is opened anywhere above, so there is nothing to destroy.