In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def FrameCapture(path, target_fps=10):
    """Read a video file and return an evenly spaced subsample of its frames.

    Args:
        path: Path to the video file; passed straight to ``cv2.VideoCapture``.
        target_fps: Approximate number of frames to keep per second of video.
            Every ``fps // target_fps``-th frame is kept, where ``fps`` is the
            (integer-truncated) frame rate reported by the container.

    Returns:
        A list of the kept frames, in playback order, exactly as returned by
        ``VideoCapture.read``. The list is empty when no frame could be read
        (for example when the file cannot be opened).

    Raises:
        ValueError: If ``target_fps`` is not positive.
    """
    if target_fps <= 0:
        raise ValueError("target_fps must be positive")

    image_list = []
    vidObj = cv2.VideoCapture(path)
    try:
        # Frame rate reported by the container (0 when unknown/unreadable).
        fps = int(vidObj.get(cv2.CAP_PROP_FPS))

        # Number of source frames between two kept frames. Clamp to 1 so that
        # videos slower than target_fps (or with an unknown frame rate) keep
        # every frame instead of triggering a modulo-by-zero below.
        save_interval = max(1, fps // target_fps)

        count = 0
        while True:
            success, image = vidObj.read()
            if not success:
                break

            if count % save_interval == 0:
                image_list.append(image)
            count += 1
    finally:
        # Always free the decoder/file handle, even if reading fails midway.
        vidObj.release()

    return image_list

# Sample frames from the input clip once; the later cells in this notebook read
# `list_of_images` (frame preview and the detection/ReID loop).
list_of_images = FrameCapture('first-video.mp4')
print(f"Extracted {len(list_of_images)} frames from the video.")

In [None]:
# Preview the first sampled frame with the reference guide lines overlaid.
sample_frame = list_of_images[0]
print(sample_frame.shape)
sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)

# (start, end) pixel coordinates of each guide line, drawn in this order.
guide_segments = [
    ((965, 1000), (965, 0)),
    ((980, 1000), (980, 0)),
    ((1910, 1000), (1910, 0)),
    ((0, 650), (1910, 650)),
]
for segment_start, segment_end in guide_segments:
    sample_frame = cv2.line(sample_frame, segment_start, segment_end, (0, 255, 0), 3)

plt.imshow(sample_frame)
plt.show()

In [None]:
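# Main pipeline: detect people in each sampled frame (YOLO), assign re-identification IDs, and annotate queue order / ETA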
import faiss
import torch
import torch.nn.functional as F
import uuid
import torchreid
import cv2
import numpy as np
from collections import defaultdict, deque
from ultralytics import YOLO
import matplotlib.pyplot as plt
import time

# Text-overlay settings. Only `font` is referenced by the code visible in this
# notebook; fontScale, color and thickness are defined but not used below.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 50
color = (0, 0, 255)
thickness = 5

# Load models

# Detector: Ultralytics YOLO initialised from a local weights file.
yolo_model = YOLO('yolov3u.pt')##

# Appearance-feature extractor used for re-identification; placed on the GPU
# when CUDA is available, otherwise on the CPU.
extractor = torchreid.utils.FeatureExtractor(
    model_name='resnet50',
    model_path='resnet50_msmt17_combineall_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip_jitter.pth',
    device='cuda' if torch.cuda.is_available() else 'cpu'
)

# ReID gallery with embedding history
# person id -> bounded deque (at most 1000 entries) of that person's embeddings
embedding_history = defaultdict(lambda: deque(maxlen=1000))
# person id -> embedding stored when the id was created
embedding_dict = {}
# ids currently counted as queued; the processing loop deletes an id once its
# box's right edge passes x = 1910
people_in_queue = {}
# person id -> time.time() taken when the id was created
timing_dict = {}
# rebuilt for every detection result inside the processing loop
queue_order_dict ={}
# wall-clock seconds between id creation and crossing x = 1910
time_diff = []
# every id created so far, in creation order
gallery_ids = []

# A detection is matched to an existing id only if its best cosine similarity
# against the stored embeddings is strictly greater than this value.
similarity_threshold = 0.66 #0.65

def assign_id_for_person(person_features, conf, new_id_conf=0.81):
    """Match an appearance embedding to a known person id, or create a new id.

    The embedding is L2-normalised and compared (cosine similarity) against
    every embedding stored in ``embedding_history``. The id owning the single
    most similar stored embedding wins if that similarity is strictly greater
    than ``similarity_threshold``.

    Args:
        person_features: Embedding for one detection. Expected as a
            ``(1, D)`` tensor/array (only the first row is used); a 1-D
            ``(D,)`` input is also accepted.
        conf: Detector confidence for this detection.
        new_id_conf: A new identity is only created when ``conf`` is strictly
            greater than this value.

    Returns:
        The matched or newly created id (a UUID4 string), or ``None`` when no
        stored embedding is similar enough and ``conf`` is too low to open a
        new identity.

    Side effects:
        On a match, appends the normalised embedding to the id's history.
        On creation, registers the id in ``embedding_dict``,
        ``people_in_queue``, ``embedding_history``, ``gallery_ids`` and stamps
        ``timing_dict`` with the current wall-clock time.
    """
    # as_tensor avoids the copy-construct (and its warning) that torch.tensor()
    # performs when handed an existing tensor; detach() keeps the original
    # "no autograd history" behaviour.
    feats = torch.as_tensor(person_features).detach()
    if feats.dim() == 1:
        feats = feats.unsqueeze(0)
    person_features = F.normalize(feats, p=2, dim=1).cpu().numpy()[0]  # shape: (D,)

    # Build the query tensor once instead of once per stored embedding.
    query = torch.as_tensor(person_features)

    best_id = None
    best_score = -1

    for pid, history in embedding_history.items():
        for prev_feat in history:
            score = F.cosine_similarity(query, torch.as_tensor(prev_feat), dim=0).item()
            # Strict '>' keeps the earliest-seen id on exact ties.
            if score > best_score:
                best_score = score
                best_id = pid

    print('best_score', best_score)
    if best_score > similarity_threshold:
        embedding_history[best_id].append(person_features)
        return best_id

    # No sufficiently similar identity: open a new one only for confident
    # detections so that weak boxes do not inflate the gallery.
    if conf > new_id_conf:
        new_id = str(uuid.uuid4())
        embedding_dict[new_id] = person_features
        people_in_queue[new_id] = person_features
        embedding_history[new_id].append(person_features)
        gallery_ids.append(new_id)
        timing_dict[new_id] = time.time()
        return new_id

    return None

# Per-frame pipeline: detect people -> filter boxes -> ReID -> annotate -> save.
import os  # imported here so this cell works on a fresh kernel (the notebook's other `import os` lives in a later cell)

# Tuning constants (same values as the literals they replace; pixel units of the source frames).
DETECTION_CONF = 0.75                       # minimum YOLO confidence passed to predict()
IGNORED_X1_MIN, IGNORED_X1_MAX = 950, 980   # boxes whose left edge is in [min, max) are skipped
MIN_BOX_AREA = 16300                        # boxes with a smaller pixel area are skipped
QUEUE_LINE_Y = 650                          # only boxes whose top edge is above this feed queue_order_dict
EXIT_LINE_X = 1910                          # a box whose right edge passes this is treated as leaving the queue
AVG_TIME_DIVISOR = 20                       # NOTE(review): divisor kept from the original code; its meaning is undocumented
OUTPUT_DIR = 'count5'                       # annotated frames are written here as frame_<n>.jpg

# cv2.imwrite only returns False when the target directory is missing, so make sure it exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)


def _draw_label(target, text, x, y):
    """Draw one small blue (BGR 255,0,0) text label on `target` with its baseline at (x, y)."""
    cv2.putText(target, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)


frame_count = 1
avg_time = None  # set once, from the first tracked person seen past EXIT_LINE_X
for image in list_of_images:
    # Annotations go on a copy; `image` stays untouched so crops are clean.
    frame = image.copy()
    results = yolo_model.predict(source=frame, conf=DETECTION_CONF, classes=[0])

    for result in results:
        queue_order_dict = {}
        people_location = []
        boxes_xyxy = result.boxes.xyxy.cpu().numpy()
        box_confs = result.boxes.conf.cpu().numpy()
        for det, conf in zip(boxes_xyxy, box_confs):
            x1, y1, x2, y2 = map(int, det)
            if IGNORED_X1_MIN <= x1 < IGNORED_X1_MAX:
                continue

            width = abs(x2 - x1)
            height = abs(y2 - y1)
            area = width * height
            aspect_ratio = (y2 - y1) / ((x2 - x1) + 1e-5)

            if area < MIN_BOX_AREA:
                continue

            # Crop from the pristine image, not from `frame`: `frame` already
            # carries rectangles/text drawn for earlier detections, which would
            # leak into the appearance embedding of overlapping boxes.
            # NOTE(review): the crop is in OpenCV's BGR channel order - confirm
            # the extractor is meant to receive BGR rather than RGB.
            crop = image[y1:y2, x1:x2]

            embedding = extractor(crop)
            person_id = assign_id_for_person(embedding, conf)
            print('person_id', person_id)
            if y1 < QUEUE_LINE_Y:
                # Keep the right-most x1 seen for this id in the current result.
                queue_order_dict[person_id] = max(queue_order_dict.get(person_id, x1), x1)

            print('queue_order_dict', queue_order_dict)

            # Leaving the queue. person_id is None for unmatched low-confidence
            # detections; those have no timing_dict entry, so they are skipped
            # here instead of raising KeyError.
            if x2 > EXIT_LINE_X and person_id is not None:
                elapsed = time.time() - timing_dict[person_id]
                time_diff.append(elapsed)
                people_in_queue.pop(person_id, None)
                if avg_time is None:
                    avg_time = elapsed / AVG_TIME_DIVISOR

            # Draw
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            avg_x = (x1 + x2) / 2
            people_location.append([x1, y1, person_id, avg_x])

            print('conf', conf)
            print('area', area)
            print('aspect_ratio', aspect_ratio, 'width', width, 'height', height)

        # Rank people by box centre, largest x first (rank 1 = right-most).
        order = 1
        people_location.sort(key=lambda entry: entry[3], reverse=True)
        print('people_location', people_location)
        taken_into_account = {}
        for box_x, box_y, person_id, _centre_x in people_location:
            if person_id is None:
                continue

            # An id detected more than once in the same frame reuses the rank
            # assigned to its first (right-most) box.
            rank = taken_into_account.get(person_id)
            if rank is None:
                rank = order
                taken_into_account[person_id] = rank
                print('taken_into_account', taken_into_account)
                order += 1

            _draw_label(frame, f"ID: {person_id[:5]}", box_x, box_y - 40)
            _draw_label(frame, f"#: {rank}", box_x, box_y - 25)
            if avg_time is not None:
                eta = round(max(0, (rank - 1) * avg_time))
                _draw_label(frame, f"ETA:: {eta} sec", box_x, box_y - 10)

    # Draw the running count once per frame (previously once per detection,
    # and not at all on frames without any kept detection).
    cv2.putText(frame, f' People count: {len(people_in_queue)}', (0, 100), font, 2, (255, 255, 255), 5)

    out_path = os.path.join(OUTPUT_DIR, f'frame_{frame_count}.jpg')
    if not cv2.imwrite(out_path, frame):
        raise OSError(f"Could not write annotated frame to {out_path!r}")
    frame_count += 1
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    plt.show()

In [None]:
import os
def generate_video(image_folder='count5', video_name='first-video-counted_3.mp4', frame_rate=10):
    """Assemble the numbered frame images in a folder into a video file.

    Args:
        image_folder: Directory containing the frames. Only ``.jpg``, ``.jpeg``
            and ``.png`` files are used, and their names must look like
            ``<prefix>_<number>.<ext>``; frames are ordered by ``<number>``.
        video_name: Output video path. ``.mp4`` outputs are encoded with the
            ``mp4v`` FourCC; any other extension keeps the original ``DIVX``.
        frame_rate: Frames per second of the output video.

    Raises:
        FileNotFoundError: If the folder is missing or holds no image files.
        ValueError: If the first frame cannot be decoded.
        RuntimeError: If the video writer cannot be opened.
    """
    images = [img for img in os.listdir(image_folder) if img.endswith((".jpg", ".jpeg", ".png"))]
    if not images:
        # Previously this surfaced as an opaque IndexError on images[0].
        raise FileNotFoundError(f"No .jpg/.jpeg/.png frames found in {image_folder!r}")
    images.sort(key=lambda x: int(x.split('_')[1].split('.')[0]))  # Sort by frame number

    print("Images:", images)

    # The first frame fixes the output resolution.
    first_frame = cv2.imread(os.path.join(image_folder, images[0]))
    if first_frame is None:
        raise ValueError(f"Could not read first frame {images[0]!r} in {image_folder!r}")
    height, width = first_frame.shape[:2]

    # Pick a FourCC that matches the container: 'DIVX' belongs with .avi,
    # while .mp4 expects an MPEG-4 tag such as 'mp4v'.
    codec = 'mp4v' if video_name.lower().endswith('.mp4') else 'DIVX'
    video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*codec), frame_rate, (width, height))
    if not video.isOpened():
        raise RuntimeError(f"Could not open video writer for {video_name!r}")

    try:
        for image in images:
            frame = cv2.imread(os.path.join(image_folder, image))
            if frame is None:
                # imread signals failure by returning None; skip instead of
                # handing None to the writer.
                print(f"Skipping unreadable image: {image}")
                continue
            video.write(frame)
    finally:
        # Finalise the file even if a frame fails midway.
        video.release()

    print("Video generated successfully!")

# Build the output video from the annotated frames saved in the image folder.
generate_video()