In [None]:
import numpy as np
import cv2
import networkx as nx
import math
import matplotlib.pyplot as plt
import torch
import json
import os
import glob

from PIL import Image
from transformers import pipeline
from our_utils import *

# Folder that holds the tracking JSON and the extracted frames
# (`<video_name>/*.json` and `<video_name>/video/<index>.jpg`).
video_name = 'Dubai_UAE - youtube video AjwcqYZ6cIw [AjwcqYZ6cIw]'
#video_name = 'Iquitos_Peru - youtube video 0WpiskskL6Y [0WpiskskL6Y]'
#video_name = 'London_United Kingdom - LONDON 4K Walking Tour (UK) - 4h Tour with Captions Immersive Sound [4K Ultra HD60fps] [8WlUiln-VeY]'

# The folder name contains "[" and "]", which glob would otherwise read as a
# character class, hence glob.escape().
pattern = glob.escape(video_name) + "/*.json"
json_files = sorted(glob.glob(pattern))  # sorted: deterministic pick if several match
if not json_files:
    raise FileNotFoundError(f"No JSON file matches {pattern!r}")
file = json_files[0]
print(file)


with open(file, "r", encoding="utf-8") as fp:
    data = json.load(fp)

# tab20 colour table. `cmap`/`colormap` return RGBA floats in [0, 1];
# `palette` holds the same 20 colours as uint8 RGB triplets.
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9.
cmap = plt.get_cmap("tab20")
colormap = plt.get_cmap("tab20", 20)

N = 20
colors = [cmap(i) for i in range(N)]
palette = (np.array(colors)[:, :3] * 255).astype(np.uint8)

# Maps "id1-id2-..." (sorted track ids of a group) to the number of frames in
# which exactly that group was observed; filled by the frame loop.
hash_table = {}

# Run the depth model on the GPU when one is available, otherwise on the CPU
# (previously `device` was computed but the pipeline was pinned to GPU 0).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
pipe = pipeline(
    task="depth-estimation",
    model="depth-anything/Depth-Anything-V2-Large-hf",
    device=device,
)
    
# Two tracks are linked into the same group when their back-projected X
# coordinates differ by less than X_LINK_MAX and their raw depth-map values
# (sampled at the bbox centre) differ by less than DEPTH_LINK_MAX.
X_LINK_MAX = 0.5
DEPTH_LINK_MAX = 5
CLASS_NAME = 'person'  # every detection in the JSON is treated as a person
MARKER_OFFSET = 25     # forwarded unchanged to plot_coord

# For every annotated frame: estimate depth, link nearby tracks into groups,
# count how often each group occurs (hash_table) and show a 2x2 debug mosaic
# (annotated image, X/depth plot, pixel-x/depth plot, group graph).
for ind, frame_info in enumerate(data['frames']):

    detections = frame_info['detections']

    frame_path = f'{video_name}/video/{ind}.jpg'
    image = cv2.imread(frame_path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read frame image {frame_path!r}")
    frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    result = pipe(opencv_to_pil(frame))
    depth_pil = result["depth"]
    depth_array = np.array(depth_pil)

    height, width = depth_array.shape
    camera_intrinsics = get_intrinsics(width, height, fov=160.0)

    # Clamp before inverting so that a zero depth value cannot divide by zero.
    depth_image = 100.0 / np.maximum(depth_array, 1e-5)
    # NOTE(review): the meaning of the `True` flag is defined in our_utils.
    X, Y, Z = pixel_to_point(depth_image, True, camera_intrinsics)

    # White drawing canvases for the top-down style plots.
    # NOTE(review): coord1 is drawn on but never placed in the mosaic below.
    coord3 = np.full((1000, width, 3), 255, dtype=np.uint8)
    coord2 = np.full((1000, 1000, 3), 255, dtype=np.uint8)
    coord1 = np.full((1000, width, 3), 255, dtype=np.uint8)

    # Sample each detection once: (track id, centre x, centre y, X, depth).
    tracks = []
    for det in detections:
        x1, y1, x2, y2 = map(int, det['bbox'])
        # Clamp the bbox centre into the image: a negative index would wrap
        # around silently and an overshoot would raise IndexError.
        mid_x = min(max((x1 + x2) // 2, 0), width - 1)
        mid_y = min(max((y1 + y2) // 2, 0), height - 1)
        tracks.append((
            int(det['track_id']),
            mid_x,
            mid_y,
            X[mid_y, mid_x],
            int(depth_array[mid_y, mid_x]),
        ))

    # Directed edge for every ordered pair of distinct, nearby detections.
    edges = []
    for i, (id_a, _, _, x_a, depth_a) in enumerate(tracks):
        for j, (id_b, _, _, x_b, depth_b) in enumerate(tracks):
            if i == j:
                continue
            # Compare X as floats: truncating each value to int first made the
            # sub-unit threshold meaningless (e.g. -0.9 and 0.9 both became 0).
            x_gap = abs(float(x_a) - float(x_b))
            depth_gap = abs(depth_a - depth_b)
            if x_gap < X_LINK_MAX and depth_gap < DEPTH_LINK_MAX:
                edges.append((id_a, id_b))

    G = nx.DiGraph()
    G.add_edges_from(edges)

    # Each strongly connected component is one group; count its occurrences
    # under a key built from the sorted track ids.
    components = [sorted(c) for c in nx.strongly_connected_components(G)]
    for comp in components:
        key = "-".join(str(c) for c in comp)
        hash_table[key] = hash_table.get(key, 0) + 1

    for det, (track_id, mid_x, mid_y, x_val, depth_val) in zip(detections, tracks):

        label = f"{CLASS_NAME} ID:{track_id}"
        rgb_tuple = tuple(int(channel) for channel in palette[track_id % 20])

        plot_x = int((x_val + 2) * 100)
        center1 = (plot_x, 1000 - int(Z[mid_y, mid_x] * 100))
        center2 = (plot_x, depth_val * 4)
        center3 = (int(mid_x), depth_val * 4)

        direction = det['direction']

        plot_coord(coord1, center1, direction, MARKER_OFFSET, rgb_tuple)
        plot_coord(coord2, center2, direction, MARKER_OFFSET, rgb_tuple)
        plot_coord(coord3, center3, direction, MARKER_OFFSET, rgb_tuple)

        draw_tracking(frame=image, bbox=det['bbox'], label=label, color=rgb_tuple)

    # NOTE(review): the overlays above are drawn with `rgb_tuple` and then
    # channel-swapped here together with the picture, so the displayed overlay
    # colours are presumably the swapped tab20 entries - confirm against
    # plot_coord / draw_tracking. The same swap is applied to every panel.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    coord1 = cv2.cvtColor(coord1, cv2.COLOR_BGR2RGB)
    coord2 = cv2.cvtColor(coord2, cv2.COLOR_BGR2RGB)
    coord3 = cv2.cvtColor(coord3, cv2.COLOR_BGR2RGB)

    # Fixed seed keeps the layout reproducible for a given graph.
    pos = nx.spring_layout(G, seed=42)
    node_colors = [colormap(n % 20) for n in G.nodes()]

    # Render the group graph off-screen and grab it as an image array.
    fig, ax = plt.subplots(figsize=(4, 4))
    nx.draw(
        G, pos=pos, ax=ax,
        with_labels=True,
        node_color=node_colors,
        edge_color="gray",
    )
    ax.set_axis_off()
    fig.tight_layout()

    fig.canvas.draw()
    graph = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
    # get_width_height() is (w, h); the buffer is laid out as (h, w, RGBA).
    graph = graph.reshape(fig.canvas.get_width_height()[::-1] + (4,))
    graph = graph[:, :, :3]
    graph = cv2.cvtColor(graph, cv2.COLOR_BGR2RGB)
    plt.close(fig)

    # 2x2 mosaic: [image | coord2] over [coord3 | graph].
    panel_size = (600, 320)
    combined_frame1 = np.hstack([cv2.resize(image, panel_size), cv2.resize(coord2, panel_size)])
    combined_frame2 = np.hstack([cv2.resize(coord3, panel_size), cv2.resize(graph, panel_size)])
    combined_frame = np.vstack([combined_frame1, combined_frame2])

    plt.figure(figsize=(36, 8))
    plt.imshow(combined_frame)
    plt.axis("off")
    plt.title(f"Frame {ind}")
    plt.show()

print(hash_table)