## Env Setup

In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install tensorflow tensorflow-gpu tensorflow-hub matplotlib timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-gpu
  Downloading tensorflow_gpu-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (578.0 MB)
[K     |████████████████████████████████| 578.0 MB 16 kB/s 
Collecting timm
  Downloading timm-0.6.11-py3-none-any.whl (548 kB)
[K     |████████████████████████████████| 548 kB 62.1 MB/s 
Collecting tensorflow-gpu
  Downloading tensorflow_gpu-2.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (511.8 MB)
[K     |████████████████████████████████| 511.8 MB 9.7 kB/s 
Collecting huggingface-hub
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 78.0 MB/s 
Installing collected packages: huggingface-hub, timm, tensorflow-gpu
Successfully installed huggingface-hub-0.10.1 tensorflow-gpu-2.9.2 timm-0.6.11


In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np
import torch
from google.colab.patches import cv2_imshow
import time

In [None]:
# Print the status of the GPU attached to this runtime (driver, memory, utilisation)
!nvidia-smi

Fri Oct 28 16:26:53 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## MiDaSNet Setup

In [None]:
# Load a MiDaS model for depth estimation.
# Selectable checkpoints (accuracy vs. speed trade-off):
#   "DPT_Large"    MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#   "DPT_Hybrid"   MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#   "MiDaS_small"  MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)
model_type = "DPT_Hybrid"

midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Run on the GPU when one is present, otherwise fall back to the CPU;
# the model is put in inference mode right away.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
midas.eval()

# The repository also publishes the preprocessing (resize + normalisation)
# pipelines; the DPT checkpoints use one, the small model uses the other.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)

  "You are about to download and run code from an untrusted repository. In a future release, this won't "
Downloading: "https://github.com/intel-isl/MiDaS/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt" to /root/.cache/torch/hub/checkpoints/dpt_hybrid-midas-501f0c75.pt


  0%|          | 0.00/470M [00:00<?, ?B/s]

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


## MoveNet Setup

In [None]:
# Turn on memory growth for every GPU visible to TensorFlow, so TF models
# allocate GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [None]:
# Fetch the MoveNet multipose (lightning) model from TensorFlow Hub and keep a
# handle on its default serving signature, which is what gets called per frame.
MOVENET_URL = 'https://tfhub.dev/google/movenet/multipose/lightning/1'
model = hub.load(MOVENET_URL)
movenet = model.signatures['serving_default']

In [None]:
# Keypoints connection map.
# Each entry is ((index_a, index_b), color_code): the two keypoint indices that
# form one skeleton segment, plus a single-letter color code ('m', 'c' or 'y').
# NOTE(review): indices presumably follow MoveNet's 17-keypoint ordering
# (0 = nose ... 16 = right ankle) - confirm against the model card.
EDGES = dict([
    ((0, 1), 'm'),
    ((0, 2), 'c'),
    ((1, 3), 'm'),
    ((2, 4), 'c'),
    ((0, 5), 'm'),
    ((0, 6), 'c'),
    ((5, 7), 'm'),
    ((7, 9), 'm'),
    ((6, 8), 'c'),
    ((8, 10), 'c'),
    ((5, 6), 'y'),
    ((5, 11), 'm'),
    ((6, 12), 'c'),
    ((11, 12), 'y'),
    ((11, 13), 'm'),
    ((13, 15), 'm'),
    ((12, 14), 'c'),
    ((14, 16), 'c'),
])

In [None]:
# Function to draw keypoints on a frame
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draw a filled green dot on ``frame`` for every confident keypoint.

    Args:
        frame: 3-dimensional image array (height, width, channels); it is
            drawn on in place.
        keypoints: array of ``(y, x, score)`` rows. ``y`` and ``x`` are
            multiplied by the frame height and width respectively, so they
            are expected to be fractions of the frame size.
        confidence_threshold: a keypoint is drawn only when its score is
            strictly greater than this value.

    Returns:
        None.
    """
    frame_h, frame_w, _ = frame.shape
    scale = [frame_h, frame_w, 1]
    pixel_keypoints = np.squeeze(np.multiply(keypoints, scale))

    for row, col, score in pixel_keypoints:
        if not score > confidence_threshold:
            continue
        center = (int(col), int(row))
        # A negative thickness makes OpenCV draw a filled circle.
        cv2.circle(frame, center, 6, (0,255,0), -2)

In [None]:
# Function to draw connections between keypoints on a frame
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw skeleton segments on ``frame`` between confident keypoint pairs.

    Args:
        frame: 3-dimensional image array (height, width, channels); it is
            drawn on in place.
        keypoints: array of ``(y, x, score)`` rows. ``y`` and ``x`` are
            multiplied by the frame height and width respectively, so they
            are expected to be fractions of the frame size.
        edges: mapping of ``(index_a, index_b)`` keypoint-index pairs to a
            color code. The color code is not used: every segment is drawn
            with the fixed color ``(0, 0, 255)``.
        confidence_threshold: a segment is drawn only when the scores of both
            of its endpoints are strictly greater than this value.

    Returns:
        None.
    """
    frame_h, frame_w, _ = frame.shape
    pixel_keypoints = np.squeeze(np.multiply(keypoints, [frame_h, frame_w, 1]))

    for (start_idx, end_idx), _color_code in edges.items():
        start_y, start_x, start_score = pixel_keypoints[start_idx]
        end_y, end_x, end_score = pixel_keypoints[end_idx]

        both_confident = (start_score > confidence_threshold) & (end_score > confidence_threshold)
        if both_confident:
            start_point = (int(start_x), int(start_y))
            end_point = (int(end_x), int(end_y))
            cv2.line(frame, start_point, end_point, (0,0,255), 2)

In [None]:
# Function to loop through each person detected and render
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Render keypoints and skeleton segments for every detected person.

    Args:
        frame: image the annotations are drawn on (modified in place by the
            drawing helpers).
        keypoints_with_scores: iterable with one keypoint array per person.
        edges: keypoint connection map, forwarded to ``draw_connections``.
        confidence_threshold: minimum score, forwarded to both drawing helpers.

    Returns:
        None.
    """
    for person_keypoints in keypoints_with_scores:
        # Dots first, then the segments, one person at a time.
        draw_keypoints(frame, person_keypoints, confidence_threshold)
        draw_connections(frame, person_keypoints, edges, confidence_threshold)

## Predictions

In [None]:
# Run MiDaS depth estimation and MoveNet multi-person pose estimation on every
# frame of the input video and write three annotated videos:
#   depth_map.avi - color-coded depth map
#   masked.avi    - original frame with everything outside a depth band blacked out
#   full.avi      - original frame with keypoints and skeletons drawn on it
VIDEO_PATH = '/content/gdrive/MyDrive/Blinkfire/basketball.mp4'
MOVENET_INPUT_HEIGHT = 384      # frames are resized/padded to this size for MoveNet
MOVENET_INPUT_WIDTH = 640
DEPTH_MASK_MIN = 70             # depth band kept in masked.avi, on the 0-255 scale
DEPTH_MASK_MAX = 170
KEYPOINT_CONFIDENCE = 0.2
FALLBACK_FPS = 30               # used when the source does not report a frame rate


def _keypoints_to_frame_coords(keypoints, frame_h, frame_w, input_h, input_w):
    """Map keypoints normalised to the padded model input back to the frame.

    ``tf.image.resize_with_pad`` keeps the aspect ratio and centres the resized
    image inside the ``input_h`` x ``input_w`` canvas, so the model's normalised
    ``(y, x)`` values are relative to that padded canvas. This undoes the
    padding so the result is normalised to the original frame instead.
    The score channel is left untouched.
    """
    scale = min(input_h / frame_h, input_w / frame_w)
    resized_h = frame_h * scale
    resized_w = frame_w * scale
    pad_y = (input_h - resized_h) / 2.0
    pad_x = (input_w - resized_w) / 2.0

    remapped = keypoints.copy()
    remapped[..., 0] = (keypoints[..., 0] * input_h - pad_y) / resized_h
    remapped[..., 1] = (keypoints[..., 1] * input_w - pad_x) / resized_w
    return remapped


cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Fail loudly instead of silently writing three empty videos.
    raise IOError(f'Could not open video: {VIDEO_PATH}')

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write at the source frame rate so playback speed is preserved.
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps and source_fps > 0 else FALLBACK_FPS

fourcc = cv2.VideoWriter_fourcc(*'MJPG')
writer_depth = cv2.VideoWriter('depth_map.avi', fourcc, output_fps, (width, height))
writer_masked = cv2.VideoWriter('masked.avi', fourcc, output_fps, (width, height))
writer_full = cv2.VideoWriter('full.avi', fourcc, output_fps, (width, height))

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        # The models expect RGB; OpenCV decodes frames as BGR.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        start = time.time()

        # Apply input transforms
        input_batch = transform(img).to(device)

        # Prediction and resize to original resolution
        with torch.no_grad():
            prediction = midas(input_batch)

            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()

        depth_map = prediction.cpu().numpy()

        # Pose estimation
        input_img = tf.image.resize_with_pad(
            tf.expand_dims(img, axis=0), MOVENET_INPUT_HEIGHT, MOVENET_INPUT_WIDTH
        )
        input_img = tf.cast(input_img, dtype=tf.int32)
        results = movenet(input_img)
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        end = time.time()
        totalTime = end - start

        # Guard against a zero interval on coarse clocks.
        fps = 1 / totalTime if totalTime > 0 else 0.0
        fps_label = f'FPS: {int(fps)}'

        # Normalise to [0, 1], then to 8-bit so the 0-255 mask thresholds and
        # the color map operate on the same scale.
        depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_64F)
        depth_map = (depth_map * 255).astype(np.uint8)

        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # Apply mask on frame: keep only pixels whose depth lies inside the band.
        depth_map_bin = (depth_map > DEPTH_MASK_MIN) & (depth_map < DEPTH_MASK_MAX)
        img[~depth_map_bin] = 0
        cv2.putText(img, fps_label, (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
        writer_masked.write(img)

        # Color code depth map
        depth_map = cv2.applyColorMap(depth_map, cv2.COLORMAP_MAGMA)
        cv2.putText(depth_map, fps_label, (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
        writer_depth.write(depth_map)

        # Render keypoints. MoveNet coordinates are relative to the padded
        # model input, so undo the padding before scaling onto the frame.
        frame_h, frame_w = frame.shape[:2]
        keypoints_in_frame = _keypoints_to_frame_coords(
            keypoints_with_scores, frame_h, frame_w,
            MOVENET_INPUT_HEIGHT, MOVENET_INPUT_WIDTH,
        )
        loop_through_people(frame, keypoints_in_frame, EDGES, KEYPOINT_CONFIDENCE)
        cv2.putText(frame, fps_label, (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
        writer_full.write(frame)
finally:
    # Always release the capture and the writers, so the AVI files are
    # finalised even when a frame fails mid-run. No window is ever opened,
    # so there is no key polling or window teardown to do.
    cap.release()
    for writer in (writer_depth, writer_masked, writer_full):
        writer.release()