# 1.0 Extract movement and track positions over time.

For each video we use YOLOv8 to extract movement data as a set of body keypoints and use its `model.track` method to track individuals over time.


# 1.1 Video pose estimation with YOLOv8

[YOLOv8](https://github.com/ultralytics/ultralytics) is a commercially maintained version of the YOLO object recognition model. [Yolov7](https://github.com/WongKinYiu/yolov7) introduced pose estimation and v8 improves the models and makes everything much more user-friendly. It can be installed as a package

* Pip : `pip install ultralytics`
* Conda : `conda install -c conda-forge ultralytics`

## 1.2 Object tracking 

YoloV8 also comes with a `model.track` method. This aims to keep track of all identified objects over the course of a video. Let's make use of that. 

This is pretty easy instead of calling 
`results = model(video_path, stream=True)`

we can call
`results = model.track(video_path, stream=True)`

https://docs.ultralytics.com/modes/track/#persisting-tracks-loop

In [None]:
import os
import math
import sys
import time
import pandas as pd
import numpy as np
import cv2
import torch
from ultralytics import YOLO

import utils

In [None]:
videos_in = r"..\LookitLaughter.test"
metadata_file = "_LookitLaughter.xlsx"
data_out = r"..\data\1_interim"

#a couple of files for testing
VIDEO_FILE  = os.path.join(videos_in, "2UWdXP.joke1.rep2.take1.Peekaboo.mp4")
VIDEO_FILE2 = os.path.join(videos_in, "2UWdXP.joke2.rep1.take1.NomNomNom.mp4")

testset = [VIDEO_FILE, VIDEO_FILE2] 

#get metadata from excel file
metadata = pd.read_excel(os.path.join(videos_in, metadata_file))
metadata.head()

In [None]:
#get yolo model with pose estimation
model = YOLO('yolov8n-pose.pt')

In [None]:
processedvideos = utils.getprocessedvideos(data_out)
processedvideos.head()

In [None]:
#loop through each row of metadata and
#process all related videos
forcemetadata = False
forceprocess = True
tracking = True

for index, mrow in metadata.iterrows():
    #get VIDEOID from first column of metadata
    videoname = mrow["VideoID"]
    stemname = os.path.splitext(videoname)[0]
    print(f"video:{videoname}")

    #check we want to refill metadata or this video is not already in processedvideos dataframe
    if forcemetadata or videoname not in processedvideos["VideoID"].values: 
        #use cv2 to get fps and other video info to add to dataframe
        cap = cv2.VideoCapture(os.path.join(videos_in,videoname))    
        if (cap.isOpened()== False):
            print("Error opening video stream or file")
            continue
        else:
            #add row to processedvideos dataframe
            row = {"VideoID":videoname,
                "ChildID":mrow["ChildID"],
                "JokeType":mrow["JokeType"],
                "JokeNum":mrow["JokeNum"],
                "JokeRep":mrow["JokeRep"],
                "JokeTake":mrow["JokeTake"],
                "HowFunny":mrow["HowFunny"],
                "LaughYesNo":mrow["LaughYesNo"],
                "Frames":cap.get(cv2.CAP_PROP_FRAME_COUNT),
                "FPS":cap.get(cv2.CAP_PROP_FPS) , 
                "Width":cap.get(cv2.CAP_PROP_FRAME_WIDTH), 
                "Height":cap.get(cv2.CAP_PROP_FRAME_HEIGHT), 
                "Duration":cap.get(cv2.CAP_PROP_FRAME_COUNT)/cap.get(cv2.CAP_PROP_FPS)
                }
            cap.release()
            print(f"Adding video info: {row}")
            newrow = pd.DataFrame(row, index=[0])
            processedvideos = pd.concat([processedvideos,newrow], ignore_index=True)

    #select the dataframe row for this video 
    row = processedvideos.loc[processedvideos["VideoID"] == videoname]
    #check if we have already processed this video is keypoints is not nan
    if row.empty:
        print(f"error: no row for {videoname}")
        continue
    elif not forceprocess and not pd.isnull(row["Keypoints.file"].values[0]):
        print(row["Keypoints.file"].values[0]  )
        print(f"already processed {videoname}")
        continue
    else:
        #use ultralytics YOLO to get keypoints
        keypointsdf =utils.videotokeypoints(model, os.path.join(videos_in,videoname) , track = True)
        #save keypointsdf as csv    
        keypointspath = data_out + "\\" + stemname + ".csv"
        keypointsdf.to_csv(keypointspath)
        row["Keypoints.file"] = keypointspath
        row["Keypoints.when"] = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        #update this row in processedvideos dataframe
        processedvideos.loc[processedvideos["VideoID"] == videoname] = row
    

    #update processedvideos excel file
    processedvideos.to_excel(data_out + "\\processedvideos.xlsx", index=False)

In [None]:
#Let's test it out first

# Open the video file
video_path = VIDEO_FILE
cap = cv2.VideoCapture(video_path)

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

In [6]:
#let's have a look at the results object for video 

results = model.track(VIDEO_FILE,stream=True)  



In [7]:
frame = 0
for r in results:
    r.
    frame += 1
    if frame == 5:
        break




AttributeError: 'Results' object has no attribute 'print'. See valid attributes below.

    A class for storing and manipulating inference results.

    Args:
        orig_img (numpy.ndarray): The original image as a numpy array.
        path (str): The path to the image file.
        names (dict): A dictionary of class names.
        boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
        masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
        probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
        keypoints (List[List[float]], optional): A list of detected keypoints for each object.

    Attributes:
        orig_img (numpy.ndarray): The original image as a numpy array.
        orig_shape (tuple): The original image shape in (height, width) format.
        boxes (Boxes, optional): A Boxes object containing the detection bounding boxes.
        masks (Masks, optional): A Masks object containing the detection masks.
        probs (Probs, optional): A Probs object containing probabilities of each class for classification task.
        keypoints (Keypoints, optional): A Keypoints object containing detected keypoints for each object.
        speed (dict): A dictionary of preprocess, inference, and postprocess speeds in milliseconds per image.
        names (dict): A dictionary of class names.
        path (str): The path to the image file.
        _keys (tuple): A tuple of attribute names for non-empty attributes.
    