# 8 Reviewing video annotations in Label Studio

[Label Studio](https://labelstud.io/) is a tool for creating training data for machine learning. It is browser-based and can be used to annotate video, images, and other data.

We will use it to review the annotations we made in the previous steps. 


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ultralytics
import fiftyone as fo
import logging
import sys

# Notebook-wide logging: INFO and above, each record prefixed with time and level
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

In [None]:
# Put the parent folder on sys.path so the `src` package can be imported from this notebook
project_root = os.pardir
sys.path.append(project_root)

from src.config import PATH_CONFIG
from src.utils.io_utils import (
    getProcessedVideos,
    saveProcessedVideos,
    getFaceData,
    getSpeechData,
    getKeyPoints,
    getVideoProperty,
)
from src.utils.notebook_utils import display_config_info, ensure_dir_exists
from src.utils.keypoint_utils import normalize_keypoints
from src.processors.keypoint_processor import process_keypoints_for_modeling
from src.processors.face_processor import normalize_facial_keypoints, match_faces_to_poses

# Input and output locations are read from the shared project configuration
videos_in, data_out = PATH_CONFIG['videos_in'], PATH_CONFIG['data_out']

# Make sure the output folder is there
# (presumably ensure_dir_exists is truthy only when it had to create it — confirm in notebook_utils)
created = ensure_dir_exists(data_out)
if created:
    print(f"Created output directory: {data_out}")

# Show which folders this run will use
display_config_info(videos_in, data_out, "Processing Configuration")

# Load the table of already-processed videos from the output folder and preview it
processedvideos = getProcessedVideos(data_out)
processedvideos.head()

## 8.1 Launch Label Studio

Let's build the command to launch Label Studio inside a Docker container.
Getting the folder paths right is a bit tricky. There is some guidance in the [Label Studio Documentation — Start commands for Label Studio](https://labelstud.io/guide/start#Run_Label_Studio_on_Docker_and_use_local_storage).

This is how I set it up.

$PWD = C:/Users/caspar/OneDrive

OneDrive/ls/data/

OneDrive/ls/files/


In [None]:
# Launch Label Studio inside a Docker container straight from the notebook.
import subprocess

# Host folder that contains ls/data and ls/files. A PowerShell prompt would expand
# $PWD itself, but subprocess never goes through PowerShell (shell=True uses cmd.exe
# on Windows, which leaves "$PWD" untouched), so the folder is resolved in Python.
# It defaults to the current working directory, which is what $PWD stands for;
# set the LS_HOST_ROOT environment variable to point somewhere else.
host_root = os.environ.get("LS_HOST_ROOT", os.getcwd()).replace("\\", "/")

command = [
    'docker', 'run',
    # Detached instead of -it: an interactive TTY is not available when the output is
    # captured, and a foreground server would block this cell until the container stops.
    '-d',
    '-p', "8080:8080",  # port it runs on
    '-e', "LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED=true",  # necessary environment variables
    '-e', "LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT=/label-studio",
    '-v', f"{host_root}/ls/data:/label-studio/data",  # data volume location and mount point
    '-v', f"{host_root}/ls/files:/label-studio/files",  # files volume location and mount point
    'heartexlabs/label-studio:latest', 'label-studio',  # docker image to use
]
# Single-string form, kept for display (it is not quoted for paths containing spaces)
command_str = ' '.join(command)

# Pass the argument list directly: no shell is involved, so nothing needs escaping
try:
    result = subprocess.run(command, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as err:
    # check=True raises without showing Docker's own error output; print it before re-raising
    print(err.stderr)
    raise
print(f"Started Label Studio container {result.stdout.strip()}")



In [None]:
# Echo the assembled docker command string as this cell's output
command_str

In [None]:
# Where Label Studio is reachable, and the access token for your user account.
# The token is a credential, so it is read from the environment instead of being
# stored in the notebook. (A token that was previously committed here should be
# revoked and regenerated in Label Studio.)
LABEL_STUDIO_URL = os.environ.get('LABEL_STUDIO_URL', 'http://localhost:8080')
API_KEY = os.environ.get('LABEL_STUDIO_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "Set the LABEL_STUDIO_API_KEY environment variable to your Label Studio access token."
    )

# Import the SDK and the client module
from label_studio_sdk.client import LabelStudio

# Create the API client.
# NOTE(review): nothing in this cell actually verifies the connection; add a request
# here if an early failure on a bad URL or token is wanted.
ls = LabelStudio(base_url=LABEL_STUDIO_URL, api_key=API_KEY)


### 7.1.1 Is dataset already created?

FiftyOne may already have a dataset created. Let's check, and reload it if so.

In [None]:
# Reuse the first dataset FiftyOne already knows about, when there is one
datasets = fo.list_datasets()
if datasets:
    first_name = datasets[0]
    print("Loading saved datasets: ", first_name)
    dataset = fo.load_dataset(first_name)
else:
    print("No datasets found. Load in step 7.1.2")

In [None]:
# WARNING: destructive. "*" is passed as the name pattern, so this is aimed at every
# FiftyOne dataset, including the one loaded in the previous cell. Run it only to
# start again from scratch.
fo.delete_datasets("*")

Now let's see if we can add our metadata classifications. Recall that each video demonstrates one joke type `[Peekaboo,TearingPaper,NomNomNom,ThatsNotAHat,ThatsNotACat]`, has a rating of how funny the baby found it `[Not Funny, Slightly Funny, Funny, Extremely Funny]`, and records whether they laughed `[Yes, No]`.


Let's add the frame by frame annotations directly onto the videos inside fiftyone

#### Add the speech as temporal annotations 

In [None]:
def framerange_from_timestamps(timestamps, fps, max_frames):
    """Convert a (start, end) pair of timestamps in seconds to 1-based frame numbers.

    Args:
        timestamps: Sequence whose first two items are the start and end times
            in seconds.
        fps: Frame rate used to turn seconds into frame numbers.
        max_frames: Number of the last frame in the video.

    Returns:
        A ``(start, end)`` tuple of ints with ``1 <= start <= end <= max_frames``
        (for ``max_frames >= 1``).
    """
    # Frame numbers are 1-based, hence the +1 after truncating to a whole frame
    first = int(timestamps[0] * fps) + 1
    last = int(timestamps[1] * fps) + 1
    # Clamp BOTH ends into the video. Clamping only the end would produce an
    # inverted range (start > end) for a segment that begins past the last frame.
    start = min(max(first, 1), max_frames)
    end = min(max(last, start), max_frames)
    return start, end


In [None]:
# Attach each video's speech segments to its sample as temporal detections
for sample in dataset:
    videoname = os.path.basename(sample.filepath)
    # assumes sample.metadata has been populated for every video — TODO confirm
    fps = sample.metadata["frame_rate"]
    max_frames = sample.metadata["total_frame_count"]
    print(fps)
    # getSpeechData is imported directly from src.utils.io_utils; no `utils` module is in scope
    speechdata = getSpeechData(processedvideos, videoname)
    if speechdata is None:
        print(f"Speech data not found for {videoname}")
        continue
    phrases = []
    for phrase in speechdata["segments"]:
        # Segment times are in seconds; temporal detections need a [first, last] frame range
        start, end = framerange_from_timestamps([phrase["start"], phrase["end"]], fps, max_frames)
        print(start, end)
        phrases.append(fo.TemporalDetection(label=phrase["text"], support=[start, end]))
        print(phrase["text"])

    # Store the TemporalDetections wrapper only. A second assignment of the bare
    # `phrases` list used to follow and overwrote this field.
    sample["Speech"] = fo.TemporalDetections(detections=phrases)
    sample.save()

dataset.save()

In [None]:
# Try out the temporal-detection API with two hand-written events on the first sample
sample = dataset.first()

dets = [
    fo.TemporalDetection(label=event_name, support=frame_span)
    for event_name, frame_span in (("meeting", [10, 20]), ("party", [30, 60]))
]
sample["events"] = fo.TemporalDetections(detections=dets)

print(sample)

In [None]:
# Save the changes made to `sample` (the "events" field set in the previous cell)
sample.save()

In [None]:
# Attach the speech segments of every video to its sample as a "subtitles" field
for sample in dataset:
    videoname = os.path.basename(sample.filepath)
    # getSpeechData is imported directly from src.utils.io_utils; no `utils` module is in scope
    speechdata = getSpeechData(processedvideos, videoname)
    if speechdata is None:
        print(f"Speech data not found for {videoname}")
        continue

    subtitles = speechdata["segments"]
    # One annotation per segment, carrying its text and start/end time
    # NOTE(review): fo.Detection is created here without a label or bounding box, and
    # text/start_time/end_time are passed as extra attributes. For time-based labels
    # on video, fo.TemporalDetection with a frame `support` may be what is wanted —
    # confirm the intent.
    text_annotations = [
        fo.Detection(
            text=sub["text"],
            start_time=sub["start"],
            end_time=sub["end"],
        )
        for sub in subtitles
    ]
    sample["subtitles"] = fo.Detections(detections=text_annotations)
    sample.save()

14.299


In [None]:
# NOTE(review): `session` is not created anywhere in the visible part of this notebook —
# presumably it comes from launching the FiftyOne App in a cell that is missing here; confirm.
print(session.selected)

In [None]:
# session.selected holds the samples ticked in the UI; print the first one when there is any
selected = session.selected
if len(selected) > 0:
    print(dataset[selected[0]])
else:
    print("No samples selected. Click the checkbox in the top left of each video to select it.")

# 7.2 Draw annotated timeline for a select video 

A group of visualisations to see what happens in a video. 

In each frame let's find the `centre of gravity` for each person (the average of all the high-confidence marker points). This is handy for time series visualisation. For example plotting the cog.x for each person over time shows how they move closer and further from each other. 

Let's get the keypoint data and calculate these values.

In [None]:
# (emotion, plot colour, arousal, valence) — one row per emotion label
_EMOTION_TABLE = (
    ("angry", "red", 0.9, -0.2),
    ("fear", "orange", 0.2, -0.9),
    ("happy", "yellow", 0.2, 0.9),
    ("neutral", "grey", 0, 0),
    ("sad", "blue", -0.2, -0.9),
    ("surprise", "green", 0.9, 0.2),
    ("disgust", "purple", -0.7, -0.7),
)

# Lookup keyed by emotion name, giving that emotion's colour, arousal and valence
emotionColors = {
    name: {"color": colour, "arousal": arousal, "valence": valence}
    for name, colour, arousal, valence in _EMOTION_TABLE
}

# The two people tracked in each video
who = ["child", "adult"]

In [None]:
# Which timeline panels to draw
plotCoGrav = True
plotStDev = True
plotSpeech = True
plotEmotions = True

# Booleans add up as 0/1, so this is the number of stacked panels
subplots = sum([plotCoGrav, plotStDev, plotSpeech, plotEmotions])

if len(session.selected) == 0:
    # Report and skip the plotting. Calling exit() inside a notebook asks the kernel
    # to shut down, which throws away all loaded state.
    print("No video selected")
else:
    VideoID = dataset[session.selected[0]]["VideoID"]
    # NOTE(review): `utils` and `calcs` are not imported in the visible part of this
    # notebook. The helpers that ARE imported from src.utils.io_utils (getVideoProperty,
    # getSpeechData, getFaceData) are called directly below; readKeyPointsFromCSV,
    # rowcogs and rowstds have no visible import — confirm where they live.
    keypoints = utils.readKeyPointsFromCSV(processedvideos, VideoID)
    FPS = getVideoProperty(processedvideos, VideoID, "FPS")
    xmax = keypoints["frame"].max()
    # Row by row, derive two x/y column pairs from columns 8..58
    # (presumably the keypoint coordinates: their mean and standard deviation — confirm in calcs)
    keypoints[["cogx", "cogy"]] = keypoints.apply(lambda row: calcs.rowcogs(row.iloc[8:59]), axis=1, result_type='expand')
    keypoints[["stdx", "stdy"]] = keypoints.apply(lambda row: calcs.rowstds(row.iloc[8:59]), axis=1, result_type='expand')

    child = keypoints[keypoints["person"] == "child"]
    adult = keypoints[keypoints["person"] == "adult"]

    # One subplot per enabled flag, stacked vertically
    plt.figure(figsize=(20, 5 * subplots))
    plt.suptitle("Video Time Line Plots")
    pltidx = 0

    # The centre-of-gravity and spread panels differ only in the column and y label
    trace_panels = [
        (plotCoGrav, "cogx", "Horizontal Position"),
        (plotStDev, "stdx", "Horizontal Spread (std dev)"),
    ]
    for enabled, column, ylabel in trace_panels:
        if not enabled:
            continue
        ax = plt.subplot(subplots, 1, pltidx + 1)
        pltidx += 1
        ax.set_xlabel("Time (seconds)")
        ax.set_ylabel(ylabel)
        ax.set_xlim(0, xmax / FPS)
        # The axis is in seconds, so frame numbers are divided by FPS; plotting raw
        # frame numbers would push almost all of the trace outside the x limits.
        ax.plot(child["frame"] / FPS, child[column], c="red", alpha=0.5)
        ax.plot(adult["frame"] / FPS, adult[column], c="blue", alpha=0.5)
        ax.legend(['child', 'adult'], loc='upper left')

    if plotSpeech:
        ax2 = plt.subplot(subplots, 1, pltidx + 1)
        pltidx += 1
        ax2.set_xlabel("Time (seconds)")
        ax2.set_ylabel("Identified Speech")
        speechjson = getSpeechData(processedvideos, VideoID)
        if speechjson is not None:
            nsegs = len(speechjson["segments"])
            ax2.set_xlim(0, xmax / FPS)
            ax2.set_ylim(0, nsegs)
            # Each segment is a filled box spanning its start/end time, on its own row
            # (first segment at the top), labelled with the spoken text
            for idx, seg in enumerate(speechjson["segments"]):
                row_bottom = nsegs - idx - 1
                row_top = nsegs - idx
                ax2.fill(
                    [seg["start"], seg["end"], seg["end"], seg["start"]],
                    [row_bottom, row_bottom, row_top, row_top],
                    'r',
                    alpha=0.5,
                )
                ax2.text(seg["start"], nsegs - idx - .5, seg["text"])

    if plotEmotions:
        ax3 = plt.subplot(subplots, 1, pltidx + 1)
        pltidx += 1
        ax3.set_xlabel("Time (seconds)")
        ax3.set_ylim(0, 2)
        ax3.set_xlim(0, xmax / FPS)
        emotions = getFaceData(processedvideos, VideoID)
        # Same guard as for the speech data: skip the panel content when nothing was found
        if emotions is not None:
            emotions["ticker"] = 1
            for index in range(2):
                ems = emotions[emotions["index"] == index]
                # key is the emotion name, data the rows carrying that emotion
                for key, data in ems.groupby('emotion'):
                    # One marker per detection, on a separate row for each face index,
                    # positioned in seconds to match the axis
                    ax3.scatter(data["frame"] / FPS, data["ticker"] + index, label=key, c=emotionColors[key]["color"], alpha=0.5, s=100)

            # show legend with emotion colours
            plt.legend(loc='best')

    plt.show()




Let's plot the captions.
Go through the speechjson. For each speech segment, add a horizontal line with the text, using the start and end times from the speechjson.

Now let's do a timeline for the emotions of the participants.
We'll experiment to find best visualisation. 
Note that this assumes the faces are assigned to the correct individuals.
TODO - Code that uses bounding boxes to assign faces to individuals.

First we will try a 'scatter' graph. Color coded for each emotion. 

In [None]:
import json

def scan_folder(source_folder, document_root=None):
    """Build a list of Label Studio task entries for the .mp4 files under a folder.

    The folder is walked recursively in sorted order, so the ids are assigned
    the same way on every run. The extension match ignores case, so ``.MP4``
    files are included too.

    Args:
        source_folder: Folder to search.
        document_root: Optional host folder corresponding to Label Studio's
            local-files document root. When given, each file is referenced by
            its path relative to this folder, with forward slashes. When
            omitted, the full path as found on disk is used.

    Returns:
        A list of dicts of the form
        ``{"id": n, "data": {"audio": url, "video": url}}`` with ids counting
        up from 1; both keys point at the same file.
    """
    data_list = []

    for root, dirs, files in os.walk(source_folder):
        # Sorting in place also fixes the order in which os.walk descends
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith('.mp4'):
                continue
            file_path = os.path.join(root, file)
            if document_root is not None:
                file_path = os.path.relpath(file_path, document_root).replace(os.sep, "/")
            # The same file serves as both the audio and the video source
            local_url = f"/data/local-files/?d={file_path}"
            data_list.append({
                "id": len(data_list) + 1,
                "data": {
                    "audio": local_url,
                    "video": local_url,
                },
            })

    return data_list

In [None]:
# Build the task entries for every .mp4 under the local video folder.
# NOTE(review): the result is only held in `filejson`; nothing visible here writes it
# to a file or sends it to Label Studio.
filejson = scan_folder(r"C:\Users\caspar\OneDrive\data\LookitLaughter.videos")