# Installation Part(Only need to run once)

In [None]:
%%bash

cd models/research
# Compile protos.
protoc object_detection/protos/*.proto --python_out=.
# Install TensorFlow Object Detection API.
cp object_detection/packages/tf1/setup.py .
python3 -m pip install .

# Running Part

In [1]:
import cv2

In [2]:
import os
import glob
import pandas as pd
import io
import xml.etree.ElementTree as ET
import argparse
from PIL import Image, ImageDraw, ImageFont
from object_detection.utils import dataset_util, label_map_util, visualization_utils as viz_utils
from collections import namedtuple
import time
import tensorflow
from tensorflow.keras.models import load_model

import os
import cv2
import numpy as np
import csv
from collections import deque
from moviepy.editor import VideoFileClip,AudioFileClip,concatenate_videoclips
# setting min confidence threshold
MIN_CONF_THRESH=.25

PATH_TO_SAVED_MODEL_ALL=r'./source/saved_model'

detect_fn = tensorflow.saved_model.load(PATH_TO_SAVED_MODEL_ALL)


PATH_TO_LABELS_ALL=r'./source/annotations/label_map.pbtxt'

category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS_ALL,use_display_name=True)

red_model=tensorflow.keras.models.load_model(r'./source/model/arrow_red_model.h5')
left_model=tensorflow.keras.models.load_model(r'./source/model/arrow_left_model.h5')
avatar_is_blue_model=tensorflow.keras.models.load_model(r'./source/model/avatar_is_blue_model.h5')

2023-09-08 17:23:30.100391: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-09-08 17:23:30.100421: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2023-09-08 17:23:30.100436: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2023-09-08 17:23:30.100520: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-09-08 17:23:30.100563: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)














In [3]:
def load_image_into_numpy_array(path):
    """Load an image from file into a numpy array.
    Puts image into numpy array of shape (height, width, channels), where channels=3 for RGB to feed into tensorflow graph.
    Args:
      path: the file path to the image
    Returns:
      uint8 numpy array with shape (img_height, img_width, 3)
    """
    return np.array(cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB))

def deduplicate(detections,conf_thresh):
  """
    deduplicate each class in detection, only keep the one with highest confidence score

    Parameters:
    detection: detection generated by detect_fn.

    Returns:
    detection that has only each class with highest confidence score
  """

  appearedClassNum = []
  appearedIndex=[]
  for i in range(detections['detection_boxes'].shape[0]):
    class_num = int(detections['detection_classes'][i])
    if class_num not in appearedClassNum and detections['detection_scores'][i]>conf_thresh:
      appearedClassNum.append(class_num)
      appearedIndex.append(i)
    if len(appearedClassNum) == 3:
      break
  appearedClassNum = sorted(appearedClassNum)
  detections['detection_boxes'] = np.array([detections['detection_boxes'][i] for i in appearedIndex]).reshape((len(appearedClassNum), 4))
  detections['detection_classes'] =  np.array([detections['detection_classes'][i] for i in appearedIndex]).reshape((len(appearedClassNum),))
  detections['detection_scores'] =  np.array([detections['detection_scores'][i] for i in appearedIndex]).reshape((len(appearedClassNum),))
  detections['num_detections']=len(appearedClassNum)
  return detections

def crop_arrow_image(image, detections):
    """
    Crop an image with a given bounding box.

    Parameters:
    image (numpy.ndarray): The input image to crop.
    bbox (tuple): A tuple of four integers (xmin, ymin, xmax, ymax) that specify the bounding box coordinates.

    Returns:
    numpy.ndarray: The cropped image.
    """

    HEIGHT=1080
    LENGTH=1920
    for i in range(detections['detection_boxes'].shape[0]):
      if detections['detection_classes'][i]==2 and detections['detection_scores'][i]>=0.25: #label of arrow
        yminr, xminr, ymaxr, xmaxr = detections['detection_boxes'][i]
        ymin = int(yminr * HEIGHT)
        xmin = int(xminr * LENGTH)
        ymax = int(ymaxr * HEIGHT)
        xmax = int(xmaxr * LENGTH)

        cropped_image = image[ymin:ymax, xmin:xmax]
        return cropped_image
    return None

def preprocess_image(im, desired_size=224, resample=Image.BOX):
    """
    preprocess the arrow image resolution to 224x224 to feed in neural network

    Parameters:
    im: PIL image class

    Returns:
    im: Image after resample to 224x224
    """

    im = im.resize((desired_size, )*2, resample=resample)
    return im

def predict_red(img_array,model):
  """
  predict if img_array is a red arrow or a white arrow

  Parameters:
  img_array: numpyndarray [0..1], shape=[224,224,3] representing the arrow image
  model: model for predicting red arrow.

  Returns:
  boolean: True if arrow red, False if arrow white
  """

  img_array=np.expand_dims(img_array, axis=0)
  prediction = model.predict(img_array,verbose=0)
  binary_prediction = int(prediction[0][0] > 0.5)
  if binary_prediction == 1:
      return True
  else:
      return False

def predict_left(img_array,model):
  """
  predict if img_array is a left arrow or a right arrow

  Parameters:
  img_array: numpyndarray [0..1], shape=[224,224,3] representing the arrow image
  model: model for predicting direction of the arrow./~bseward/103a_fall22/

  Returns:
  boolean: True if arrow is left, False if arrow is right
  """
  img_array=np.expand_dims(img_array, axis=0)
  prediction = model.predict(img_array,verbose=0)
  binary_prediction = int(prediction[0][0] > 0.5)
  if binary_prediction == 1:
    return True
  else:
    return False

def impose_left_text(img_array, isLeft=False):
  """
    write arrow direction result in prediction video

    Parameters:
    img_array: numpyndarray [0...255], shape=[1080,1920,3] representing the whole image
    isLeft: supposed arrow presents, if the arrow is left in this image

    Returns:
    imposed_array: the whole image imposed with direction of arrow
  """

  img = Image.fromarray(np.uint8(img_array))
  draw = ImageDraw.Draw(img)
  if isLeft==True:
    text_to_impose='left arrow'
  else:
    text_to_impose='right arrow'
  font = ImageFont.truetype(r'arial.ttf', 90)
  draw.text((10, 10), text_to_impose,font=font, fill=(52, 235, 70))
  # Convert the PIL Image back to a NumPy array
  imposed_array = np.array(img)
  return imposed_array

def impose_red_text(img_array, isRed=False):
  """
    write arrow color result in prediction video

    Parameters:
    img_array: numpyndarray [0...255], shape=[1080,1920,3] representing the whole image
    isRed: supposed arrow presents, if the arrow is red or white in this image

    Returns:
    imposed_array: the whole image imposed with color of arrow
  """

  img = Image.fromarray(np.uint8(img_array))
  draw = ImageDraw.Draw(img)
  font = ImageFont.truetype(r'arial.ttf', 100)
  if isRed==True:
    text_to_impose='red arrow'
    draw.text((10, 110), text_to_impose ,font=font, fill=(255, 0, 0))
  else:
    text_to_impose='white arrow'
    draw.text((10, 110), text_to_impose,font=font, fill=(180, 180, 200))
  # Convert the PIL Image back to a NumPy array
  imposed_array = np.array(img)
  return imposed_array

def impose_avatar_text(img_array, isBlue):
  img = Image.fromarray(np.uint8(img_array))
  draw = ImageDraw.Draw(img)
  font = ImageFont.truetype(r'arial.ttf', 100)
  if isBlue==True:
    text_to_impose='blue avatar'
    draw.text((10, 770), text_to_impose ,font=font, fill=(150, 150, 150))
  else:
    text_to_impose='green avatar'
    draw.text((10, 770), text_to_impose,font=font, fill=(150, 150, 150))
  # Convert the PIL Image back to a NumPy array
  imposed_array = np.array(img)
  return imposed_array

def impose_frame_count(img_array, frame_count):
  img = Image.fromarray(np.uint8(img_array))
  draw = ImageDraw.Draw(img)
  font = ImageFont.truetype(r'arial.ttf', 100)
  text_to_impose='frame_count: '+str(frame_count)
  draw.text((10, 870), text_to_impose ,font=font, fill=(108,108,108))
  # Convert the PIL Image back to a NumPy array
  imposed_array = np.array(img)
  return imposed_array

In [4]:
def get_detections(image_np, detect_fn):
  input_tensor = tensorflow.convert_to_tensor(image_np)
  input_tensor = input_tensor[tensorflow.newaxis, ...]
  detections = detect_fn(input_tensor)
  num_detections = int(detections.pop('num_detections'))
  detections = {key: value[0, :num_detections].numpy()
                for key, value in detections.items()}
  detections['num_detections'] = num_detections
  detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
  return detections

def grayscale_yellow(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    yellow_hue_lower = np.array([20, 50, 100])  # Lower yellow hue range for light yellow
    yellow_hue_upper = np.array([40, 255, 255])  # Upper yellow hue range
    yellow_mask = cv2.inRange(hsv, yellow_hue_lower, yellow_hue_upper)
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gray_yellow = cv2.bitwise_and(gray, gray, mask=yellow_mask)
    return gray_yellow

def get_gaze(image_np):
    blurred = cv2.GaussianBlur(image_np, (21, 21), 0)
    gray = grayscale_yellow(blurred)
    # Threshold the grayscale image to create a binary image
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        area = cv2.contourArea(contour)

        # Check if the perimeter is non-zero before calculating circularity
        if perimeter > 0:
            circularity = 4 * np.pi * (area / (perimeter * perimeter))
            (x, y), radius = cv2.minEnclosingCircle(contour)
            if circularity > 0.5 and radius>20:
                ymin=int(y-radius)
                xmin=int(x-radius)
                ymax=int(y+radius)
                xmax=int(x+radius)
                ymin=max(ymin,0)
                xmin=max(xmin,0)
                ymax=min(ymax,1080)
                xmax=min(xmax,1920)

                return True, (ymin, xmin, ymax, xmax)
    return False, None


def impose_gaze_sqaure(img_array, isGaze,gaze_bbox):
  """
    draw a rectangle around the gaze in prediction video

    Parameters:
    img_array: numpyndarray [0...255], shape=[1080,1920,3] representing the whole image
    isGaze: if gaze exists or not

    Returns:
    imposed_array: the whole image imposed with a rectangular around circle
  """

  img = Image.fromarray(np.uint8(img_array))
  draw = ImageDraw.Draw(img)
  font = ImageFont.truetype(r'arial.ttf', 90)
  if isGaze==True:
    text_to_impose="Gaze"
    draw.text((10, 210), text_to_impose ,font=font, fill=(255, 255, 0))
    xmin=gaze_bbox[1]
    ymin=gaze_bbox[0]
    xmax=gaze_bbox[3]
    ymax=gaze_bbox[2]
    draw.rectangle([(xmin,ymin), (xmax, ymax)], outline=(0, 128, 255), width=5)
  imposed_array = np.array(img)
  return imposed_array

def get_object_and_bbox(detections):
  '''
  use after deduplication, bbox format:(ymin,xmin,ymax,xmax)
  '''
  avatar,avatar_bbox, arrow, arrow_bbox, sphere,sphere_bbox=[None for i in range(6)]
  avatar=False
  arrow=False
  sphere=False

  HEIGHT=1080
  LENGTH=1920
  for i in range(detections['detection_boxes'].shape[0]):
    if detections['detection_classes'][i]==2: #label of arrow
      yminr, xminr, ymaxr, xmaxr = detections['detection_boxes'][i]
      ymin = int(yminr * HEIGHT)
      xmin = int(xminr * LENGTH)
      ymax = int(ymaxr * HEIGHT)
      xmax = int(xmaxr * LENGTH)
      arrow=True
      arrow_bbox=(ymin, xmin, ymax, xmax)
    elif detections['detection_classes'][i]==1:
      yminr, xminr, ymaxr, xmaxr = detections['detection_boxes'][i]
      ymin = int(yminr * HEIGHT)
      xmin = int(xminr * LENGTH)
      ymax = int(ymaxr * HEIGHT)
      xmax = int(xmaxr * LENGTH)
      sphere=True
      sphere_bbox=(ymin, xmin, ymax, xmax)
    elif detections['detection_classes'][i]==3:
      yminr, xminr, ymaxr, xmaxr = detections['detection_boxes'][i]
      ymin = int(yminr * HEIGHT)
      xmin = int(xminr * LENGTH)
      ymax = int(ymaxr * HEIGHT)
      xmax = int(xmaxr * LENGTH)
      avatar=True
      avatar_bbox=(ymin, xmin, ymax, xmax)

  return avatar,avatar_bbox, arrow, arrow_bbox, sphere,sphere_bbox

def gaze_at_arrow(gaze,gaze_box,arrow, arrow_bbox):
  return False

def gaze_at_avatar(gaze,gaze_box, avatar, avatar_bbox):
  return False

def sphere_at_avatar(sphere,sphere_box,avatar, avatar_bbox):
  return False

def arrow_is_left_white(image,detections,left_model,red_model):
  arrow=crop_arrow_image(image,detections)
  if type(arrow)!=type(None):
    arrow=preprocess_image(Image.fromarray(arrow))
    arrow= np.array(arrow)
    arrow_float=arrow.astype(float)
    arrow_float=arrow_float*1./255
    isRed=predict_red(arrow_float,red_model)
    isLeft=predict_left(arrow_float,left_model)
    return isLeft,not isRed
  else:
    return False,False
def crop_avatar_image(image, detections):
    """
    Crop an image with a given bounding box.

    Parameters:
    image (numpy.ndarray): The input image to crop.
    bbox (tuple): A tuple of four integers (xmin, ymin, xmax, ymax) that specify the bounding box coordinates.

    Returns:
    numpy.ndarray: The cropped image.
    """

    HEIGHT=1080
    LENGTH=1920
    for i in range(detections['detection_boxes'].shape[0]):
      if detections['detection_classes'][i]==3 and detections['detection_scores'][i]>=0.25: #label of arrow
        yminr, xminr, ymaxr, xmaxr = detections['detection_boxes'][i]
        ymin = int(yminr * HEIGHT)
        xmin = int(xminr * LENGTH)
        ymax = int(ymaxr * HEIGHT)
        xmax = int(xmaxr * LENGTH)
        cropped_image = image[ymin:ymax, xmin:xmax]
        # if ss==True:
        #   save_dir=r'/content/drive/MyDrive/avatar/'
        #   image_pil = Image.fromarray(cropped_image.astype(np.uint8))
        #   file_name = os.path.join(save_dir, str(fc)+".jpg")
        #   image_pil.save(file_name)
        return cropped_image
    return None

def get_avatar_is_blue(image, detections,avatar_is_blue_model):
  avatar=crop_avatar_image(image,detections)
  if type(avatar)!=type(None):
    avatar=preprocess_image(Image.fromarray(avatar))
    avatar= np.array(avatar)
    avatar_float=avatar.astype(float)
    avatar_float=avatar_float*1./255
    isBlue=predict_red(avatar_float,avatar_is_blue_model)
    ret=-1 if isBlue==True else 1
    return ret
  else:
    return None

In [5]:
def check_bbox_overlap(bbox_1,bbox_2,tolerance=0):
    ymin_1, xmin_1, ymax_1, xmax_1 = bbox_1
    ymin_2, xmin_2, ymax_2, xmax_2 = bbox_2

    # Calculate the intersection coordinates
    xmin_intersection = max(xmin_1, xmin_2)
    ymin_intersection = max(ymin_1, ymin_2)
    xmax_intersection = min(xmax_1, xmax_2)
    ymax_intersection = min(ymax_1, ymax_2)

    # Check if there is an intersection
    if xmin_intersection < xmax_intersection and ymin_intersection < ymax_intersection:
        return True
    else:
        return False
def get_gaze_at_arrow(gaze, gaze_bbox,arrow,arrow_bbox,tolerance=0):
  if gaze==True and arrow==True:
    return check_bbox_overlap(gaze_bbox,arrow_bbox,tolerance)
  else:
    return False

def get_gaze_at_avatar(gaze, gaze_bbox,avatar,avatar_bbox,tolerance=0):
  if gaze==True and avatar==True:
    return check_bbox_overlap(gaze_bbox,avatar_bbox,tolerance)
  else:
    return False

def get_sphere_at_avatar(sphere, sphere_bbox,avatar,avatar_bbox,tolerance=0):
  if sphere==True and avatar==True:
    return check_bbox_overlap(sphere_bbox,avatar_bbox,tolerance)
  else:
    return False
def impose_inference_text(image_np,gaze_at_arrow,gaze_at_avatar,sphere_at_avatar):

  '''
  impose the text of gaze_at_arrow, gaze_at_avatar and sphere_at_avatar on image
  image: image np_array
  text: text to impose
  '''
  img = Image.fromarray(np.uint8(image_np))
  draw = ImageDraw.Draw(img)
  font = ImageFont.truetype(r'arial.ttf', 90)
  if gaze_at_arrow==True:
    text_to_impose="gaze_at_arrow"
    draw.text((10, 310), text_to_impose ,font=font, fill=(245, 11, 220))
  if gaze_at_avatar==True:
    text_to_impose="gaze_at_avatar"
    draw.text((10, 410), text_to_impose ,font=font, fill=(11, 226, 245))
  if sphere_at_avatar==True:
    text_to_impose="sphere_at_avatar"
    draw.text((10, 510), text_to_impose ,font=font, fill=(104, 67, 222))
  imposed_array = np.array(img)
  return imposed_array

def impose_shot_detected_text(image_np,shot_detected):
  img = Image.fromarray(np.uint8(image_np))
  draw = ImageDraw.Draw(img)
  font = ImageFont.truetype(r'arial.ttf', 90)
  if shot_detected==True:
    text_to_impose="shot detected"
    draw.text((10, 610), text_to_impose ,font=font, fill=(126, 231, 188))
  imposed_array = np.array(img)
  return imposed_array

In [6]:
def get_max_values(arr):
    arr = arr[::-1]
    result = []
    window = deque()
    max_value = float('-inf')

    for num in arr:
        window.append(num)

        if num >= max_value:
            max_value = num

        if len(window) > 44100*0.04:
            if window[0] == max_value:
                window.popleft()
                max_value = max(window)
            else:
                window.popleft()

        result.append(max_value)

    return result[::-1]

def create_shot(PATH,amplitude_threshold=0.30):
  #audio_data=AudioFileClip(PATH).to_soundarray() #audio.to_soundarray normalize the amplitude
  #audio_data=audio_data[:,0]
    
    
    
  audio_data=np.stack(AudioFileClip(PATH).iter_frames())[:,0]

  shot=get_max_values(audio_data)
  shot=np.array(shot)
  shot=(shot>=amplitude_threshold)

  time = np.linspace(0, audio_data.shape[0] / 44100, len(audio_data))
  abnormal_segment = shot[int(44100*6):int(44100*6.001)+1]
  time_segment = time[int(44100*6):int(44100*6.001)+1]

  return shot

from moviepy.editor import VideoFileClip, AudioClip

def add_audio_to_video(audio_source_video_path, output_raw_path, output_path):
    video = VideoFileClip(output_raw_path)
    audio=VideoFileClip(audio_source_video_path).audio
    print(audio)
    video = video.set_audio(audio)
    video.write_videofile(output_path, codec="rawvideo",logger=None) 

In [7]:
def post_processing_csv(CSV_PATH):
  df = pd.read_csv(CSV_PATH)
  #get coordinates of estimated sphere
  indices = df[df['sphere_is_estimated'] == True].index
  length=len(df)
  for i in indices:
    estimate_index=None
    offset=1
    while(True):
      j=i-offset
      if j>=0 and df.iloc[j]['sphere']==True:
        estimate_index=j
        break
      j=i+offset
      if j<length and df.iloc[j]['sphere']==True:
        estimate_index=j
        break
      offset+=1
    df.at[i, 'sphere'] = True
    df.at[i, 'sphere_bbox'] = df.at[estimate_index, 'sphere_bbox']
    df.at[i, 'sphere_ymin'] = df.at[estimate_index, 'sphere_ymin']
    df.at[i, 'sphere_xmin'] = df.at[estimate_index, 'sphere_xmin']
    df.at[i, 'sphere_ymax'] = df.at[estimate_index, 'sphere_ymax']
    df.at[i, 'sphere_xmax'] = df.at[estimate_index, 'sphere_xmax']

  #update count_arrow and count_avatar columns
  arrow_count=0
  avatar_count=0
  #62 frames is 2500 ms, the interval we want
  arrow_interval=62
  avatar_interval=62
  threshold=62
  last_arrow=-62
  last_avatar=-62
  for index in range(len(df)):
    is_arrow_in_5_frame=True
    is_avatar_in_5_frame=True

    if index-last_arrow>=threshold:
      for j in range(5):
        if index+j<len(df) and df.iloc[index+j]['arrow']!=True:
          is_arrow_in_5_frame=False
          break

    if index-last_avatar>=threshold:
      for j in range(5):
        if index+j<len(df) and df.iloc[index+j]['avatar']!=True:
          is_avatar_in_5_frame=False
          break
    if is_arrow_in_5_frame==True and index-last_arrow>=threshold:
      arrow_count=arrow_count+1
      df.at[index,'arrow_count']=arrow_count
      last_arrow=index
    if is_avatar_in_5_frame==True and index-last_avatar>=threshold:
      avatar_count=avatar_count+1
      df.at[index,'avatar_count']=avatar_count
      last_avatar=index

  #get first_frame_of_gaze_at_avatar
  last_avatar_count=0
  current_avatar_count=0
  for index in range(len(df)):
    if not pd.isnull(df.at[index,'avatar_count']):
      current_avatar_count=df.at[index,'avatar_count']
    if df.at[index,'gaze_at_avatar']==True and current_avatar_count!=last_avatar_count:
      first_frame_of_gaze_at_avatar=True
      last_avatar_count=current_avatar_count
    else:
      first_frame_of_gaze_at_avatar=False
    df.at[index,'first_frame_of_gaze_at_avatar']=first_frame_of_gaze_at_avatar

  #add arrow_is_5_to_8_frames_before_avatar, avatar_is_5_to_8_frames_after_arrow
  for index in range(len(df)):
    if not pd.isnull(df['arrow_count'][index]):
      for i in range(5,9):
        if (index+i)<len(df) and not pd.isnull(df.at[index+i,'avatar_count']):
          df.at[index,'arrow_is_5_to_8_frames_before_avatar']=True
          break
      else:
        df.at[index,'arrow_is_5_to_8_frames_before_avatar']=False
    else:
      df.at[index,'arrow_is_5_to_8_frames_before_avatar']=None
  for index in range(len(df)):
    if not pd.isnull(df['avatar_count'][index]):
      for i in range(5,9):
        if (index-i)>=0 and not pd.isnull(df.at[index-i,'arrow_count']):
          df.at[index,'avatar_is_5_to_8_frames_after_arrow']=True
          break
      else:
        df.at[index,'avatar_is_5_to_8_frames_after_arrow']=False
    else:
      df.at[index,'avatar_is_5_to_8_frames_after_arrow']=None

  #add arrow_remains_longtime and avatar_remains_longtime
  if 'BEAM' in CSV_PATH:
    avatar_interval=60
    arrow_interval=65
  else:
    avatar_interval=45
    arrow_interval=50
  for index in range(len(df)-66):
    if not pd.isnull(df['arrow_count'][index]):
      if df['arrow'][index+arrow_interval-1]==True and df['arrow'][index+arrow_interval]==True and df['arrow'][index+arrow_interval+1]==True:
        df.at[index,'arrow_remains_longtime']=True
      else:
        df.at[index,'arrow_remains_longtime']=False

    if not pd.isnull(df['avatar_count'][index]):
      if df['avatar'][index+avatar_interval-1]==True and df['avatar'][index+avatar_interval]==True and df['avatar'][index+avatar_interval+1]==True:
        df.at[index,'avatar_remains_longtime']=True
      else:
        df.at[index,'avatar_remains_longtime']=False
  df = df.replace({True: 1, False: 0})

  #add trial count
  # for index in range(1,len(df)):
  #   if not pd.isnull(df['arrow_count'][index]):
  #     df.at[index,'trial_count']=df['trial_count'][index-1]+1;
  #   else:
  #     df.at[index,'trial_count']=df['trial_count'][index-1]
  # for index in range(len(df)):
  #   if df['arrow_is_5_to_8_frames_before_avatar'][index]==1 and df['gaze_at_arrow'][index]==1:
  #     df.at[index,'trial_start_time']=df['timee'][index]
  #   if df['first_frame_of_gaze_at_avatar'][index]==1:
  #     df.at[index,'saccade_rt']=df['timee'][index]
  #   if df['first_frame_of_shot'][index]==1:
  #     df.at[index,'manual_rt']=df['timee'][index]

  #formulate version:
  for index in range(3,len(df)):
    df.at[index,'trial_count']='=IF(ISNUMBER(M{ind1}),C{ind}+1,C{ind})'.format(ind1=index+2,ind=index+1)
  for index in range(2,len(df)):
    df.at[index,'trial_start_time']='=IF(N{ind}=1,A{ind},"")'.format(ind=index+2)
    df.at[index,'saccade_rt']='=IF(S{ind}=1,A{ind},"")'.format(ind=index+2)
    df.at[index,'manual_rt']='=IF(U{ind}=1,A{ind},"")'.format(ind=index+2)



  #avatar_direction
  for index in range(9,len(df)-1):
    if df['avatar'][index]==1 and df['arrow'][index]==1:
      count=0
      sum=0
      for i in range(index-9,index+1):
        if not pd.isnull(df['avatar_LR'][i]):
          count=count+1
          sum=sum+df['avatar_LR'][i]
      if count>=5:
        df.at[index,'avatar_direction']=sum/count

  #avatar_color
  for index in range(9,len(df)-1):
    if df['avatar'][index]==1:
      count=0
      sum=0
      for i in range(index-9,index+1):
        if not pd.isnull(df['avatar_is_blue'][i]):
          count=count+1
          sum=sum+df['avatar_is_blue'][i]
      if count>=5:
        df.at[index,'avatar_color']=sum/count

  df.to_excel(CSV_PATH.split('.')[0]+'.xlsx', index=False)

In [8]:
def process(VIDEO_PATH,OUTPUT_PATH,CSV_PATH,draw,beam):
  data = []
  arrow_is_5_to_8_frames_before_avatar,avatar_is_5_to_8_frames_after_arrow,timee,avatar,avatar_bbox,arrow,arrow_bbox,sphere,sphere_is_estimated,sphere_bbox,gaze,gaze_bbox, gaze_at_arrow,gaze_at_avatar,sphere_at_avatar,shot_detected,first_frame_of_shot,arrow_is_left,arrow_is_white,avatar_is_blue,first_frame_of_gaze_at_avatar,avatar_remains_longtime,arrow_remains_longtime=[None for i in range(23)]
  arrow_count=None
  avatar_count=None
  avatar_LR=None

  trial_count=0
  trial_start_time=None
  saccade_rt=None
  manual_rt=None
  avatar_direction=None
  avatar_color=None

  cap = cv2.VideoCapture(VIDEO_PATH)
  frame_width = int(cap.get(3))
  frame_height = int(cap.get(4))
  frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
  num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
  print("Video resolution: {}x{}".format(frame_width, frame_height))
  print("Video fps: {}".format(frame_rate))
  print("Total number of frames: {}".format(num_frames))
  print("processing now")
  prev_frame_shot=False
  if draw:
    out = cv2.VideoWriter(OUTPUT_PATH,cv2.VideoWriter_fourcc(*'MJPG'), frame_rate, (frame_width,frame_height),True)
    
  frame_count = 0
  start_time = time.time()
  shot_raw=create_shot(VIDEO_PATH)
  while(frame_count<num_frames): #csv: frame_count
    # Capture frame-by-frame
      if (beam==False and frame_count==num_frames//2):
        out.release()
        OUTPUT_PATH=OUTPUT_PATH.split(".")[0]+" part2."+OUTPUT_PATH.split(".")[1]
        out = cv2.VideoWriter(OUTPUT_PATH,cv2.VideoWriter_fourcc(*'MJPG'), frame_rate, (frame_width,frame_height),True)
      if (beam==True and frame_count==num_frames//3):
        out.release()
        OUTPUT_PATH=OUTPUT_PATH.split(".")[0]+" part2."+OUTPUT_PATH.split(".")[1]
        out = cv2.VideoWriter(OUTPUT_PATH,cv2.VideoWriter_fourcc(*'MJPG'), frame_rate, (frame_width,frame_height),True)
      elif(beam==True and frame_count==2*(num_frames//3)):
        out.release()
        OUTPUT_PATH=OUTPUT_PATH.split(".")[0]+" part3."+OUTPUT_PATH.split(".")[1]
        out = cv2.VideoWriter(OUTPUT_PATH,cv2.VideoWriter_fourcc(*'MJPG'), frame_rate, (frame_width,frame_height),True)

      timee=frame_count/25
      if frame_count%200==0:
          print(frame_count)
      ret, frame = cap.read()
      if ret != True:
          break


      shot_detected=shot_raw[int(timee*44100)]
      if shot_detected and not prev_frame_shot:
          first_frame_of_shot=True
      else:
          first_frame_of_shot=False
      prev_frame_shot=shot_detected
      output = np.zeros_like(frame)
      image_np = np.array(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))


      gaze,gaze_bbox=get_gaze(image_np)

      detections=get_detections(image_np,detect_fn)

      detections=deduplicate(detections,MIN_CONF_THRESH)

      if(beam==False):
        avatar_is_blue=get_avatar_is_blue(image_np,detections,avatar_is_blue_model)

      avatar,avatar_bbox, arrow, arrow_bbox, sphere,sphere_bbox = get_object_and_bbox(detections)
      arrow_is_left,arrow_is_white=arrow_is_left_white(image_np,detections,left_model,red_model)

      gaze_at_arrow=get_gaze_at_arrow(gaze, gaze_bbox,arrow,arrow_bbox)

      #3 times the length of bounding boxes in gaze_at_avatar
      if gaze_bbox!=None and avatar_bbox!=None:
        ymin, xmin, ymax, xmax=gaze_bbox
        ymid=(ymax+ymin)/2
        three_ymax=min(ymid+(ymax-ymid)*3,1080)
        three_ymin=max(ymid-(ymax-ymid)*3,0)
        xmid=(xmax+xmin)/2
        three_xmax=min(xmid+(xmax-xmid)*3,1920)
        three_xmin=max(xmid-(xmax-xmid)*3,0)
        three_gaze_bbox=(three_ymin, three_xmin, three_ymax, three_xmax)

        ymin, xmin, ymax, xmax=avatar_bbox
        ymid=(ymax+ymin)/2
        three_ymax=min(ymid+(ymax-ymid)*3,1080)
        three_ymin=max(ymid-(ymax-ymid)*3,0)
        xmid=(xmax+xmin)/2
        three_xmax=min(xmid+(xmax-xmid)*3,1920)
        three_xmin=max(xmid-(xmax-xmid)*3,0)
        three_avatar_bbox=(three_ymin, three_xmin, three_ymax, three_xmax)
      else:
        three_gaze_bbox=None
        three_avatar_bbox=None

      gaze_at_avatar=get_gaze_at_avatar(gaze, three_gaze_bbox,avatar,three_avatar_bbox)
      sphere_at_avatar=get_sphere_at_avatar(sphere, sphere_bbox,avatar,avatar_bbox)

      if draw==True:
        image_np_with_detections = image_np.copy()
        viz_utils.visualize_boxes_and_labels_on_image_array(
              image_np_with_detections,
              detections['detection_boxes'],
              detections['detection_classes'],
              detections['detection_scores'],
              category_index,
              use_normalized_coordinates=True,
              min_score_thresh=MIN_CONF_THRESH,
              agnostic_mode=False)
        image_np_with_detections=impose_gaze_sqaure(image_np_with_detections,gaze,gaze_bbox)
        image_np_with_detections=impose_inference_text(image_np_with_detections,gaze_at_arrow,gaze_at_avatar,sphere_at_avatar)
        image_np_with_detections=impose_frame_count(image_np_with_detections,frame_count)
        if arrow==True:
          image_np_with_detections=impose_left_text(image_np_with_detections,arrow_is_left)
          image_np_with_detections=impose_red_text(image_np_with_detections,not arrow_is_white)
        if shot_detected==True:
          image_np_with_detections=impose_shot_detected_text(image_np_with_detections,shot_detected)
        if beam==False and avatar==True:
          image_np_with_detections=impose_avatar_text(image_np_with_detections,avatar_is_blue)
        out.write(cv2.cvtColor(image_np_with_detections,cv2.COLOR_RGB2BGR))



      avatar_ymin,avatar_xmin,avatar_ymax,avatar_xmax,arrow_ymin,arrow_xmin,arrow_ymax,arrow_xmax,sphere_ymin,sphere_xmin,sphere_ymax,sphere_xmax,avatar_ymid,gaze_ymin,gaze_xmin,gaze_ymax,gaze_xmax=[None for i in range(17)]
      if avatar_bbox!=None:
        avatar_ymin,avatar_xmin,avatar_ymax,avatar_xmax=avatar_bbox
        avatar_ymid=round((avatar_ymin+avatar_ymax)/2)
      if arrow_bbox!=None:
        arrow_ymin,arrow_xmin,arrow_ymax,arrow_xmax=arrow_bbox
      if sphere_bbox!=None:
        sphere_ymin,sphere_xmin,sphere_ymax,sphere_xmax=sphere_bbox
      if gaze_bbox!=None:
        gaze_ymin,gaze_xmin,gaze_ymax,gaze_xmax=gaze_bbox
      if first_frame_of_shot==True and sphere==False:
        sphere_is_estimated=True
      else:
        sphere_is_estimated=False

      if avatar==True and arrow==True:
        avatar_LR=-1 if (avatar_xmin+avatar_xmax)/2<(arrow_xmin+arrow_xmax)/2 else 1

      if avatar==False:
        avatar_is_blue=None
      if arrow==False:
        arrow_is_left=None
        arrow_is_white=None

      row_data = [timee, frame_count,trial_count,trial_start_time,saccade_rt,manual_rt,arrow_is_left,arrow_is_white,avatar_direction,avatar_color,
                  avatar_is_blue,arrow, arrow_count,arrow_is_5_to_8_frames_before_avatar,
                  avatar,avatar_count,avatar_is_5_to_8_frames_after_arrow,gaze_at_arrow,first_frame_of_gaze_at_avatar,gaze_at_avatar,
                  first_frame_of_shot,sphere_at_avatar,arrow_remains_longtime,avatar_remains_longtime,
                  arrow_bbox,arrow_ymin,arrow_xmin,arrow_ymax,arrow_xmax,
                  avatar_bbox, avatar_ymin,avatar_xmin,avatar_ymax,avatar_xmax,avatar_ymid,avatar_LR,
                  sphere, sphere_is_estimated, sphere_bbox,sphere_ymin,sphere_xmin,sphere_ymax,sphere_xmax,
                  gaze,gaze_bbox,gaze_ymin,gaze_xmin,gaze_ymax,gaze_xmax,
                  shot_detected]
      data.append(row_data)
      frame_count+=1
  cap.release()

  columns = ['timee', 'frame_count', 'trial_count', 'trial_start_time', 'saccade_rt', 'manual_rt', 'arrow_is_left', 'arrow_is_white','avatar_direction','avatar_color',
                 'avatar_is_blue', 'arrow', 'arrow_count', 'arrow_is_5_to_8_frames_before_avatar',
                 'avatar', 'avatar_count', 'avatar_is_5_to_8_frames_after_arrow', 'gaze_at_arrow', 'first_frame_of_gaze_at_avatar', 'gaze_at_avatar',
                 'first_frame_of_shot', 'sphere_at_avatar', 'arrow_remains_longtime', 'avatar_remains_longtime',
                'arrow_bbox', 'arrow_ymin', 'arrow_xmin', 'arrow_ymax', 'arrow_xmax',
                 'avatar_bbox', 'avatar_ymin', 'avatar_xmin', 'avatar_ymax', 'avatar_xmax', 'avatar_ymid','avatar_LR',
                 'sphere', 'sphere_is_estimated', 'sphere_bbox', 'sphere_ymin', 'sphere_xmin', 'sphere_ymax', 'sphere_xmax',
                 'gaze', 'gaze_bbox', 'gaze_ymin', 'gaze_xmin', 'gaze_ymax', 'gaze_xmax',
                 'shot_detected']

  df = pd.DataFrame(data, columns=columns)
  df.to_csv(CSV_PATH, index=False)
  post_processing_csv(CSV_PATH)

  if draw:
    out.release()
    #add_audio_to_video(VIDEO_PATH, OUTPUT_PATH, OUTPUT_PATH.split('.')[0]+'with_sound.'+OUTPUT_PATH.split('.')[1])

  print('Done! Took {} seconds'.format(time.time()-start_time))
  print(str(frame_count)+' frame processed; '+OUTPUT_PATH+' generated')
  print(CSV_PATH+' generated')


In [9]:
#VPATH = path with videos to be processed
VPATH = 'videos/'
#output path for csv files'
CSVPATH = 'predictions/csvs/'
OUTPUTPATH='/Users/fusionmac/Documents/TBICoE/FusionVR/content/predictions/videos/'
video_list=[]
output_list=[]
csv_list=[]
tmp_list=['V059','V008']
for path in os.listdir(VPATH):
  for i in tmp_list:
    if i in path:
      video_list.append(VPATH + path)
      output_list.append((OUTPUTPATH+path).split('.')[0]+'.avi')
      csv_list.append(CSVPATH+path.split('.')[0]+'.csv')
      break

i=0
while i<len(video_list):
  print(video_list[i],output_list[i],csv_list[i],True,'BEAM' in video_list[i])
  process(video_list[i],output_list[i],csv_list[i],True,'BEAM' in video_list[i])
  print(str(i)+" is done!")
  i=i+1

videos/V008_0Back.mp4 predictions/videos/V008_0Back.avi predictions/csvs/V008_0Back.csv True False
Video resolution: 1920x1080
Video fps: 25
Total number of frames: 13620
processing now


  process(video_list[i],output_list[i],csv_list[i],True,'BEAM' in video_list[i])



0


2023-09-08 17:25:22.129749: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-09-08 17:25:46.168298: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-09-08 17:25:46.270386: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-09-08 17:25:48.111793: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
Done! Took 4103.441537857056 seconds
13620 frame processed; predictions/videos/V008_0Back.avi generated
predictions/csvs/V008_0Back.csv generated
0 is done!
videos/V008_BEAM.mp4 predictions/videos/V008_BEAM.avi predictions/csvs/V008_BEAM.csv True True
Video resolution: 1920x1080
Video fps: 25
Total number of frames: 20048
processing now


  process(video_list[i],output_list[i],csv_list[i],True,'BEAM' in video_list[i])



0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400
14600
14800
15000
15200
15400
15600
15800
16000
16200
16400
16600
16800
17000
17200
17400
17600
17800
18000
18200
18400
18600
18800
19000
19200
19400
19600
19800
20000
Done! Took 5877.531508922577 seconds
20048 frame processed; predictions/videos/V008_BEAM part2.avi generated
predictions/csvs/V008_BEAM.csv generated
1 is done!
videos/P051_V059_BEAM.mp4 predictions/videos/P051_V059_BEAM.avi predictions/csvs/P051_V059_BEAM.csv True True
Video resolution: 1920x1080
Video fps: 25
Total number of frames: 20641
processing now


  process(video_list[i],output_list[i],csv_list[i],True,'BEAM' in video_list[i])



0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400
14600
14800
15000
15200
15400
15600
15800
16000
16200
16400
16600
16800
17000
17200
17400
17600
17800
18000
18200
18400
18600
18800
19000
19200
19400
19600
19800
20000
20200
20400
20600
Done! Took 5441.037034988403 seconds
20641 frame processed; predictions/videos/P051_V059_BEAM part2.avi generated
predictions/csvs/P051_V059_BEAM.csv generated
2 is done!
