<a href="https://colab.research.google.com/github/OliDeane/Deep_SAGA/blob/master/Deep_SAGA_System_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep SAGA

To run this system, please ensure that you have a folder in Google Drive (e.g., entitled 'Gaze_Detection_System') containing the following files:



*   your_video_file.mp4
*   your_gaze_coordinates.csv
*   maskrcnn_predict.py
*   coco_labels.txt
*   mask_rcnn_coco.h5

NB. The above files (including example video) can be found at: https://drive.google.com/drive/folders/1OAYbOeE5eIWzM-iwA0Agcg3M9MteN8JQ?usp=sharing 


Before running, change the runtime type to GPU (Runtime > Change Runtime Type > GPU). Next, insert required variables into the code:

* the path to your Google Drive folder, 
* the name of your video and gaze data files

Then run all cells. One cell will prompt you to mount your Google Drive. When this occurs, follow the link and paste the provided URL into the input box. 



Once all cells have finished running, the system will produce the following output files:

*   A video labelled with the currently gazed upon object for each frame
*   A list of all gazed upon objects for each frame of footage
*   Summary statistics outlining the average time spent gazing at people, vehicles and greenery.








In [23]:
""" Insert the filenames and path to the Google drive folder where all files are kept """

path = r"/content/gdrive/My Drive/Gaze_Detection_System" # Insert the path to your Google Drive folder
gaze_filename = "GD_pp1_s1_City_vidA.csv" # Insert your gaze data (CSV) filename here
video_filename = "pp1_s1_City_vidA.mp4" # Insert your video filename here

In [41]:
""" Set the greenery values. These define which pixels will be identified as green. It is dependant on the lighting conditions during filming.
Currently set to default, but this can be changed. """

# Both values feed the main loop's greenery mask:
#   cv2.inRange(hsv, (30, greenery_value_A, 20), (greenery_value_B, 255, 255))
# i.e. greenery_value_A is the lower bound on the second HSV channel (saturation)
# and greenery_value_B is the upper bound on the first HSV channel (hue).
greenery_value_A = 80
greenery_value_B = 90

In [1]:
#@title Import all necessary packages. 
#@markdown You may have to restart the runtime and run this again to ensure that the correct versions of packages are installed.
# Import all the main packages

import pandas as pd
import numpy as np
import colorsys
import argparse
import imutils
import random
import cv2
import os
import time
from google.colab.patches import cv2_imshow
from google.colab import files
from PIL import Image, ImageDraw, ImageFont
from datetime import datetime
import io
import pandas as pd
import matplotlib.pyplot as plt
import h5py
if h5py.__version__ != '2.10.0':
  !pip install h5py==2.10.0

In [None]:
#@title Mount the Google Drive
#@markdown When prompted, follow the link, sign in to your Google Drive account and copy the provided URL in the box below.
# Mount the google drive at /content/gdrive; the `path` variable set in the
# first cell points inside this mount point.
from google.colab import drive
drive.mount("/content/gdrive")

In [None]:
#@title Load in Tensorflow and the pre-trained Mask RCNN model
# Colab line magic selecting the TensorFlow 1.x runtime; it is issued before
# the import so that the 1.x build is the one that gets loaded.
# NOTE(review): presumably required because the Mask_RCNN package installed
# below targets TF 1.x - confirm.
%tensorflow_version 1.x
import tensorflow as tf

#Check that tensorflow is running using the GPU
# Aborts the cell with SystemError unless the first GPU device is visible.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
print(tf.__version__)

# Grab the MRCNN model from github
!pip install git+https://github.com/matterport/Mask_RCNN.git
import mrcnn
# Config is subclassed for inference, modellib builds the network, and
# visualize supplies apply_mask for drawing masks onto frames.
from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize


In [None]:
#@title Load in MASK RCNN files and initiate the model

## Load in all the necessary files for the MRCNN and initialise the network

# Listing the folder raises immediately if the Drive folder is missing or the
# drive has not been mounted, before any heavier work starts.
os.listdir(path)

# load the class label names, one label per line
# (context manager so the file handle is closed as soon as it has been read)
with open(os.path.join(path, "coco_labels.txt")) as labels_file:
  CLASS_NAMES = labels_file.read().strip().split("\n")

# generate random (but visually distinct) colors for each class label
# (thanks to Matterport Mask R-CNN for the method!)
hsv = [(i / len(CLASS_NAMES), 1, 1.0) for i in range(len(CLASS_NAMES))]
COLORS = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
random.seed(42)  # fixed seed so every run assigns the same colour to a class
random.shuffle(COLORS)


class SimpleConfig(Config):
  """Inference configuration for the pre-trained COCO Mask R-CNN weights."""

  # give the configuration a recognizable name
  NAME = "coco_inference"
  # one image is passed to model.detect() at a time
  GPU_COUNT = 1
  IMAGES_PER_GPU = 1
  # Evaluated at class-definition time, i.e. BEFORE the extra labels are
  # appended to CLASS_NAMES below, so it counts only the labels in the file.
  NUM_CLASSES = len(CLASS_NAMES)


# initialize the inference configuration
config = SimpleConfig()

weights_path = os.path.join(path, "mask_rcnn_coco.h5")
print("[INFO] loading Mask R-CNN model...")
model = modellib.MaskRCNN(mode="inference", config=config,
                          model_dir=os.getcwd())
model.load_weights(weights_path, by_name=True)


# Add our own class_names to the class names list.
# This must stay after SimpleConfig is defined (see NUM_CLASSES above).
CLASS_NAMES.append('Background')
CLASS_NAMES.append('Greenery')
CLASS_NAMES.append('OOB')

In [37]:
#@title Load in video recording and Gaze coordinates
#@markdown This loads in the video and gaze coordinates. It checks that the eye tracker did not miss too many frames when capturing gaze location.


def load_gaze_data(path, gaze_filename):
  """Load the gaze coordinates from a CSV file.

  The first line of the file is treated as a header and discarded. For every
  remaining line the first two comma-separated fields are parsed as floats
  and rounded to the nearest integer.

  Args:
    path: folder containing the gaze file.
    gaze_filename: name of the CSV file inside ``path``.

  Returns:
    A tuple ``(GX, GY)`` of two lists of ints: the first and second column.

  Raises:
    ValueError / IndexError: if a data line does not hold two numeric fields.
  """
  # The context manager closes the file handle as soon as it has been read.
  with open(os.path.join(path, gaze_filename)) as gaze_file:
    rows = gaze_file.read().strip().split("\n")

  GX, GY = [], []
  for row in rows[1:]:  # rows[0] is the header line
    fields = row.split(",")
    GX.append(round(float(fields[0])))
    GY.append(round(float(fields[1])))
  return GX, GY

def load_video(path, video_filename):
  """Open the input video and work out where the labelled video will be written.

  Args:
    path: folder containing the video.
    video_filename: name of the video file inside ``path``.

  Returns:
    A tuple ``(vs, vid_output)``: the opened ``cv2.VideoCapture`` and the path
    ``<path>/output_<video_filename>`` for the output video.

  Raises:
    IOError: if the video cannot be opened.
  """
  vid_input = os.path.join(path, video_filename)
  vid_output = os.path.join(path, "output_" + video_filename)

  # Initialise the video stream
  vs = cv2.VideoCapture(vid_input)
  # VideoCapture does not raise on a bad path; without this check the failure
  # would only surface later as a zero frame count / failed reads.
  if not vs.isOpened():
    raise IOError("Could not open video file: {}".format(vid_input))

  return vs, vid_output

def check_frames(vs):
  """Check that the eye tracker did not miss too many frames.

  Compares the video's frame count with the number of gaze samples in the
  module-level ``GX`` list. When up to 10 samples are missing, ``GX`` and
  ``GY`` are padded in place with 0s so that every frame has a coordinate.

  Args:
    vs: an opened ``cv2.VideoCapture``.

  Returns:
    The total number of frames reported by the video.

  Raises:
    Exception: if more than 10 gaze samples are missing.
  """
  # Count number of frames in video
  prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
  total = int(vs.get(prop))
  print("[INFO] {} total frames in video".format(total))

  # compare to number of gaze datapoints to check the gaze tracker didn't miss too many
  dropped_frames = total - len(GX)

  if dropped_frames > 10:
    raise Exception('The number of dropped frames is too high: {}'.format(dropped_frames))

  # A negative difference means there are more gaze samples than frames:
  # nothing was dropped and nothing needs padding.
  padding = max(dropped_frames, 0)
  GX.extend([0] * padding)
  GY.extend([0] * padding)
  print('[INFO] The eye tracker dropped {} frames. {} 0s have been added to each gaze coordinate list.'.format(padding, padding))

  return total
  

""" Load in the video and gaze data"""
# GX / GY become module-level lists; check_frames and get_gazed_upon_object
# read them directly rather than receiving them as arguments.
GX, GY = load_gaze_data(path, gaze_filename)
# `video` is the cv2.VideoCapture, `vid_output` the path for the labelled video.
video, vid_output = load_video(path,video_filename)
# Compares the video's frame count with len(GX); may append 0s to GX / GY or raise.
check_frames(vs = video)

In [39]:
#@title SAGA For Loop Functions
""" SAGA For Loop Functions """

def draw_mrcnn_output(frame, startX, startY, endX, endY, color, label, score):
  """Draw a detection's bounding box and its "label: score" caption on frame.

  The frame is modified in place; nothing is returned.
  """
  top_left = (startX, startY)
  bottom_right = (endX, endY)
  cv2.rectangle(frame, top_left, bottom_right, color, 2)

  caption = "{}: {:.3f}".format(label, score)
  # Place the caption just above the box, or just below its top edge when the
  # box sits too close to the top of the frame.
  if startY - 10 > 10:
    caption_y = startY - 10
  else:
    caption_y = startY + 10
  cv2.putText(frame, caption, (startX, caption_y), cv2.FONT_HERSHEY_SIMPLEX,
    0.6, color, 2)

def apply_green_overlay(frame, gmask, greenery_score):
  """Tint the pixels flagged in gmask and record the frame's greenery share.

  Args:
    frame: image to draw on; blended in place.
    gmask: single-channel mask in which greenery pixels have the value 255.
    greenery_score: running list of per-frame greenery percentages.

  Returns:
    ``greenery_score``, with the percentage of mask pixels equal to 255
    appended when at least one such pixel exists. Frames without any flagged
    pixel add no entry.
  """
  green_pixels = gmask == 255  # boolean mask of the greenery pixels

  overlay = frame.copy()
  # Channel 0 (blue) high, channels 1 and 2 (green, red) low.
  overlay[green_pixels] = (250, 20, 20)
  alpha = 0.6
  cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

  green_count = int(np.count_nonzero(green_pixels))
  if green_count:
    # Divide by the mask's real pixel count rather than assuming 720x1280, so
    # the percentage stays correct for other video resolutions.
    greenery_score.append((green_count / gmask.size) * 100)

  return greenery_score

 
def identify_inframe_objects(GY, GX, count, mask, inframe_gaze_checklist, confidence_list, inframe_object_loc_X,
                          inframe_object_loc_Y, startX, startY):
  """Record whether the gaze point of frame ``count`` lies on one object's mask.

  Exactly one entry is appended to each of the four running lists:

  * gaze on the mask    -> (class label, 2, startX, startY)
  * gaze off the mask   -> (0, 0, 0, 0)
  * gaze outside frame  -> ('OOB', 0, 0, 0)

  The class label is looked up through the module-level ``CLASS_NAMES`` and
  ``classID`` names, which the calling loop sets for the current detection.

  Args:
    GY, GX: per-frame gaze coordinates (row, column).
    count: index of the current frame.
    mask: 2-D per-pixel mask of one detected object, indexed [row, column].
    inframe_gaze_checklist, confidence_list, inframe_object_loc_X,
    inframe_object_loc_Y: running per-frame lists, extended in place.
    startX, startY: top-left corner of the object's bounding box.

  Returns:
    The four running lists, in the order they were passed in.
  """
  # Take the frame size from the mask itself instead of assuming 720x1280, so
  # gaze points beyond a smaller frame are reported as out of bounds rather
  # than raising IndexError.
  frame_height, frame_width = mask.shape[:2]
  gaze_y, gaze_x = GY[count], GX[count]

  # A coordinate of 0 counts as out of bounds, as in the original comparison.
  if 0 < gaze_y < frame_height and 0 < gaze_x < frame_width:
    if mask[gaze_y, gaze_x]:
      entry = (CLASS_NAMES[classID], 2, startX, startY)
    else:
      entry = (0, 0, 0, 0)
  else:
    entry = ('OOB', 0, 0, 0)

  label, gaze_confidence, loc_x, loc_y = entry
  inframe_gaze_checklist.append(label)
  confidence_list.append(gaze_confidence)
  inframe_object_loc_X.append(loc_x)
  inframe_object_loc_Y.append(loc_y)

  return inframe_gaze_checklist, confidence_list, inframe_object_loc_X, inframe_object_loc_Y


def get_gazed_upon_object(inframe_gaze_checklist, confidence_list, inframe_object_loc_X, inframe_object_loc_Y, gazed_upon_object_list, confidence, gmask):
  """Decide what was gazed at in the current frame and record it.

  The first non-zero entry of ``inframe_gaze_checklist`` wins. When there is
  none, the greenery mask is consulted at the gaze point, which is read from
  the module-level ``GX``, ``GY`` and ``count`` names.

  Exactly one label is appended to ``gazed_upon_object_list`` and one value to
  ``confidence`` on every call, so both lists stay aligned with the processed
  frames.

  Args:
    inframe_gaze_checklist: per-detection entries for this frame (a class
      label, 'OOB', or 0 when the gaze missed that detection).
    confidence_list: per-detection confidence values, parallel to the checklist.
    inframe_object_loc_X, inframe_object_loc_Y: per-detection box corners
      (accepted for interface compatibility; not used here).
    gazed_upon_object_list: running list of per-frame labels, extended in place.
    confidence: running list of per-frame confidence values, extended in place.
    gmask: greenery mask in which greenery pixels have the value 255.

  Returns:
    ``(gazed_upon_object_list, confidence, new_champ)`` where ``new_champ`` is
    the label recorded for this frame.
  """
  winner = next((i for i, e in enumerate(inframe_gaze_checklist) if e != 0), None)

  if winner is not None:
    new_champ = inframe_gaze_checklist[winner]
    # The checklist stores label strings, so the original `== 59` (the class
    # id) could never match. The id is kept for callers that pass ids.
    # NOTE(review): assumes the labels file spells class 59 'potted plant' - confirm.
    if new_champ == 59 or new_champ == 'potted plant':
      new_champ = 'Greenery'
    gazed_upon_object_list.append(new_champ)
    # Carry the winning detection's confidence over so `confidence` has one
    # entry per recorded frame.
    confidence.append(confidence_list[winner] if winner < len(confidence_list) else 0)
    return gazed_upon_object_list, confidence, new_champ

  # No detection was hit: fall back to the greenery mask at the gaze point.
  gaze_y, gaze_x = GY[count], GX[count]
  frame_height, frame_width = gmask.shape[:2]
  if 0 < gaze_y < frame_height and 0 < gaze_x < frame_width:
    # The mask is indexed [row, column], hence (gaze_y, gaze_x).
    new_champ = 'Greenery' if gmask[gaze_y, gaze_x] == 255 else 'Background'
  else:
    # Previously nothing was recorded here, which shifted every later entry
    # of the per-frame list by one.
    new_champ = 'OOB'

  gazed_upon_object_list.append(new_champ)
  confidence.append(0)
  return gazed_upon_object_list, confidence, new_champ


def overlay_label(CLASS_NAMES, new_champ, frame):
  """Write the gazed-upon label near the top-left corner of frame.

  ``new_champ`` is drawn as given; ``CLASS_NAMES`` is accepted but not used.
  Returns the image returned by ``cv2.putText``.
  """
  text_origin = (50, 50)
  red = (0, 0, 255)
  return cv2.putText(
    frame, new_champ, text_origin, cv2.FONT_HERSHEY_SIMPLEX,
    2, red, 2, cv2.LINE_AA)


def overlay_gaze_cursor(GX, GY, count, frame):
  """Mark the gaze position of frame ``count`` with a filled red circle.

  Returns the image returned by ``cv2.circle``.
  """
  gaze_point = (GX[count], GY[count])
  red = (0, 0, 255)
  filled = -1  # a negative thickness makes cv2.circle draw a filled disc
  return cv2.circle(frame, gaze_point, 30, red, filled)

def print_user_info(count, total):
  """Print progress information for the processing loop.

  At frame 8 the duration of the last processed frame (taken from the
  module-level ``start`` / ``end`` timestamps set by the loop) is reported
  together with an estimate for the whole video, given that one frame in
  four is processed. A second message is printed at the middle frame.

  Args:
    count: index of the current frame.
    total: total number of frames in the video.
  """
  if count == 8:
    elap = (end - start)
    print("[INFO] single frame took {:.4f} seconds".format(elap))
    print("[INFO] estimated total time to finish: {:.4f}".format((elap * (total/4))))

  # `count` runs over every frame, so the midpoint is total/2. The previous
  # total/8 announced "Halfway!" after only an eighth of the video.
  if count == round(total/2):
    print("[INFO] Halfway!")
In [None]:
#@title Main Gaze Detection For Loop
# Define the lists that will be built in the following loop
gazed_upon_object_list = [] # List of objects that were being looked at for each frame
confidence = [] # List saying whether the gaze is in the mask/just in the box
green_list = []
greenery_score = []
persons_in_video = []
vehicles_in_video = []

# Bind the names this loop needs at module level. `vs`, `total` and `writer`
# previously existed only as locals of load_video / check_frames, so a fresh
# run of the notebook stopped here with a NameError.
vs = video
frame_count_prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
total = int(vs.get(frame_count_prop))
writer = None

# Begin the processing loop. This finds the coordinates and labels for each
# object and computes a greenery mask. It compares the gaze coordinates with
# the object masks / greenery mask to determine what is being looked at for
# each processed frame. The frame-by-frame result is stored in
# 'gazed_upon_object_list'; 'persons_in_video' and 'vehicles_in_video' hold the
# number of persons / vehicles detected in each processed frame, and
# 'greenery_score' the percentage of pixels tagged as green.
#
# NOTE: `count` and `classID` must remain module-level names because
# get_gazed_upon_object and identify_inframe_objects read them directly.
now = datetime.now() #Print the current time for timing checks
current_time = now.strftime("%H:%M:%S")
print('[INFO] Starting Time =', current_time)

for count in range(0, total):

  start = time.time()
  # read the next frame from the file
  (grabbed, frame) = vs.read()
  if not grabbed:
    # The container can report more frames than are decodable; stop cleanly
    # instead of passing frame=None to OpenCV.
    print("[INFO] video stream ended early at frame {}".format(count))
    break

  if (count % 4) == 0: # We only process 1 in 4 frames (to save time). Remove this condition to process all frames
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    gmask = cv2.inRange(hsv, (30, greenery_value_A, 20), (greenery_value_B, 255, 255))
    r = model.detect([frame], verbose=0)[0] # verbose tells us whether we want to see the output or not

    # Create the writer once, before the per-detection loop, so that a first
    # frame without any detections still has a writer to write to.
    if writer is None:
      fourcc = cv2.VideoWriter_fourcc(*"MJPG") # Initialises the video writer
      writer = cv2.VideoWriter(vid_output, fourcc, 10,
        (frame.shape[1], frame.shape[0]), True) # vid_output is the path in the drive folder for the output video

    inframe_gaze_checklist = []
    inframe_labels = []
    confidence_list = []
    inframe_object_loc_X = []
    inframe_object_loc_Y = []
    persons_in_frame = 0
    vehicles_in_frame = 0

    for detection_idx in range(r["rois"].shape[0]): # Loop over the detections
      # extract the class ID and mask for the current detection, then
      # grab the color to visualize the mask (in BGR format)
      classID = r["class_ids"][detection_idx]
      mask = r["masks"][:, :, detection_idx]
      color = COLORS[classID][::-1]

      # visualize the pixel-wise mask of the object
      frame = visualize.apply_mask(frame, mask, color, alpha=0.5)

      # extract the bounding box information, label, predicted
      # probability, and visualization color
      (startY, startX, endY, endX) = r["rois"][detection_idx]
      label = CLASS_NAMES[classID]
      score = r["scores"][detection_idx]
      color = [int(c) for c in np.array(COLORS[classID]) * 255]
      inframe_labels.append(classID)

      if classID == 1: # If it's a person, add one to the person count
        persons_in_frame += 1
      elif classID in (3, 4, 6, 8): # If object is a car, bus, motorbike or truck, count it as a vehicle
        vehicles_in_frame += 1

      draw_mrcnn_output(frame, startX, startY, endX, endY, color, label, score)

      # Find labels and coords for all objects in a frame
      inframe_gaze_checklist, confidence_list, inframe_object_loc_X, inframe_object_loc_Y = identify_inframe_objects(
        GY, GX, count, mask, inframe_gaze_checklist, confidence_list, inframe_object_loc_X,
        inframe_object_loc_Y, startX, startY)

    greenery_score = apply_green_overlay(frame, gmask, greenery_score) # Record the green share of the frame and tint the greenery pixels
    persons_in_video.append(persons_in_frame) # One entry per processed frame
    vehicles_in_video.append(vehicles_in_frame)

    gazed_upon_object_list, confidence, new_champ = get_gazed_upon_object(inframe_gaze_checklist, confidence_list, inframe_object_loc_X, inframe_object_loc_Y, gazed_upon_object_list, confidence, gmask)

    overlay_label(CLASS_NAMES, new_champ, frame)
    overlay_gaze_cursor(GX, GY, count, frame)

    end = time.time()
    writer.write(frame)

  print_user_info(count, total)

# release the file pointers
print("[INFO] cleaning up...")
if writer is not None: # stays None when no frame was processed
  writer.release()
vs.release()

 

In [48]:
#@title Download the gaze statistics files
#@markdown This downloads two files: one contains gaze statistics, the other is a list of objects gazed upon for each frame of footage.

def get_gaze_stats(persons_in_video, vehicles_in_video, greenery_score, total, video_filename):
  """Generate and download a CSV file of summary statistics.

  Args:
    persons_in_video: number of persons detected in each processed frame.
    vehicles_in_video: number of vehicles detected in each processed frame.
    greenery_score: percentage of green pixels for each recorded frame.
    total: total number of frames in the video (one in four is processed).
    video_filename: name of the video; its stem is used in the output name.

  The file is written to the working directory as
  ``summary_scores_<video stem>.csv`` and handed to ``files.download``.
  """
  def mean_or_zero(values):
    # An empty list (e.g. no frame contained greenery) yields 0.0 instead of
    # a ZeroDivisionError.
    return sum(values) / len(values) if values else 0.0

  # Frames 0, 4, 8, ... are processed, i.e. ceil(total / 4) frames.
  processed_frames = -(-int(total) // 4)

  key_feature_scores_df = pd.DataFrame({
    'Feature': ['Num of processed frames', 'Persons per frame', 'Vehicles per frame', 'Greenery per frame (% of pixels)'],
    'Score': [processed_frames, mean_or_zero(persons_in_video), mean_or_zero(vehicles_in_video), mean_or_zero(greenery_score)],
  })

  # Name the CSV after the video's stem; appending the full video filename
  # produced a CSV with a video extension (e.g. ".mp4").
  output_filename = "summary_scores_" + os.path.splitext(video_filename)[0] + ".csv"
  key_feature_scores_df.to_csv(output_filename, index = False)
  files.download(output_filename)


def get_GazedUpon_file(gazed_upon_object_list, video_filename):
  """Write and download a CSV file listing the gazed-upon object per frame.

  Args:
    gazed_upon_object_list: one label per processed frame.
    video_filename: name of the video; its stem is used in the output name.

  The file is written to the working directory as
  ``gazed_upon_objects_<video stem>.csv`` and handed to ``files.download``.
  """
  # Use the video's stem plus ".csv"; appending the full video filename gave
  # the CSV a video extension and ran the two name parts together.
  output_filename = "gazed_upon_objects_" + os.path.splitext(video_filename)[0] + ".csv"
  GU_list_df = pd.DataFrame(gazed_upon_object_list)
  GU_list_df.to_csv(output_filename, index = False)
  files.download(output_filename)

# Both calls write a CSV to the working directory and trigger a browser
# download through google.colab's files.download.
# `total` and the result lists come from the main processing cell above.
get_gaze_stats(persons_in_video, vehicles_in_video, greenery_score, total, video_filename)
get_GazedUpon_file(gazed_upon_object_list, video_filename)

In [1]:
#@title Functions for Generating Graphs
def normalise_data(data):
  """Return a new list with the items data[0] .. data[len(data) - 1], in order."""
  return [data[position] for position in range(len(data))]

def prepare_for_graphing(data_set, unique_cats): # Prepare data for graphing
  """Map every category to the broken-bar segments of the frames it occupies.

  For each category in ``unique_cats`` the indices of the matching entries of
  ``data_set`` are collected and converted with ``get_broken_bar_list``.
  Returns a dict keyed by category, in the order of ``unique_cats``.
  """
  bars_by_category = {}
  for category in unique_cats:
    frame_indices = [idx for idx, entry in enumerate(data_set) if entry == category]
    bars_by_category[category] = get_broken_bar_list(frame_indices)
  return bars_by_category

# Function to get final list for the broken bar chart
def get_broken_bar_list(gaze_object_list):
  """Turn frame indices into (start, width) pairs, each one frame wide."""
  return [(frame_index, 1) for frame_index in gaze_object_list]

def generate_graph(GU_object_graphing_dict, unique_cats, system_data, ax):
  """Draw a broken-bar timeline with one row per gazed-upon category.

  Args:
    GU_object_graphing_dict: category -> list of (start, width) segments.
    unique_cats: the categories; their position selects the fallback colour
      and their number sets the height of the plot.
    system_data: per-frame sequence; its length sets the x-axis range.
    ax: the matplotlib axes to draw on.

  Returns:
    ``ax``, after drawing.
  """
  # Fixed colours for the key categories; every other category takes a colour
  # from the fallback palette according to its position in unique_cats.
  fixed_colors = {'Greenery': 'green', 'Person': 'red', 'Vehicle': 'orange', 'Background': 'blue'}
  fallback_colors = ['purple', 'olive', 'brown', 'cyan', 'pink']

  color_dict = {}
  for position, category in enumerate(unique_cats):
    fallback = fallback_colors[position % len(fallback_colors)]
    color_dict[category] = fixed_colors.get(category, fallback)

  # Each row is 5 units tall; the first row starts at y = 5.
  y_tick_positions = []
  drawn_categories = []
  for row, category in enumerate(GU_object_graphing_dict, start=1):
    ax.broken_barh(GU_object_graphing_dict[category], ((row*5), 5), facecolors='tab:{}'.format(color_dict[category]))
    y_tick_positions.append(row*5+2.5)
    drawn_categories.append(category)

  row_count = len(unique_cats)
  ax.set_ylim(5, row_count*5+5)
  ax.set_xlim(0, len(system_data))
  ax.set_xlabel('Frame Number')
  ax.set_ylabel('Gazed Upon Object')

  ax.set_yticks(y_tick_positions)
  # Label the rows with the categories that were actually drawn on them.
  ax.set_yticklabels(drawn_categories)
  ax.grid(False)

  # Separator lines on the upper edge of every row. The previous hard-coded
  # [10, 15, 20, 25, 30] was only right for exactly five categories; for five
  # categories this range produces the same values.
  for boundary in range(10, row_count*5 + 10, 5):
    ax.axhline(y=boundary, linewidth=1, color='gray', alpha = 0.3)

  return ax

In [None]:
#@title Generate and Download Summary Visualisations
normal_system_data = normalise_data(gazed_upon_object_list)
# Lists all unique objects appearing in the gazed-upon list. Sorted so that
# the row order of the graph is the same on every run (plain set order is not).
system_unique_categories = sorted(set(normal_system_data), key=str)
GU_object_graphing_dict = prepare_for_graphing(normal_system_data, system_unique_categories)

# Set the text styling before the figure is created
plt.rc('font', family='sans serif')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')

#Initiate Graph
fig, plotter = plt.subplots(1, figsize=(8,3), sharex = True, gridspec_kw = {'hspace' : 0.05})

ax = generate_graph(GU_object_graphing_dict, system_unique_categories, gazed_upon_object_list, plotter)

# `date` was never defined anywhere in the notebook, so saving raised a
# NameError; stamp the file with today's date.
date = datetime.now().strftime("%Y-%m-%d")
figure_filename = 'Comparison_Fig_Video_{}.svg'.format(date)
fig.savefig(figure_filename, format='svg', dpi=1200)
files.download(figure_filename)