## Mount your Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so that files stored there can be
# opened through ordinary paths below that directory.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import json
import os, re
from pathlib import Path
import scipy
import math
from scipy.ndimage import gaussian_filter1d

## Define framework

In [None]:
# Frames per second used to convert a frame count into seconds.
_FRAME_RATE = 25
# Lower bound for the distance between a detection and the image centre.
# Without it a box centred exactly on (0.5, 0.5) divides by zero.
_MIN_CENTER_DISTANCE = 1e-6
# Per-frame numeric fields stored for every class (NaN when nothing was seen).
_BOX_FIELDS = ('conf', 'center_x', 'center_y', 'height', 'width')
# Column order of the returned table (after the leading "location" column).
_MEASURES = [
  'frequency', 'adjusted_frequency',
  'duration', 'adjusted_duration',
  'sequentiality', 'adjusted_sequentiality',
  'prominence', 'adjusted_prominence',
  'centrality',
  'exposure', 'adjusted_exposure',
]


def _empty_tracks(n_frames, number_of_classes):
  """Return {class id: record} with an all-False mask and NaN box fields."""
  tracks = {}
  for c in range(number_of_classes):
    track = {'present': np.zeros(n_frames, dtype=bool)}
    for field in _BOX_FIELDS:
      track[field] = np.full(n_frames, np.nan)
    tracks[c] = track
  return tracks


def _load_yolo(path_to_file, number_of_classes):
  """Read a YOLO result JSON into per-class tracks ordered by frame number."""
  df_raw = pd.read_json(path_to_file)
  df_raw = df_raw.sort_values(by=['filename'])
  # The entry whose filename sorts last is discarded, as in the original
  # pipeline.  NOTE(review): the reason is not visible here - confirm.
  df_raw = df_raw[:-1]
  stems = df_raw['filename'].map(lambda x: Path(x).stem)
  # Frame number = first run of digits that follows an underscore in the stem,
  # left-padded to six characters so that string sorting follows frame order.
  frame_ids = stems.str.extract(r'(?:_)(\d+)', expand=False)
  frame_ids = frame_ids.map(lambda x: f'{x:0>6}')
  df_raw = df_raw.assign(new_index=frame_ids).sort_values(by=['new_index'])

  tracks = _empty_tracks(df_raw.shape[0], number_of_classes)
  for frame, objects in enumerate(df_raw['objects']):
    for obj in objects:
      # An unknown class id raises KeyError, as it did before.
      # Several objects of one class in a frame: the last one wins.
      track = tracks[int(obj['class_id'])]
      coords = obj['relative_coordinates']
      track['present'][frame] = True
      track['conf'][frame] = obj['confidence']
      track['center_x'][frame] = coords['center_x']
      track['center_y'][frame] = coords['center_y']
      track['height'][frame] = coords['height']
      track['width'][frame] = coords['width']
  return tracks


def _load_frcnn(path_to_file, number_of_classes):
  """Read a Faster R-CNN result JSON (one row per frame) into per-class tracks."""
  df_raw = pd.read_json(path_to_file)
  tracks = _empty_tracks(df_raw.shape[0], number_of_classes)
  for frame in range(df_raw.shape[0]):
    for _, values in df_raw.iloc[frame].items():
      # Cells without a detection (None / NaN padding) are skipped.
      if not isinstance(values, dict) or not values:
        continue
      # Only the first key/value pair of a cell is used.  The key is a
      # 1-based class label; labels outside the known classes are ignored.
      label, box = next(iter(values.items()))
      c = int(label) - 1
      if c not in tracks:
        continue
      track = tracks[c]
      # NOTE(review): presumably box = [y_min, x_min, y_max, x_max] - confirm.
      # No confidence is stored on this path.
      track['present'][frame] = True
      track['center_x'][frame] = (box[1] + box[3]) / 2
      track['center_y'][frame] = (box[0] + box[2]) / 2
      track['height'][frame] = box[2] - box[0]
      track['width'][frame] = box[3] - box[1]
  return tracks


def _fill_gaps(present, n_gap):
  """Return a copy of `present` with the frames between two detections that
  lie at most `n_gap` frames apart set to True."""
  window = int(n_gap)
  filled = present.copy()
  # Only frames detected before smoothing act as the right end of a gap.
  for frame in np.flatnonzero(present).tolist():
    # The look-back never goes below frame 0, so nothing wraps around to the
    # end of the video.
    for back in range(1, min(window, frame) + 1):
      if filled[frame - back]:
        filled[frame - back + 1:frame] = True
  return filled


def _run_lengths(present):
  """Return, per frame, the length of the run of consecutive truthy frames
  it belongs to (0.0 for frames outside any run)."""
  n_frames = len(present)
  lengths = np.zeros(n_frames, dtype=float)
  frame = 0
  while frame < n_frames:
    if not present[frame]:
      frame += 1
      continue
    end = frame
    while end < n_frames and present[end]:
      end += 1
    lengths[frame:end] = end - frame
    frame = end
  return lengths


def _class_measures(present, track, false_positive, false_negative):
  """Compute every measure of one class and return them keyed by column name."""
  n_frames = len(present)
  # .item() yields a Python number so that an empty video still raises
  # ZeroDivisionError instead of silently producing NaN.
  total = present.sum().item()
  frequency = total / n_frames
  duration = total / _FRAME_RATE

  seq = _run_lengths(present).tolist()
  # One entry for every frame whose run length differs from the previous
  # frame's.  NOTE(review): this also records the 0 that follows each run, so
  # gaps enter the mean - confirm that this is the intended definition.
  changes = [cur for prev, cur in zip(seq, seq[1:]) if cur != prev]
  if changes:
    sequentiality = sum(changes) / len(changes)
  elif seq and seq[-1] > 0:
    # The run length never changes: a single run covers the whole video.
    sequentiality = seq[-1]
  else:
    sequentiality = 0

  centralities = []
  prominences = []
  exposures = []
  frames = zip(track['center_x'].tolist(), track['center_y'].tolist(),
               track['height'].tolist(), track['width'].tolist(), seq)
  for center_x, center_y, height, width, run in frames:
    if math.isnan(center_x):
      continue
    # Euclidean distance to (0.5, 0.5).
    # NOTE(review): this is the image centre only for relative coordinates -
    # confirm that the Faster R-CNN boxes are normalised as well.
    distance = math.sqrt(((center_x - 0.5) ** 2) + ((center_y - 0.5) ** 2))
    if distance < _MIN_CENTER_DISTANCE:
      distance = _MIN_CENTER_DISTANCE
    centrality = 1 / distance / 2
    area = height * width * 100  # scaled by 100 for interpretation
    centralities.append(centrality)
    prominences.append(area * centrality)
    exposures.append(area * (centrality / 2) * (run / 5))

  if total != 0 and centralities:
    centrality = sum(centralities) / len(centralities)
    prominence = sum(prominences) / len(prominences)
  else:
    centrality = 0
    prominence = 0
  # Exposure is averaged over all frames, not only over frames with a box.
  exposure = sum(exposures) / n_frames

  def adjusted(value):
    return value * (1 / (1 + false_positive)) * (1 / (1 - false_negative))

  return {
    'frequency': frequency,
    'adjusted_frequency': adjusted(frequency),
    'duration': duration,
    'adjusted_duration': adjusted(duration),
    'sequentiality': sequentiality,
    'adjusted_sequentiality': adjusted(sequentiality),
    'prominence': prominence,
    'adjusted_prominence': adjusted(prominence),
    'centrality': centrality,
    'exposure': exposure,
    'adjusted_exposure': adjusted(exposure),
  }


def results(path_to_file, model, mono, smoothing, n_Gap, false_positive, false_negative):
  """Compute per-class visibility measures from a detection result file.

  Args:
    path_to_file: Path of the detection JSON, passed to ``pandas.read_json``.
    model: ``'yolo'`` selects the YOLO parser; any other value selects the
      Faster R-CNN parser.
    mono: Truthy for the 5-class setup, falsy for the 6-class setup. It also
      selects which list of location labels is attached to the result.
    smoothing: ``'Gaussian'`` passes each per-frame detection mask through
      ``gaussian_filter1d`` with sigma 1; ``'Naive'`` fills the frames between
      two detections at most ``n_Gap`` frames apart; any other value disables
      smoothing.
    n_Gap: Look-back window in frames for ``'Naive'`` smoothing. It is not
      read for the other smoothing modes.
    false_positive: Rate used in the adjustment factor ``1 / (1 + fp)``.
    false_negative: Rate used in the adjustment factor ``1 / (1 - fn)``.

  Returns:
    A DataFrame indexed by ``class`` with a ``location`` column followed by
    frequency (share of frames with a detection), duration (detected frames
    divided by 25), sequentiality, prominence, centrality and exposure,
    together with their ``adjusted_`` variants (centrality has none).

  Raises:
    KeyError: A YOLO object carries a class id outside the configured classes.
    ZeroDivisionError: The file yields no frames, or ``false_negative`` is 1.
  """
  is_mono = bool(mono)
  number_of_classes = 5 if is_mono else 6

  if model == 'yolo':
    tracks = _load_yolo(path_to_file, number_of_classes)
  else:
    tracks = _load_frcnn(path_to_file, number_of_classes)

  rows = []
  for c in range(number_of_classes):
    track = tracks[c]
    present = track['present']
    if smoothing == 'Gaussian':
      # The filter receives the boolean mask itself and, by scipy's default,
      # returns an array of the input dtype.
      # NOTE(review): confirm that a boolean result is what is wanted here.
      present = gaussian_filter1d(present, 1)
    elif smoothing == 'Naive':
      present = _fill_gaps(present, n_Gap)
    rows.append(_class_measures(present, track, false_positive, false_negative))

  df_measures = pd.DataFrame(
    rows,
    columns=_MEASURES,
    index=pd.Index(list(range(number_of_classes)), name='class'),
    dtype=float,
  )

  if is_mono:
    locations = ['Campina', 'Jumbo', 'Jumbo_been', 'Loterij', 'Loterij_been']
  else:
    # NOTE(review): 'left lef' looks like a typo for 'left leg'; it is kept
    # unchanged in case downstream code matches on this label.
    locations = ['shoulder', 'right chest', 'left lef', 'left chest', 'right leg', 'head']
  df_measures.insert(0, "location", locations, True)

  return df_measures

## Implementation


In [None]:
# NOTE(review): presumably the false-positive and false-negative rates derived
# from validation counts - confirm where 44, 2, 1, 216 and 45 come from.
fp = 2/(44+2) 
fn = 1/(1+216-45)
# n_Gap is only read when smoothing='Naive'. The original passed the undefined
# name `Nan`, which raised NameError before results() was even called.
df_output = results(path_to_file = '/content/drive/MyDrive/videos_testing/outputs/yolo_mono/result_video_2.json',model = 'yolo',mono=True,smoothing='Gaussian',n_Gap = None,false_positive = fp ,false_negative = fn) 