In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as patches
import seaborn as sns
import os
import glob
import csv
from natsort import natsorted
from itertools import chain
import scipy.stats as stats
from scipy.stats import brunnermunzel
from tqdm import tqdm

# Working-directory check: every data path in this notebook is relative ('../data', '../outputs').
# The value is not displayed because this is not the last expression of the cell.
os.getcwd()

# Font settings for exported figures: font type 42 for the PDF/PS backends and
# Arial as the sans-serif family.
for rc_key, rc_value in (
    ("pdf.fonttype", 42),
    ("ps.fonttype", 42),
    ("font.sans-serif", "Arial"),
    ("font.family", "sans-serif"),
):
    mpl.rcParams[rc_key] = rc_value

In [2]:
 def convert_frame_data(df):
    """Expand a BORIS event table into one label per video frame.

    The table is read positionally: column 12 holds the event type
    ("START", "STOP", "POINT", ...) and column 15 holds the frame number
    of the event.  A "POINT" event in the last row (or, failing that, the
    second-to-last row) marks the end of the recording.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table with at least 16 columns.

    Returns
    -------
    def_orientation_list : numpy.ndarray
        Rows of ``[frame, label]`` where label is "orientation" for frames
        from a START event up to and including the matching STOP event, and
        "no" otherwise.  Mixed int/str rows make NumPy store everything as
        strings.
    last_frame
        Frame value of the closing "POINT" event, returned exactly as stored
        in the table (not cast to int).

    Raises
    ------
    ValueError
        If neither of the last two rows is a "POINT" event.  Previously this
        only printed "error" and then crashed on the unbound ``last_frame``.
    """
    rows = df.to_numpy()

    # Look at the last row first, then the second-to-last one (also safe for
    # tables with fewer than two rows).
    last_frame = None
    for row in rows[:-3:-1]:
        if row[12] == "POINT":
            last_frame = row[15]
            break
    if last_frame is None:
        raise ValueError(
            'no "POINT" event found in the last two rows; cannot determine the last frame'
        )

    # Map each frame number to the first row that mentions it, so the per-frame
    # lookup below is O(1) instead of a scan over the whole table.
    first_row_at = {}
    for row_index, frame_value in enumerate(rows[:, 15]):
        first_row_at.setdefault(int(frame_value), row_index)

    log_label = "no"
    def_orientation_list = []
    for i in range(int(last_frame)):
        row_index = first_row_at.get(i)
        if row_index is not None:
            event = rows[row_index][12]
            if event == "START":
                # The START frame itself already counts as orientation.
                log_label = "orientation"
                def_orientation_list.append([i, log_label])
                continue
            if event == "STOP":
                # The STOP frame keeps the current label; the reset applies afterwards.
                def_orientation_list.append([i, log_label])
                log_label = "no"
                continue
            # Any other event (e.g. "finish" or "POINT") ends the labelling early.
            break
        def_orientation_list.append([i, log_label])

    return np.array(def_orientation_list), last_frame

In [3]:
def extract_orientation_frame(df):
    """Collect the (frame, class_name) pairs of all "orientation" detections.

    Exact duplicate rows are removed *before* the table is narrowed to the
    'frame' and 'class_name' columns, so a frame with several distinct
    detections still appears more than once in the result.

    Returns a NumPy array built from a list of ``[frame, class_name]`` rows;
    because each row mixes a number and a string, NumPy will normally store
    both columns as strings.
    """
    is_orientation = df["class_name"] == "orientation"
    detections = df.loc[is_orientation].drop_duplicates()
    frame_and_class = detections.filter(items=['frame', 'class_name'])
    return np.array(frame_and_class.values.tolist())

# Accuracy Calculation

In [4]:
def cal_accuracy(def_orientation_list, df_ori_list,
                 output_path='../outputs/accuracy_df_compared_def_yoru_data_orientation.csv'):
    """Frame-wise accuracy of detections against reference labels.

    Parameters
    ----------
    def_orientation_list : array-like
        Reference rows ``[frame, label]`` with label "orientation" or "no".
    df_ori_list : array-like
        Detection rows whose first column is the frame number of a detected
        orientation.  An empty (1-D) array is treated as "nothing detected".
    output_path : str or None, optional
        Where the per-frame correctness table is written as CSV.  The default
        is the path this function has always written to; pass ``None`` to
        skip writing.

    Returns
    -------
    (accuracy, accuracy_true, accuracy_all)
        Ratio of correctly classified frames, the number of correct frames
        and the number of evaluated frames.  The ratio is 0 when no frame
        was evaluated.

    Notes
    -----
    A reference frame is correct when it is labelled "orientation" and its
    frame number is among the detections, or labelled "no" and is not.  Rows
    with any other label print "error" and are left out of the counts.
    """
    reference = np.asarray(def_orientation_list)
    detected = np.asarray(df_ori_list)

    # A set gives O(1) membership tests; a 1-D array means there were no detections.
    detected_frames = set(detected[:, 0].tolist()) if detected.ndim == 2 else set()
    reference_rows = reference.tolist() if reference.ndim == 2 else []

    accuracy_list = []
    for k, row in enumerate(reference_rows):
        frame, label = row[0], row[1]
        if label == "orientation":
            accuracy_list.append([k, frame in detected_frames])
        elif label == "no":
            accuracy_list.append([k, frame not in detected_frames])
        else:
            print("error")

    accuracy_true = sum(1 for _, correct in accuracy_list if correct)
    accuracy_all = len(accuracy_list)
    accuracy = accuracy_true / accuracy_all if accuracy_all else 0

    if output_path is not None:
        accuracy_df = pd.DataFrame(accuracy_list, columns=['frame', 'detection'])
        accuracy_df.to_csv(output_path)
    return accuracy, accuracy_true, accuracy_all

# Precision Calculation

In [5]:
def cal_precision(def_orientation_list, df_ori_list):
    """Precision of detections against reference labels.

    Parameters
    ----------
    def_orientation_list : array-like
        Reference rows ``[frame, label]`` with label "orientation" or "no".
    df_ori_list : array-like
        Detection rows whose first column is the frame number of a detected
        orientation.  Every row counts as one detection, so a frame listed
        twice is counted twice.

    Returns
    -------
    (precision, precision_true, precision_all)
        Share of detections whose frame is labelled "orientation" in the
        reference, the number of such detections and the total number of
        detections.  The share is 0 when there are no detections.

    Notes
    -----
    The reference label is looked up by frame number.  The earlier version
    indexed the reference by row position, which was only right when row
    ``k`` described frame ``k``.
    """
    reference = np.asarray(def_orientation_list)
    detected = np.asarray(df_ori_list)

    # frame -> label; the first occurrence wins if a frame is listed twice.
    label_by_frame = {}
    if reference.ndim == 2:
        for row in reference.tolist():
            label_by_frame.setdefault(row[0], row[1])

    # A 1-D array means there were no detections at all.
    detected_frames = detected[:, 0].tolist() if detected.ndim == 2 else []

    precision_true = sum(
        1 for frame in detected_frames if label_by_frame.get(frame) == "orientation"
    )
    precision_all = len(detected_frames)
    precision = precision_true / precision_all if precision_all else 0
    return precision, precision_true, precision_all

# Recall Calculation

In [6]:
def cal_recall(def_orientation_list, df_ori_list):
    """Recall of detections against reference labels.

    Parameters
    ----------
    def_orientation_list : array-like
        Reference rows ``[frame, label]`` with label "orientation" or "no".
    df_ori_list : array-like
        Detection rows whose first column is the frame number of a detected
        orientation.  An empty (1-D) array is treated as "nothing detected".

    Returns
    -------
    (recall, recall_true, recall_all)
        Share of reference "orientation" frames that were detected, the
        number of detected ones and the number of reference "orientation"
        frames.  The share is 0 when the reference has no such frame.

    Notes
    -----
    Rows labelled "no" are ignored; rows with any other label print "error".
    """
    reference = np.asarray(def_orientation_list)
    detected = np.asarray(df_ori_list)

    # A set gives O(1) membership tests; a 1-D array means there were no detections.
    detected_frames = set(detected[:, 0].tolist()) if detected.ndim == 2 else set()
    reference_rows = reference.tolist() if reference.ndim == 2 else []

    recall_true = 0
    recall_all = 0
    for row in reference_rows:
        frame, label = row[0], row[1]
        if label == "orientation":
            recall_all += 1
            if frame in detected_frames:
                recall_true += 1
        elif label != "no":
            print("error")

    recall = recall_true / recall_all if recall_all else 0
    return recall, recall_true, recall_all

# Calculate F1 score

In [7]:
def cal_f1_score(precision, recall):
    """Return the F1 score, the harmonic mean of precision and recall.

    The result is 0 when ``precision + recall`` is zero, where the harmonic
    mean is undefined.  Invalid inputs such as ``None`` now raise instead of
    being silently turned into a score of 0 by a bare ``except``.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0
    return (2 * precision * recall) / denominator


In [8]:
# Read the table of input files and keep it as a 2-D NumPy array, one row per
# entry of file_list.csv.
df_file_list = pd.read_csv('../data/file_list.csv')
file_list = np.array(df_file_list.values.tolist())

In [9]:
# Display the loaded table. The comparison loops below read column 0 as the
# YORU file, column 1 as the BORIS file and columns 2/3 as the left/right
# definition files.
file_list

array([['200529_3_on.csv', '20529_3_on.csv',
        'processed_200529_3_onleft2.csv',
        'processed_200529_3_onright2.csv', 'movie2'],
       ['210118_1_on.csv', '210118_1_on.csv',
        'processed_210118_1_onleft.csv',
        'processed_210118_1_onright.csv', 'movie4']], dtype='<U31')

In [10]:
def cal_orientation_frame(df_left, df_right, flag_column=9):
    """Label each frame from the left and right definition tables.

    A frame is labelled "orientation" only when the flag in ``flag_column``
    equals 1 in BOTH tables; otherwise it is labelled "no".

    Parameters
    ----------
    df_left, df_right : pandas.DataFrame
        Per-frame tables read positionally; row ``i`` is treated as frame
        ``i``.  (Presumably the flag is a 0/1 value per frame; confirm against
        the Def_data CSV schema.)
    flag_column : int, optional
        Positional index of the flag column.  Defaults to 9, the column that
        was previously hard-coded.

    Returns
    -------
    orientation_def_list : numpy.ndarray
        Rows of ``[frame, label]``; mixed int/str rows make NumPy store both
        columns as strings.
    last_frame : int
        Number of rows in ``df_left``, i.e. the number of labelled frames.

    Raises
    ------
    ValueError
        If ``df_right`` has fewer rows than ``df_left``.

    Notes
    -----
    The earlier condition ``a == 1 & b == 1`` was parsed by Python as the
    chained comparison ``a == (1 & b) == 1``, because ``&`` binds tighter than
    ``==``.  Any odd right-hand flag (e.g. 3) was therefore treated as 1.  The
    test now uses the logical ``and``.
    """
    left_array = df_left.to_numpy()
    right_array = df_right.to_numpy()

    last_frame = left_array.shape[0]
    if right_array.shape[0] < last_frame:
        raise ValueError(
            f"right table has {right_array.shape[0]} rows but left table has {last_frame}"
        )

    orientation_def_list = []
    for i in range(int(last_frame)):
        # Convert both flags first so a malformed value fails on either side.
        left_flag = int(left_array[i, flag_column])
        right_flag = int(right_array[i, flag_column])
        if left_flag == 1 and right_flag == 1:
            log_label = "orientation"
        else:
            log_label = "no"
        orientation_def_list.append([i, log_label])

    orientation_def_list = np.array(orientation_def_list)
    return orientation_def_list, last_frame
                

In [11]:
# YORU detections compared with the rule-based "definition" labels.
# Each file contributes one result row; the counts are also summed so the
# final "total" row is computed over all frames rather than averaged per file.
result_rows = []  # one (1, 10) row per file, stacked after the loop
all_precision_true = 0
all_precision_all = 0
all_recall_true = 0
all_recall_all = 0
all_accuracy_true = 0
all_accuracy_all = 0
total_frame = 0
file_count = 1
last_frame = 0

for file_line in tqdm(file_list):
    # Detections (column 0 of the file table).
    yoru_data_path = f"../data/YORU_data/{file_line[0]}"
    df_yoru = pd.read_csv(yoru_data_path)
    df_ori_list = extract_orientation_frame(df_yoru)

    # Reference: left/right definition tables (columns 2 and 3).
    def_left_data_path = f"../data/Def_data/{file_line[2]}"
    df_def_left = pd.read_csv(def_left_data_path)

    def_right_data_path = f"../data/Def_data/{file_line[3]}"
    df_def_right = pd.read_csv(def_right_data_path)

    def_orientation_list, last_frame = cal_orientation_frame(df_def_left, df_def_right)

    # Keep the per-frame definition labels next to the input tables.
    def_orientation_df = pd.DataFrame(def_orientation_list, columns = ["frame", "detection"])
    def_orientation_df.to_csv(f'../data/Def_data/{file_count}_{os.path.splitext(file_line[2])[0]}_{os.path.splitext(file_line[3])[0]}.csv')
    file_count += 1

    precision, precision_true, precision_all = cal_precision(def_orientation_list, df_ori_list)
    recall, recall_true, recall_all = cal_recall(def_orientation_list, df_ori_list)
    accuracy, accuracy_true, accuracy_all = cal_accuracy(def_orientation_list, df_ori_list)
    f1_score = cal_f1_score(precision, recall)

    # Use a separate name so the loop variable is not overwritten.
    result_row = np.append(file_line, [accuracy, precision, recall, f1_score, last_frame])
    result_rows.append(result_row.reshape(1, -1))  # convert to a 1-row 2-D array

    all_precision_true += precision_true
    all_precision_all += precision_all
    all_recall_true += recall_true
    all_recall_all += recall_all
    all_accuracy_true += accuracy_true
    all_accuracy_all += accuracy_all
    total_frame += int(last_frame)

# Totals over all files; guarded so an empty run gives 0 instead of ZeroDivisionError.
all_accuracy = all_accuracy_true / all_accuracy_all if all_accuracy_all else 0
all_precision = all_precision_true / all_precision_all if all_precision_all else 0
all_recall = all_recall_true / all_recall_all if all_recall_all else 0
all_f1_score = cal_f1_score(all_precision, all_recall)

total_row = np.array(["total", "total", "total","total", "total", all_accuracy, all_precision, all_recall, all_f1_score, total_frame])
# The empty (0, 10) seed keeps the stacked shape and dtype the same as before.
result_list = np.vstack([np.empty((0, 10)), *result_rows, total_row.reshape(1, -1)])


100%|██████████| 2/2 [00:00<00:00,  2.27it/s]


In [12]:
# Display the YORU-vs-definition results: one row per file plus a final "total" row.
result_list

array([['200529_3_on.csv', '20529_3_on.csv',
        'processed_200529_3_onleft2.csv',
        'processed_200529_3_onright2.csv', 'movie2',
        '0.8096085409252669', '0.8641342344441855', '0.7658516369664319',
        '0.8120298827315381', '8992.0'],
       ['210118_1_on.csv', '210118_1_on.csv',
        'processed_210118_1_onleft.csv',
        'processed_210118_1_onright.csv', 'movie4', '0.8079403914590747',
        '0.9010115606936416', '0.6315522917194226', '0.7425934196814055',
        '8992.0'],
       ['total', 'total', 'total', 'total', 'total',
        '0.8087744661921709', '0.8785947017991217', '0.7054131054131054',
        '0.7825368210607336', '17984']], dtype='<U32')

In [13]:
# Human annotation (BORIS) compared with YORU detections.
# Initialisation: per-file result rows plus counts summed over all files.
result_rows2 = []  # one (1, 10) row per file, stacked after the loop
all_precision_true = 0
all_precision_all = 0
all_recall_true = 0
all_recall_all = 0
all_accuracy_true = 0
all_accuracy_all = 0
total_frame = 0

last_frame = 0

for file_line in tqdm(file_list):

    # Detections (column 0 of the file table).
    yoru_data_path = f"../data/YORU_data/{file_line[0]}"
    df_yoru = pd.read_csv(yoru_data_path)
    df_ori_list = extract_orientation_frame(df_yoru)
    # NOTE(review): the file name is fixed, so each iteration overwrites the
    # previous dump and only the last file's detections remain on disk.
    df_ori = pd.DataFrame(df_ori_list)
    df_ori.to_csv('../outputs/df_ori_compared_def_yoru_data_orientation.csv')

    # Reference: BORIS event table (column 1), expanded to one label per frame.
    boris_data_path = f"../data/BORIS_data/{file_line[1]}"
    df_boris = pd.read_csv(boris_data_path)
    boris_orientation_list, last_frame = convert_frame_data(df_boris)
    # NOTE(review): fixed file name here as well; overwritten on every iteration.
    df_ori_boris = pd.DataFrame(boris_orientation_list)
    df_ori_boris.to_csv('../outputs/df_ori_boris_compared_def_yoru_data_orientation.csv')

    precision, precision_true, precision_all = cal_precision(boris_orientation_list, df_ori_list)
    recall, recall_true, recall_all = cal_recall(boris_orientation_list, df_ori_list)
    accuracy, accuracy_true, accuracy_all = cal_accuracy(boris_orientation_list, df_ori_list)
    f1_score = cal_f1_score(precision, recall)

    # Use a separate name so the loop variable is not overwritten.
    result_row = np.append(file_line, [accuracy, precision, recall, f1_score, last_frame])
    result_rows2.append(result_row.reshape(1, -1))  # convert to a 1-row 2-D array

    all_precision_true += precision_true
    all_precision_all += precision_all
    all_recall_true += recall_true
    all_recall_all += recall_all
    all_accuracy_true += accuracy_true
    all_accuracy_all += accuracy_all
    total_frame += int(last_frame)

# Totals over all files; every ratio is guarded the same way (0 when undefined).
all_accuracy = all_accuracy_true / all_accuracy_all if all_accuracy_all else 0
all_precision = all_precision_true / all_precision_all if all_precision_all else 0
all_recall = all_recall_true / all_recall_all if all_recall_all else 0
all_f1_score = cal_f1_score(all_precision, all_recall)

total_row = np.array(["total", "total", "total","total", "total", all_accuracy, all_precision, all_recall, all_f1_score, total_frame])
# The empty (0, 10) seed keeps the stacked shape and dtype the same as before.
result_list2 = np.vstack([np.empty((0, 10)), *result_rows2, total_row.reshape(1, -1)])


100%|██████████| 2/2 [00:00<00:00,  2.38it/s]


In [14]:
# Display the BORIS-vs-YORU results: one row per file plus a final "total" row.
result_list2

array([['200529_3_on.csv', '20529_3_on.csv',
        'processed_200529_3_onleft2.csv',
        'processed_200529_3_onright2.csv', 'movie2',
        '0.8954626334519573', '0.892099743649499', '0.8879590887959089',
        '0.8900246003642884', '8992.0'],
       ['210118_1_on.csv', '210118_1_on.csv',
        'processed_210118_1_onleft.csv',
        'processed_210118_1_onright.csv', 'movie4', '0.9141459074733096',
        '0.8742774566473989', '0.8503162333099087', '0.8621303883149269',
        '8992.0'],
       ['total', 'total', 'total', 'total', 'total',
        '0.9048042704626335', '0.8851112055531946', '0.8729714605484051',
        '0.8789994199452268', '17984']], dtype='<U32')

In [15]:
# Manual annotation (BORIS) vs. Definition.
# The definition labels are the reference; the frames a human marked as
# "orientation" play the role of the detections.
result_rows3 = []  # one (1, 10) row per file, stacked after the loop
all_precision_true = 0
all_precision_all = 0
all_recall_true = 0
all_recall_all = 0
all_accuracy_true = 0
all_accuracy_all = 0
total_frame = 0
file_count = 1
last_frame = 0

for file_line in tqdm(file_list):

    # Reference: left/right definition tables (columns 2 and 3).
    def_left_data_path = f"../data/Def_data/{file_line[2]}"
    df_def_left = pd.read_csv(def_left_data_path)

    def_right_data_path = f"../data/Def_data/{file_line[3]}"
    df_def_right = pd.read_csv(def_right_data_path)

    # The frame count from the definition tables is not used here; the value
    # reported per file is the BORIS last frame read below.
    def_orientation_list, def_frame_count = cal_orientation_frame(df_def_left, df_def_right)

    # "Detections": frames labelled "orientation" in the BORIS table (column 1).
    boris_data_path = f"../data/BORIS_data/{file_line[1]}"
    df_boris = pd.read_csv(boris_data_path)
    boris_orientation_list, last_frame = convert_frame_data(df_boris)
    boris_filtered_list = [item for item in boris_orientation_list if item[1] == 'orientation']
    boris_filtered_list = np.array(boris_filtered_list)

    print(boris_filtered_list)

    precision, precision_true, precision_all = cal_precision(def_orientation_list, boris_filtered_list)
    recall, recall_true, recall_all = cal_recall(def_orientation_list, boris_filtered_list)
    accuracy, accuracy_true, accuracy_all = cal_accuracy(def_orientation_list, boris_filtered_list)
    f1_score = cal_f1_score(precision, recall)

    # Use a separate name so the loop variable is not overwritten.
    result_row = np.append(file_line, [accuracy, precision, recall, f1_score, last_frame])
    result_rows3.append(result_row.reshape(1, -1))  # convert to a 1-row 2-D array

    all_precision_true += precision_true
    all_precision_all += precision_all
    all_recall_true += recall_true
    all_recall_all += recall_all
    all_accuracy_true += accuracy_true
    all_accuracy_all += accuracy_all
    total_frame += int(last_frame)

# Totals over all files; every ratio is guarded the same way (0 when undefined).
all_accuracy = all_accuracy_true / all_accuracy_all if all_accuracy_all else 0
all_precision = all_precision_true / all_precision_all if all_precision_all else 0
all_recall = all_recall_true / all_recall_all if all_recall_all else 0
all_f1_score = cal_f1_score(all_precision, all_recall)

total_row = np.array(["total", "total", "total","total", "total", all_accuracy, all_precision, all_recall, all_f1_score, total_frame])
# The empty (0, 10) seed keeps the stacked shape and dtype the same as before.
result_list3 = np.vstack([np.empty((0, 10)), *result_rows3, total_row.reshape(1, -1)])


  0%|          | 0/2 [00:00<?, ?it/s]

[['38' 'orientation']
 ['39' 'orientation']
 ['40' 'orientation']
 ...
 ['8926' 'orientation']
 ['8927' 'orientation']
 ['8928' 'orientation']]


 50%|█████     | 1/2 [00:00<00:00,  1.78it/s]

[['15' 'orientation']
 ['16' 'orientation']
 ['17' 'orientation']
 ...
 ['8989' 'orientation']
 ['8990' 'orientation']
 ['8991' 'orientation']]


100%|██████████| 2/2 [00:00<00:00,  2.11it/s]


In [16]:
# Display the BORIS-vs-definition results: one row per file plus a final "total" row.
result_list3

array([['200529_3_on.csv', '20529_3_on.csv',
        'processed_200529_3_onleft2.csv',
        'processed_200529_3_onright2.csv', 'movie2',
        '0.8004893238434164', '0.8523942352394235', '0.7598425196850394',
        '0.8034618755477652', '8992.0'],
       ['210118_1_on.csv', '210118_1_on.csv',
        'processed_210118_1_onleft.csv',
        'processed_210118_1_onright.csv', 'movie4', '0.8228425266903915',
        '0.9139142656359803', '0.6586477589263104', '0.7655629139072847',
        '8992.0'],
       ['total', 'total', 'total', 'total', 'total', '0.811665925266904',
        '0.8768886401790711', '0.7143019943019943', '0.7872888274822584',
        '17984']], dtype='<U32')

In [17]:
# Save the YORU-vs-definition table. The header names are written to the CSV
# verbatim (including the existing "movie_idex" spelling).
result_columns = [
    "yoru_data_name", "boris_data_name", "def_left_data_name", "def_right_data_name",
    "movie_idex", "accuracy", "precision", "recall", "f1_score", "movie_total_frames",
]
results = pd.DataFrame(result_list, columns=result_columns)
results.to_csv('../outputs/compared_def_yoru_data_orientation.csv')

In [18]:
# Save the human(BORIS)-vs-YORU table. The header names are written to the CSV
# verbatim (including the existing "movie_idex" spelling).
result_columns = [
    "yoru_data_name", "boris_data_name", "def_left_data_name", "def_right_data_name",
    "movie_idex", "accuracy", "precision", "recall", "f1_score", "movie_total_frames",
]
results2 = pd.DataFrame(result_list2, columns=result_columns)
results2.to_csv('../outputs/compared_human_yoru_data_orientation.csv')

In [19]:
# Save the human(BORIS)-vs-definition table. The header names are written to
# the CSV verbatim (including the existing "movie_idex" spelling).
result_columns = [
    "yoru_data_name", "boris_data_name", "def_left_data_name", "def_right_data_name",
    "movie_idex", "accuracy", "precision", "recall", "f1_score", "movie_total_frames",
]
results3 = pd.DataFrame(result_list3, columns=result_columns)
results3.to_csv('../outputs/compared_human_def_orientation.csv')