In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as patches
import seaborn as sns
import os
import glob
import csv
from natsort import natsorted
from itertools import chain
import scipy.stats as stats
from scipy.stats import brunnermunzel
from tqdm import tqdm

# Query the current working directory; the relative "../data" and "../outputs"
# paths used later in this notebook are resolved against it.
# NOTE(review): the value is not shown because this is not the cell's last
# expression.
os.getcwd()

# Font settings: fonttype 42 embeds TrueType fonts in PDF/PS output, and the
# default sans-serif family is set to Arial.
mpl.rcParams.update(
    {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "font.sans-serif": "Arial",
        "font.family": "sans-serif",
    }
)

In [None]:
 def convert_frame_data(df, status_col=12, frame_col=15):
    """Expand an event table into one ``[frame, label]`` row per frame.

    The table is converted to a NumPy array (so every cell becomes a string
    when the columns have mixed types) and is read by column position.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table; the caller in this notebook passes a BORIS CSV export.
    status_col : int, default 12
        Position of the column holding the event status ("START", "STOP",
        "POINT", ...). Presumably the BORIS "Behavior type" column - confirm
        against the export format in use.
    frame_col : int, default 15
        Position of the column holding the frame number of each event.
        Presumably the BORIS "Image index" column - confirm.

    Returns
    -------
    wing_boris_list : numpy.ndarray
        One ``[frame, label]`` row per frame, starting at frame 0, where
        ``label`` is "trophallaxis" from a START frame through the matching
        STOP frame (inclusive) and "no" otherwise.
    last_frame
        Frame value of the POINT event found in one of the last two rows,
        returned exactly as stored in the converted array.

    Raises
    ------
    ValueError
        If neither of the last two rows is a POINT event, so the length of
        the recording cannot be determined.
    """
    events = np.array(df.values.tolist())

    # The POINT event in the last (or second-to-last) row gives the number of
    # frames to label. Check the last row first, then the one before it.
    last_frame = None
    for row in events[:-3:-1]:
        if row[status_col] == "POINT":
            last_frame = row[frame_col]
            break
    if last_frame is None:
        raise ValueError(
            "Cannot determine the last frame: neither of the last two rows "
            "of the event table has status 'POINT'."
        )

    # Map each frame to the status of its first event so the per-frame loop
    # does a constant-time lookup instead of rescanning the whole column.
    status_by_frame = {}
    for row in events:
        status_by_frame.setdefault(str(row[frame_col]), row[status_col])

    log_label = "no"
    wing_boris_list = []
    for i in range(int(last_frame)):
        status = status_by_frame.get(str(i))
        if status is None:
            # No event on this frame: carry the current label forward.
            wing_boris_list.append([i, log_label])
        elif status == "START":
            log_label = "trophallaxis"
            wing_boris_list.append([i, log_label])
        elif status == "STOP":
            # The STOP frame itself still carries the current label.
            wing_boris_list.append([i, log_label])
            log_label = "no"
        else:
            # Any other event status ends the labelled range.
            break

    return np.array(wing_boris_list), last_frame

In [None]:
def extract_wing_frame(df):
    """Return the ``[frame, class_name]`` rows detected as "trophallaxis".

    Rows whose ``class_name`` is "trophallaxis" are selected, exact duplicate
    rows (across all columns) are dropped, and only the ``frame`` and
    ``class_name`` columns are kept. Several rows with the same frame can
    remain if they differ in any other column.

    Parameters
    ----------
    df : pandas.DataFrame
        Detection table with at least a ``class_name`` column; the caller in
        this notebook passes a YORU output CSV.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per remaining detection. Values are converted
        through a Python list, so mixed column types end up as strings. When
        nothing is detected an empty 2-D array is returned (instead of a 1-D
        one) so callers can still index ``[:, 0]``.
    """
    detections = (
        df[df["class_name"] == "trophallaxis"]
        .drop_duplicates()
        .filter(items=["frame", "class_name"])
    )
    if detections.empty:
        # np.array([]) would be 1-D and break column indexing downstream.
        return np.empty((0, detections.shape[1]), dtype=str)
    return np.array(detections.values.tolist())

# Accuracy Calculation

In [None]:
def cal_accuracy(wing_boris_list, df_wing_list):
    """Compute frame-level accuracy of the detections against manual labels.

    A manually labelled frame counts as correct when it is labelled
    "trophallaxis" and appears among the detected frames, or is labelled
    "no" and does not appear among them.

    Parameters
    ----------
    wing_boris_list : numpy.ndarray
        Rows of ``[frame, label]`` with label "trophallaxis" or "no".
    df_wing_list : numpy.ndarray
        Rows whose first element is a detected frame. Frames are compared by
        value, so both arrays must store frames with the same type (both are
        string arrays when produced by this notebook's helpers).

    Returns
    -------
    tuple
        ``(accuracy, accuracy_true, accuracy_all)``: the ratio, the number of
        correct frames and the number of evaluated frames. ``accuracy`` is
        NaN when no frame was evaluated.
    """
    # A set gives constant-time membership tests; iterating rows also copes
    # with an empty 1-D detection array.
    detected_frames = {row[0] for row in df_wing_list.tolist()}

    accuracy_true = 0
    accuracy_all = 0
    for row in wing_boris_list.tolist():
        frame, label = row[0], row[1]
        is_detected = frame in detected_frames
        if label == "trophallaxis":
            is_correct = is_detected
        elif label == "no":
            is_correct = not is_detected
        else:
            # Unknown label: report it and leave the frame out of the counts.
            print("error")
            continue
        accuracy_all += 1
        accuracy_true += int(is_correct)

    # Avoid ZeroDivisionError when there is nothing to evaluate.
    accuracy = accuracy_true / accuracy_all if accuracy_all else float("nan")
    return accuracy, accuracy_true, accuracy_all

# Precision Calculation

In [None]:
def cal_precision(wing_boris_list, df_wing_list):
    """Compute the precision of the detections against manual labels.

    Every row of ``df_wing_list`` is one detection. It counts as a true
    positive when its frame exists in the manual list with the label
    "trophallaxis"; a frame missing from the manual list counts as a false
    positive.

    Parameters
    ----------
    wing_boris_list : numpy.ndarray
        Rows of ``[frame, label]`` from the manual annotation.
    df_wing_list : numpy.ndarray
        Rows whose first element is a detected frame. Frames are compared by
        value, so both arrays must store frames with the same type.

    Returns
    -------
    tuple
        ``(precision, precision_true, precision_all)``: the ratio, the number
        of true positives and the number of detections. ``precision`` is NaN
        when there are no detections.
    """
    # Look labels up by frame value rather than by row position, so the result
    # does not depend on the manual list starting at frame 0 without gaps.
    manual_label_by_frame = {row[0]: row[1] for row in wing_boris_list.tolist()}

    precision_true = 0
    precision_all = 0
    for row in df_wing_list.tolist():
        precision_all += 1
        if manual_label_by_frame.get(row[0]) == "trophallaxis":
            precision_true += 1

    # Avoid ZeroDivisionError when nothing was detected.
    precision = precision_true / precision_all if precision_all else float("nan")
    return precision, precision_true, precision_all

# Recall Calculation

In [None]:
def cal_recall(wing_boris_list, df_wing_list):
    """Compute the frame-level recall of the detections.

    Only frames manually labelled "trophallaxis" are considered; such a frame
    counts as recovered when it appears among the detected frames.

    Parameters
    ----------
    wing_boris_list : numpy.ndarray
        Rows of ``[frame, label]`` from the manual annotation.
    df_wing_list : numpy.ndarray
        Rows whose first element is a detected frame. Frames are compared by
        value, so both arrays must store frames with the same type.

    Returns
    -------
    tuple
        ``(recall, recall_true, recall_all)``: the ratio, the number of
        recovered frames and the number of manually positive frames.
        ``recall`` is NaN when the manual list has no "trophallaxis" frame.
    """
    # A set gives constant-time membership tests; iterating rows also copes
    # with an empty 1-D detection array.
    detected_frames = {row[0] for row in df_wing_list.tolist()}

    recall_true = 0
    recall_all = 0
    for row in wing_boris_list.tolist():
        if row[1] != "trophallaxis":
            continue
        recall_all += 1
        if row[0] in detected_frames:
            recall_true += 1

    # Avoid ZeroDivisionError for recordings without any positive frame.
    recall = recall_true / recall_all if recall_all else float("nan")
    return recall, recall_true, recall_all

# Calculate F1 score

In [None]:
def cal_f1_score(precision, recall):
    """Return the F1 score, the harmonic mean of precision and recall.

    Parameters
    ----------
    precision : float
    recall : float

    Returns
    -------
    float
        ``2 * precision * recall / (precision + recall)``. Returns 0.0 when
        precision and recall sum to zero, instead of raising
        ZeroDivisionError. A NaN input propagates to a NaN result.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    F1 = (2 * precision * recall) / denominator
    return F1


In [None]:
# Read the file table and keep it as a 2-D array with one row per recording;
# the evaluation loop uses element 0 of a row as the YORU file name and
# element 1 as the BORIS file name.
df_file_list = pd.read_csv('../data/file_list.csv')
file_list = np.array(df_file_list.values.tolist())

In [None]:
# Initialisation: per-file result rows plus the running counts used for the
# pooled metrics in the final "total" row.
result_rows = []
all_precision_true = 0
all_precision_all = 0
all_recall_true = 0
all_recall_all = 0
all_accuracy_true = 0
all_accuracy_all = 0
total_frame = 0

for file_line in tqdm(file_list):
    # Detections: frames classified as "trophallaxis" in the YORU output.
    yoru_data_path = f"../data/YORU_data/{file_line[0]}"
    df_yoru = pd.read_csv(yoru_data_path)
    df_wing_list = extract_wing_frame(df_yoru)

    # Manual annotation: one label per frame derived from the BORIS events.
    boris_data_path = f"../data/BORIS_data/{file_line[1]}"
    df_boris = pd.read_csv(boris_data_path)
    wing_boris_list, last_frame = convert_frame_data(df_boris)

    precision, precision_true, precision_all = cal_precision(wing_boris_list, df_wing_list)
    recall, recall_true, recall_all = cal_recall(wing_boris_list, df_wing_list)
    accuracy, accuracy_true, accuracy_all = cal_accuracy(wing_boris_list, df_wing_list)
    f1_score = cal_f1_score(precision, recall)

    # Append the metrics to the file's own columns and keep the result as a
    # single-row 2-D array; all rows are stacked once after the loop instead
    # of re-allocating the result array on every iteration.
    result_row = np.append(file_line, [accuracy, precision, recall, f1_score, last_frame])
    result_rows.append(result_row.reshape(1, -1))

    all_precision_true += precision_true
    all_precision_all += precision_all
    all_recall_true += recall_true
    all_recall_all += recall_all
    all_accuracy_true += accuracy_true
    all_accuracy_all += accuracy_all
    total_frame += int(last_frame)

# Pooled metrics computed from the summed counts (not the mean of the
# per-file ratios). A zero denominator yields NaN instead of an exception.
all_accuracy = all_accuracy_true / all_accuracy_all if all_accuracy_all else float("nan")
all_precision = all_precision_true / all_precision_all if all_precision_all else float("nan")
all_recall = all_recall_true / all_recall_all if all_recall_all else float("nan")
all_f1_score = cal_f1_score(all_precision, all_recall)

# Add the "total" row as the last row of the 2-D result array.
total_row = np.array(["total", "total", "total", all_accuracy, all_precision, all_recall, all_f1_score, total_frame])
result_list = np.vstack(result_rows + [total_row.reshape(1, -1)])


In [None]:
# Display the result array (one row per file plus the final "total" row).
result_list

In [None]:
# Label the eight columns of the result array and write the table to CSV.
# NOTE(review): "movie_idex" looks like a typo for "movie_index"; it is kept
# unchanged here because it becomes the header of the exported file.
result_columns = [
    "yoru_data_name",
    "boris_data_name",
    "movie_idex",
    "accuracy",
    "precision",
    "recall",
    "f1_score",
    "movie_total_frames",
]
results = pd.DataFrame(result_list, columns=result_columns)
results.to_csv('../outputs/compared_data_ant_trophallaxis.csv')