In [9]:
import argparse
import glob
import os
import re
import subprocess

import pandas as pd

In [2]:
# """Generate the parser for the command line"""

# parser = argparse.ArgumentParser(description='Check for errors in BORIS behavior file scoring')
# parser.add_argument('--path', help='Input a string path to the folder BORIS_files_input')

# args = parser.parse_args()

In [None]:
def preprocess_behavior_file(path, reader = "csv", frametimes_path = "", frames_per_second = 20):
    """Preprocess a BORIS export and return it as a DataFrame.

    The raw file is expected to carry its real column names in the row at position 14;
    that row and everything before it are discarded. Point events and the
    "Intervention", "Don't Score" and "Disconnected" behaviors are removed, "Time" is
    converted to float, and two columns are added:

    * "Duration": for every odd-numbered row (the second row of each pair) the time
      elapsed since the preceding row, and 0 for every even-numbered row.
    * "Location": "Left", "Right" or "Center", derived from the "Behavior" column
      (NaN for behaviors that have no location).

    The function also prints a message for every row whose "Status" breaks the expected
    START, STOP, START, STOP ... alternation (i.e. START followed by another START event
    or STOP followed by another STOP event).

    Args:
        path: Path to the BORIS export.
        reader: "csv" to read with pandas.read_csv or "xlsx" to read with pandas.read_excel.
        frametimes_path: Optional path to a csv whose first column holds one timestamp
            per frame. When given, a "Frametime" column is added, "Time" is re-expressed
            relative to the first event's frame time, and "Duration" is computed from
            the frame times.
        frames_per_second: Factor that converts a BORIS time into a row number of the
            frametimes file (presumably the video frame rate; the default of 20 is the
            value that used to be hard-coded).

    Returns:
        The cleaned DataFrame with a fresh 0..n-1 index.

    Raises:
        ValueError: If reader is neither "csv" nor "xlsx".
    """

    if reader == "csv":
        df = pd.read_csv(path)
    elif reader == "xlsx":
        df = pd.read_excel(path)
    else:
        # Previously this fell through and failed later on an unbound `df`.
        raise ValueError("Unsupported reader: " + repr(reader) + " (expected 'csv' or 'xlsx')")

    frametimes = pd.read_csv(frametimes_path) if frametimes_path else None

    # Promote the row at position 14 to the header and discard it with the rows above it.
    df.columns = df.iloc[14].to_list()
    df = df.drop(df.index[0:15])
    df = df.drop(["Media file path", "Total length", "FPS", "Subject", "Behavioral category", "Comment"], axis = 1)

    # Drop point events and any intervention events
    df = df[df["Status"] != "POINT"]
    for excluded_behavior in ("Intervention", "Don't Score", "Disconnected"):
        df = df[df["Behavior"] != excluded_behavior]

    # Reset the index after dropping events so that rows are numbered 0..n-1
    df = df.reset_index(drop = True)

    # Convert the time column to a float from a string
    df["Time"] = df["Time"].astype(float)
    if frametimes is not None:
        last_frame = frametimes.shape[0] - 1
        # Events past the end of the frametimes file are clamped to its last row.
        df["Frametime"] = df["Time"].apply(
            lambda t: frametimes.iloc[min(round(t * frames_per_second), last_frame), 0]
        )
        if not df.empty:
            df["Time"] = df["Frametime"] - df["Frametime"].iloc[0]

    # Generate the Duration column: odd rows get the time since the previous row,
    # even rows get 0.
    clock = df["Frametime"] if frametimes is not None else df["Time"]
    elapsed = clock.astype(float).diff()
    is_odd_row = df.index % 2 != 0
    df["Duration"] = elapsed.where(is_odd_row, 0.0)

    # Check for errors in BORIS event inputs. The expected status only advances on a
    # match, so a single repeated event is reported once and the check then resumes.
    expected = "START"
    for row_number, status in enumerate(df["Status"]):
        if status != expected:
            print("Check for repeated status event at row for file: " + str(path) + " at " + str(row_number))
        elif expected == "START":
            expected = "STOP"
        else:
            expected = "START"

    mapping = {"Center": "Center", "Huddle left": "Left", "Huddle right": "Right", "Interact left": "Left",
               "Interact right": "Right", "Left": "Left", "Right": "Right", "Sniff left": "Left",
               "Sniff right": "Right"}

    # Location comes from the behavior name. The previous code copied "Duration" into
    # this column and threw away the result of .map().
    df["Location"] = df["Behavior"].map(mapping)

    return df

In [None]:
# FIXME(review): `preprocess_behavior_file` has a required `path` parameter, so this
# argument-less call raises TypeError when the cell is run; pass the path of a BORIS export.
preprocess_behavior_file()

In [47]:
class Animal:
    """An object representing a single animal that contains information about the animal including a BORIS behavior file.

    Attributes:
        animal_id: Identifier of the animal.
        behavior_file: Path of the csv the animal was created from ("" when none was given).
        behavior_dataframe: Contents of the behavior file; an empty DataFrame until a
            file is loaded or a frame is supplied through set_behavior_file.
    """
    animal_id: str
    behavior_file: str
    behavior_dataframe: pd.DataFrame

    def __init__(self, animal_id: str, behavior_file: str = "") -> None:
        """Create the animal and, when behavior_file is given, read it with pandas.read_csv."""
        self.animal_id = animal_id
        # Always define both attributes so that later access cannot raise AttributeError.
        self.behavior_file = behavior_file
        self.behavior_dataframe = pd.read_csv(behavior_file) if behavior_file else pd.DataFrame()

    def __str__(self) -> str:
        return self.animal_id

    def __repr__(self) -> str:
        # Keeps a displayed collection of animals readable instead of showing object addresses.
        return "Animal(" + repr(self.animal_id) + ")"

    def set_behavior_file(self, df: pd.DataFrame) -> None:
        """Replace the animal's behavior data with the given DataFrame."""
        # The frame belongs in behavior_dataframe; behavior_file holds the source path.
        self.behavior_dataframe = df

    def get_behavior_file(self, path: str) -> pd.DataFrame:
        """Takes in a string path to a BORIS csv file and returns a pandas dataframe.

        The file is read fresh on every call and the animal itself is not modified.
        """
        return pd.read_csv(path)

    def get_animal_id(self) -> str:
        """Return the animal's identifier."""
        return self.animal_id
    
class Cohort:
    """An object representing a cohort of animals that contains information about the cohort including a cohort id, the sex
    of the cohort, the genotype of the cohort, and the cohort of animals.

    Attributes:
        cohort: Set of the animals that belong to this cohort (one set per instance).
        cohort_id: Identifier of the cohort.
        sex: Declared for the cohort's sex; not assigned by this class.
        genotype: Declared for the cohort's genotype; not assigned by this class.
    """
    cohort: set
    cohort_id: str
    sex: str
    genotype: str

    def __init__(self, cohort_id, cohort=None) -> None:
        """Create a cohort, optionally seeded with an iterable of animals.

        The animals are copied into a new set. A shared class-level set or a mutable
        default argument would make every cohort created without animals add to the
        same set.
        """
        self.cohort_id = cohort_id
        self.cohort = set(cohort) if cohort else set()

    def __str__(self) -> str:
        """Return a header line followed by one animal id per line."""
        animal_lines = [animal.get_animal_id() + '\n' for animal in self.cohort]
        return 'The current animals in the cohort are: \n' + ''.join(animal_lines)

    def add_animal_to_cohort(self, animal: "Animal") -> None:
        """Add an animal to this cohort; adding the same object twice has no effect."""
        self.cohort.add(animal)

In [48]:
# Folder that holds the BORIS input and test directories. Set the BORIS_BASE_DIR
# environment variable to run the notebook on another machine; the default is the
# original author's local folder.
BORIS_BASE_DIR = os.environ.get(
    'BORIS_BASE_DIR',
    '/Users/josephgmaa/Research/BORIS behavior error checking',
)
path = BORIS_BASE_DIR + '/BORIS_files_input'
path_tests = BORIS_BASE_DIR + '/test_BORIS_files'

In [51]:
test_cohort = Cohort('separation_reunification')

# Generate test cohort of animals from test directory and adding to cohort.
# Files are visited in sorted order so that repeated runs behave the same way.
for file in sorted(glob.glob(path_tests + '/*')):
    # The animal id is a "V" followed by four digits somewhere in the file path.
    id_match = re.search('[V][0-9][0-9][0-9][0-9]', file)
    if id_match is None:
        # re.search returns None when no id is present; skip the file instead of
        # failing on `.group(0)`.
        print('Skipping file without an animal id: ' + file)
        continue
    animal_id = id_match.group(0)
    animal = Animal(animal_id, file)
    test_cohort.add_animal_to_cohort(animal)

In [53]:
# Show the Animal objects gathered in the cohort. `Cohort.cohort` is a set, so it is
# unordered and cannot be indexed (e.g. `test_cohort.cohort[0]` raises TypeError).
test_cohort.cohort

TypeError: 'set' object is not subscriptable