In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
class BehavioralSession:
    """One behavioral session backed by a raw ABET event CSV file.

    Attributes:
        name: Free-form label for the session.
        raw_csv_path: Path of the raw CSV that ``preprocess_csv`` reads.
        preprocessed_csv: ``None`` until ``preprocess_csv`` has run, afterwards the
            preprocessed ``pandas.DataFrame``.
    """

    # Item_Name values that mark the first event of a new trial. The differing
    # capitalisation ("Begin" / "begin") is copied verbatim from the raw data labels.
    TRIAL_START_ITEMS = ["Forced-Choice Trials Begin", "Free-Choice Trials begin"]

    def __init__(self, name, raw_csv_path):
        self.name = name
        self.raw_csv_path = raw_csv_path
        self.preprocessed_csv = None

    def preprocess_csv(self):
        """Read the raw CSV, drop time-zero rows and number the trials.

        Steps:
            1. Remove every row whose ``Evnt_Time`` equals 0.
            2. Add a boolean column ``is_new_trial`` that is True on rows whose
               ``Item_Name`` is one of ``TRIAL_START_ITEMS``.
            3. Add ``trial_num``, the running count of trial starts seen so far, so
               every row carries the number of the trial it belongs to (rows before
               the first trial start get 0).

        The row counts before and after filtering are printed. The result is stored
        in ``self.preprocessed_csv`` only if nothing has been stored yet, so calling
        this method again keeps the first result.
        """
        raw_events = pd.read_csv(self.raw_csv_path)
        print("Prev length: ", len(raw_events))

        # .copy() makes the filtered table independent of `raw_events`, so adding
        # columns below does not trigger pandas' SettingWithCopyWarning.
        events = raw_events[raw_events["Evnt_Time"] != 0].copy()
        print("After filtering: ", len(events))

        events["is_new_trial"] = events["Item_Name"].isin(self.TRIAL_START_ITEMS)
        # True counts as 1 and False as 0, so the cumulative sum is the trial number.
        events["trial_num"] = events["is_new_trial"].cumsum()

        # `is None` (identity) is required here: `DataFrame == None` is an elementwise
        # comparison whose truth value is ambiguous and raises ValueError.
        if self.preprocessed_csv is None:
            self.preprocessed_csv = events

    def get_df(self):
        """Return the preprocessed DataFrame, or None if preprocessing has not run."""
        return self.preprocessed_csv

In [14]:
# Driver cell: preprocess one raw session file and build the per-trial summary table.
# NOTE: this cell uses BehavioralUtilities, whose defining cell appears further down in
# this notebook; that cell must be executed before this one.
#
# Raw session files that have been used with this cell (paths are machine-specific):
#/home/rory/Rodrigo/BehavioralDataProcessing/Pho_Vid_Package/74 12042019.csv
#/home/rory/Rodrigo/BehavioralDataProcessing/BLA-INSC-6 05182021.csv
ABET_1 = BehavioralSession("BLA-INSC-6 05182021", "/home/rory/Rodrigo/BehavioralDataProcessing/BLA-INSC-6 05182021.csv")
# Reads the CSV, drops rows with Evnt_Time == 0 and adds the is_new_trial / trial_num columns.
ABET_1.preprocess_csv()
df = ABET_1.get_df()
# One group per trial number; process_csv reduces each group to a single summary row.
grouped_by_trialnum = df.groupby("trial_num")
processed_behavioral_df = grouped_by_trialnum.apply(BehavioralUtilities.process_csv) # a new table indexed by trial_num; `df` itself is not modified
# Adds the "Learning Stratergy" column to processed_behavioral_df in place.
BehavioralUtilities.add_winstay_loseshift_loseomit(processed_behavioral_df)
# verify_table returns a status string describing the result of its sanity check.
verify = BehavioralUtilities.verify_table(processed_behavioral_df)
print(verify)
# Last expression of the cell: displays the summary table.
processed_behavioral_df

Prev length:  6131
After filtering:  6056
Multiple ABET 'trial possible' signals to ISX in trial 0: [0.001 1.   ]
Multiple ABET 'trial possible' signals to ISX in trial 9: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 10: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 11: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 12: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 13: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 14: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 15: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 16: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 17: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 18: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 19: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 20: [1. 1.]
Multiple ABET 'trial possible' signals to ISX in trial 21: [1. 1.]
Multiple ABET 't

Unnamed: 0_level_0,Block,Trial Type,Reward Size,Trial Possible (s),Start Time (s),Choice Time (s),Collection Time (s),Shock Ocurred,Omission,Win or Loss,Learning Stratergy
trial_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1.0,,,0.001,,,,False,,,
1,1.0,Forced,Small,1.000,68.849,75.781,77.023,False,,,
2,1.0,Forced,Small,1.000,85.744,87.922,88.887,False,,,
3,1.0,Forced,Small,1.000,97.454,99.344,100.305,False,,,
4,1.0,Forced,Large,1.000,122.043,125.848,126.688,False,,,
...,...,...,...,...,...,...,...,...,...,...,...
89,3.0,Free,Large,1.000,1932.306,1944.001,1945.825,True,,Loss,
90,3.0,Free,Large,1.000,1959.430,1965.439,1968.293,True,,Loss,
91,3.0,Free,Large,1.000,1979.032,1988.280,1989.341,False,,Win,
92,3.0,Free,Large,1.000,2017.971,2035.505,2037.250,True,,Loss,Win Stay


In [15]:
# Write the per-trial summary table to disk. index=True also writes the table's index
# as a column. The destination path is machine-specific.
processed_behavioral_df.to_csv("/home/rory/Rodrigo/BehavioralDataProcessing/BLA-INSC-6_05182021_processed.csv",index=True)

In [12]:
class BehavioralUtilities:
    """Stateless helpers that turn preprocessed ABET event rows into a per-trial table.

    Every function is a static method and is called through the class, e.g.
    ``grouped.apply(BehavioralUtilities.process_csv)``.
    """

    @staticmethod
    def process_csv(df, large_reward_threshold=1.2):
        """Collapse all raw event rows of ONE trial into a single summary row.

        Meant to be passed to ``DataFrame.groupby("trial_num").apply``: ``df`` is then
        the sub-table of raw rows that share one trial number, and the Series returned
        here becomes that trial's row in the resulting table. The raw file lists many
        possible descriptors per trial (``Item_Name``) together with a value column
        (``Arg1_Value``) and a timestamp (``Evnt_Time``); each inner getter looks up
        the rows for one descriptor and reduces them to a single value.

        Args:
            df: Rows of one trial. Must contain the columns ``Item_Name``,
                ``Arg1_Value`` and ``Evnt_Time``. ``trial_num`` (or, failing that, the
                group label pandas attaches as ``df.name``) is used only to label
                warning messages.
            large_reward_threshold: ``Feeder #2`` amounts below this value are
                reported as "Small", amounts at or above it as "Large".

        Returns:
            pandas.Series with the keys "Block", "Trial Type", "Reward Size",
            "Trial Possible (s)", "Start Time (s)", "Choice Time (s)",
            "Collection Time (s)", "Shock Ocurred", "Omission" and "Win or Loss".
            Values that cannot be determined are ``np.nan`` or ``None``.
        """
        item_names = df["Item_Name"]

        # Label used in the warning messages below. All rows of a group share the same
        # trial number, so the first one is representative.
        if "trial_num" in df.columns and len(df) > 0:
            trial_label = df["trial_num"].values[0]
        else:
            trial_label = getattr(df, "name", "?")

        def rows_named(name):
            """Rows whose Item_Name equals `name` exactly."""
            return df[item_names == name]

        def rows_containing(pattern, case=True):
            """Rows whose Item_Name contains `pattern`; missing names never match."""
            return df[item_names.str.contains(pattern, case=case, na=False)]

        def get_block_num():
            """Return 1, 2 or 3 for the first "Session<N>" item with a truthy value."""
            for block, session_item in enumerate(("Session1", "Session2", "Session3"), start=1):
                if rows_named(session_item)["Arg1_Value"].any():
                    return block
            return None  # no session marker present in this trial

        def get_force_or_free():
            """Return "Forced" or "Free" depending on which trial-start item is present."""
            if (item_names == "Forced-Choice Trials Begin").any():
                return "Forced"
            if (item_names == "Free-Choice Trials begin").any():
                return "Free"
            return None  # neither trial-start marker present

        def get_rew_size():
            """Classify the single "Feeder #2" amount of the trial as "Small" or "Large"."""
            amounts = rows_named("Feeder #2")["Arg1_Value"]
            if amounts.empty:
                # No feeder event in this trial, so most likely an omission.
                return np.nan
            if len(amounts) > 1:
                # The animal was fed more than once in one trial; treated as unusable.
                print("Multiple rewards in trial %s: %s" % (trial_label, amounts.values))
                return np.nan
            amount = float(amounts.values[0])
            if amount < large_reward_threshold:
                return "Small"
            if amount >= large_reward_threshold:
                return "Large"
            return None  # amount is NaN: neither comparison holds

        def get_iftrial_possible():
            """Return the Arg1_Value of the first "TTL" item of the trial.

            Per the original notes, the TTL item marks when ABET signals the ISX
            software that a trial is available. Several TTL rows per trial are common;
            they are reported and only the first value is used.
            """
            ttl_values = rows_containing("TTL")["Arg1_Value"]
            if ttl_values.empty:
                return np.nan
            if len(ttl_values) > 1:
                print("Multiple ABET 'trial possible' signals to ISX in trial %s: %s" % (trial_label, ttl_values.values))
            return ttl_values.values[0]

        def get_trial_start_time():
            """Return the Evnt_Time of the single "Trials Begin" item (case-insensitive)."""
            start_times = rows_containing("Trials Begin", case=False)["Evnt_Time"]
            if start_times.empty:
                return np.nan
            if len(start_times) > 1:
                # More than one start marker in one trial is ambiguous.
                print("Multiple trial start times in trial %s: %s" % (trial_label, start_times.values))
                return np.nan
            return start_times.values[0]

        def get_choice_time():
            """Return the Evnt_Time of the first "Feeder #2" item.

            The feeder event is used as the choice time regardless of reward size;
            without a feeder event there is no choice time.
            """
            feeder_times = rows_named("Feeder #2")["Evnt_Time"]
            if feeder_times.empty:
                return np.nan
            if len(feeder_times) > 1:
                print("Multiple choice times in trial %s : %s" % (trial_label, feeder_times.values))
            return feeder_times.values[0]

        def get_collection_time():
            """Return the Evnt_Time of the first "Reward Retrieved" item."""
            retrieval_times = rows_containing("Reward Retrieved")["Evnt_Time"]
            if retrieval_times.empty:
                return np.nan
            if len(retrieval_times) > 1:
                print("Multiple rewards retrieved in trial %s: %s" % (trial_label, retrieval_times.values))
            return retrieval_times.values[0]

        def get_shocked():
            """Return True if the first "shock_on_off" item has a non-zero value."""
            shock_values = rows_named("shock_on_off")["Arg1_Value"]
            if shock_values.empty:
                # No shock item was recorded for this trial.
                return False
            first_value = shock_values.values[0]
            if len(shock_values) == 1:
                return bool(first_value != 0)
            print("Multiple shocks in trial %s: %s" % (trial_label, shock_values.values))
            # With several shock rows a missing first value counts as "no shock".
            # pd.isna is required: `x == np.nan` is never True.
            return not (first_value == 0 or pd.isna(first_value))

        def get_omission():
            """Return "ITI", "Omission" or NaN for the trial.

            The ITI time-out item takes precedence because it can appear with or
            without one of the explicit "Omission of a ... Trial" items.
            """
            if (item_names == "ITI TO (Blank touch or Omission)").any():
                return "ITI"
            free_omissions = rows_containing("Omission of a Free Trial", case=False)
            forced_omissions = rows_containing("Omission of a Forced Trial", case=False)
            if free_omissions.empty and forced_omissions.empty:
                return np.nan
            return "Omission"

        def get_win_or_loss(trial_type, reward, was_shocked):
            """Free trials only: Large reward without shock is "Win", with shock "Loss"."""
            if trial_type != "Free":
                return None
            if reward == "Large":
                return "Loss" if was_shocked else "Win"
            return np.nan

        # Each getter yields exactly one value for the trial being processed.
        block_num = get_block_num()
        force_or_free = get_force_or_free()
        rew_size = get_rew_size()
        trial_possible = get_iftrial_possible()
        start_time = get_trial_start_time()
        choice_time = get_choice_time()
        collection_time = get_collection_time()
        was_shocked = get_shocked()
        omission_kind = get_omission()  # computed for forced and free trials alike
        win_loss = get_win_or_loss(force_or_free, rew_size, was_shocked)

        # groupby(...).apply stacks these Series into one row per trial.
        return pd.Series({
            "Block": block_num,
            "Trial Type": force_or_free,
            "Reward Size": rew_size,
            "Trial Possible (s)": trial_possible,
            "Start Time (s)": start_time,
            "Choice Time (s)": choice_time,
            "Collection Time (s)": collection_time,
            "Shock Ocurred": was_shocked,
            "Omission": omission_kind,
            "Win or Loss": win_loss,
        })

    @staticmethod
    def add_winstay_loseshift_loseomit(df):
        """Add the "Learning Stratergy" column to the per-trial table, in place.

        Each row is labelled from the previous row's "Win or Loss" and its own outcome:
            * previous "Win"  and current "Reward Size" == "Large"  -> "Win Stay"
            * previous "Loss" and current "Reward Size" == "Small"  -> "Lose Shift"
            * previous "Loss" and current "Omission" == "Omission"  -> "Lose Omit"
        Anything else, and always the first row, is NaN.

        Args:
            df: Table with the columns "Win or Loss", "Reward Size" and "Omission",
                one row per trial in chronological order. Modified in place.
        """
        outcomes = df["Win or Loss"].tolist()
        rewards = df["Reward Size"].tolist()
        omissions = df["Omission"].tolist()

        # Starting from all-NaN also covers the first row and an empty table.
        learning_strats = [np.nan] * len(df)
        for row_idx in range(1, len(df)):
            previous_outcome = outcomes[row_idx - 1]
            if previous_outcome == "Win" and rewards[row_idx] == "Large":
                learning_strats[row_idx] = "Win Stay"
            elif previous_outcome == "Loss" and rewards[row_idx] == "Small":
                learning_strats[row_idx] = "Lose Shift"
            # NOTE(review): only the explicit "Omission" label is counted here; confirm
            # whether an "ITI" time-out after a loss should also count as a lose-omit.
            elif previous_outcome == "Loss" and omissions[row_idx] == "Omission":
                learning_strats[row_idx] = "Lose Omit"

        df["Learning Stratergy"] = learning_strats

    @staticmethod
    def verify_table(df):
        """Sanity-check the per-trial table.

        A trial (other than the first row, which is skipped) that has no reward size,
        no choice time, no collection time and no omission label recorded nothing at
        all and is reported as an error.

        Args:
            df: Table with the columns "Reward Size", "Choice Time (s)",
                "Collection Time (s)" and "Omission".

        Returns:
            "Something wrong in row <position>" for the first offending row (position
            is the zero-based row position), otherwise "All good!".
        """
        checked_columns = ["Reward Size", "Choice Time (s)", "Collection Time (s)", "Omission"]
        # isna() is required: comparing with `== np.nan` is never True.
        nothing_recorded = df[checked_columns].isna().all(axis=1).tolist()
        for row_idx in range(1, len(df)):  # skip the first row (trial_num 0)
            if nothing_recorded[row_idx]:
                return "Something wrong in row %s" % (row_idx)
        return "All good!"
    