Created by Tianze (Steven) Shou during the RETTL project in Summer 2022 in Vincent's lab under the mentorship of Shamya Karumbaiah. 

This file serves as a pipeline that takes in Pozyx position data, detects stops according to the given parameters (duration and radius), and compares them with the distilled observation log event data. 

In [56]:
# python setup 
import pandas as pd
import stop_detection as sd 
import numpy as np
import copy
from tqdm import tqdm

In [57]:
# function definition 
def isWithinStop(row, stop): 
    """
    Tell whether a position sample falls inside a stop interval (both bounds inclusive).

    :param row: a single dataframe row; must provide an entry labelled `time_stamp`
    :param stop: a 2-tuple `(start, end)` with the first and last timestamp of the stop
    :return: truthy when `start <= time_stamp <= end`, falsy otherwise
    """
    # a stop must be a plain pair of timestamps
    assert(type(stop) == tuple)
    assert(len(stop) == 2)

    lowerBound, upperBound = stop
    sampleTime = row.loc["time_stamp"]
    return lowerBound <= sampleTime <= upperBound



def getStopEvent(posDF, stops): 

    """
    Label every row of a position dataframe as either moving or belonging to a stop.

    Rows and stops are walked in lockstep, so both are expected to be ordered by time. 
    Rows are addressed by position, so posDF may carry any index. 

    :param posDF: a pandas dataframe denoting the position data of a real-world object. Must have columns `chosen_X`, `chosen_Y`, and `time_stamp` 
    :param stops: a list of (start, end) timestamp tuples, one per stop. Usually the value returned by sd.getStops()
    :return: returns a tuple of three lists, each with one entry per row of posDF. The first holds the event strings 
             ("Moving" or "Stopping in location: <centroid>"); the second holds the stop centroid as a tuple (NaN while 
             moving); the third holds the index of the stop within `stops` (NaN while moving). 
    """

    nRows = len(posDF)
    # read the timestamps once instead of materialising one row object per comparison 
    timestamps = posDF["time_stamp"].to_numpy()

    # values to be returned 
    events = [] 
    centroids = []
    stop_indeces = [] 

    i, j = 0, 0 # i is the row position in posDF; j indexes the list of stops
    # The row bound matters: when the rows run out while a stop is still unmatched 
    # (no row falls inside it), looping on j alone would never terminate. 
    while(j < len(stops) and i < nRows): 
        currStop = stops[j] 
        assert(isinstance(currStop, tuple) and len(currStop) == 2)
        stopStart, stopEnd = currStop
        currStopStartInd = i

        # consume every consecutive row that lies inside the current stop 
        while(i < nRows and stopStart <= timestamps[i] <= stopEnd): 
            i += 1

        rowsInStop = i - currStopStartInd
        if(rowsInStop > 0): # did run into the current stop 
            rows = posDF.iloc[currStopStartInd:i] # rows that are within currStop 
            points = sd.cols2tuples(rows.chosen_X, rows.chosen_Y) 
            centroid = sd.getCentroid(points) # get the centroid of current stop 
            assert(type(centroid) == tuple and len(centroid) == 2) # ensures that centroid is a point represented by a tuple

            event = "Stopping in location: " + str(centroid) # format event in string

            # a stop is a continuous event, so every row inside it gets the same labels 
            events.extend([event] * rowsInStop) 
            centroids.extend([centroid] * rowsInStop) 
            stop_indeces.extend([j] * rowsInStop)

            j += 1 # all rows of the current stop are labelled, go to next stop

        else: # current row is outside the current stop, so the actor is moving 
            events.append("Moving") 
            centroids.append(np.nan)
            stop_indeces.append(np.nan)
            i += 1

    # no stop is left to match (or the rows ran out); the remaining rows are all moving 
    remaining = nRows - i
    events.extend(["Moving"] * remaining) 
    centroids.extend([np.nan] * remaining)
    stop_indeces.extend([np.nan] * remaining)

    assert( len(events) == len(posDF) )
    assert( len(centroids) == len(posDF) )
    assert( len(stop_indeces) == len(posDF) )
            
    return events, centroids, stop_indeces


def getClosestObjs(actorDF, objDF, rng): 
    """
    For every row of actorDF, collect the classroom objects that lie within `rng` of that row's stop centroid. 
    
    :param actorDF: pandas dataframe documenting the position of an `actor` by continuous unix timestamp. Must contain column `centroid`, holding a coordinate tuple for rows inside a stop and NaN otherwise 
    :param objDF: pandas dataframe documenting the coordinates of all classroom objects. Must have columns: `object`, `X`, and `Y` 
    :param rng: range parameter, a positive number; every classroom object whose distance to the stop centroid is strictly below rng is kept 
    :return: returns a list of the same length as actorDF. An entry is NaN for rows outside a stop, otherwise a dictionary in 
             {<objName1>:<distance1>, <objName2>:<distance2>} format (empty when no object is in range) 
    """
    # numpy scalars are accepted as well, since parameter grids are usually numpy arrays 
    assert(isinstance(rng, (int, float, np.integer, np.floating))) 
    assert(rng > 0)

    closestObjs = [] # value to be returned 
    objPoints = sd.cols2tuples(objDF.X, objDF.Y) # X Y coordinates for classroom objects 
    objNames = list(objDF["object"]) # plain list so names and points are paired by position 
    assert(len(objNames) == len(objPoints)) # these two sequences should have one-to-one corresponding relation 

    previous = None # centroid of the previous row, None before the first row 
    for centroid in actorDF["centroid"]: 

        if( np.any(np.isnan(centroid)) ): # actor is not in a stop 
            closestObjs.append(np.nan)

        elif(type(previous) is type(centroid) and previous == centroid): 
            # same stop as the previous row: reuse its result instead of measuring every object again 
            closestObjs.append(copy.deepcopy(closestObjs[-1]))

        else: # first row of a stop: measure the distance to every classroom object 
            objDistDict = dict() 
            for objName, objPoint in zip(objNames, objPoints): 
                dist = sd.getDist(centroid, objPoint) 
                if(dist < rng): # object within range 
                    objDistDict[objName] = dist # <object name>:<distance to centroid>
            closestObjs.append(objDistDict) 

        previous = centroid

    assert(len(closestObjs) == len(actorDF)) # ensure that output length is correct 
    return(closestObjs)

def isEmpty(obj): 
    """Return True when `obj` is falsy (e.g. an empty container, None or 0), False otherwise."""
    return False if obj else True


def getObsInTimeframe(obsDF, timeframeStart, timeframeEnd): 
    """
    Return the observation rows whose timestamp lies within [timeframeStart, timeframeEnd]. 

    :param obsDF: observation dataframe; must have a column `timestamp` sorted in increasing order 
    :param timeframeStart: first timestamp of the timeframe (inclusive) 
    :param timeframeEnd: last timestamp of the timeframe (inclusive) 
    :return: the slice of obsDF falling inside the timeframe, possibly empty 
    """
    obsTimestamps = obsDF["timestamp"] 
    # searchsorted yields positions, so they must be used with positional (iloc) slicing. 
    # side="right" for the end makes the half-open slice include rows exactly at timeframeEnd 
    # while leaving out the first row after it. 
    firstPos = obsTimestamps.searchsorted(timeframeStart, side="left") 
    endPos = obsTimestamps.searchsorted(timeframeEnd, side="right") 
    return obsDF.iloc[firstPos:endPos] 

def calcTriangulationScoreAndPercentages(posDF, obsDF, n_stops, timeframe=10): 

    """
    Compare the interactions recorded in the observation log with the subjects guessed from position data. 

    For every observed "Talking to ..." event, the position rows whose `time_stamp` lies strictly inside a window of 
    `timeframe` seconds centred on the observation are collected. The subjects guessed in these rows (set G) are 
    compared with the observed subjects (set S) to count hits, misses and false alarms. Stops that never fall into 
    any window contribute all of their guessed subjects as "outside" false alarms. 

    :param posDF: distilled pozyx position data; must have columns `time_stamp`, `event`, `stop_index` and `possibleSubjects` 
    :param obsDF: distilled observation log data; must have columns `event`, `subject` and `timestamp`. See observation_distilled_sprint1_shou.tsv for an example 
    :param n_stops: not used by the computation; kept so that existing callers keep working 
    :param timeframe: total width of the window around each observation, unit is second 
    :return: returns a tuple (right match ratio, right guess ratio, number of hits, number of misses, 
             number of false alarms inside the windows, number of false alarms outside the windows). 
             A ratio is NaN when its denominator is zero. 
    """

    # events that we can validate with position data 
    talkingEvents = ("Talking to student: ON-task", 
                     "Talking to student: OFF-task", 
                     "Talking to small group: ON-task", 
                     "Talking to small group: OFF-task") 

    # these two counts are for the calculation of recall 
    rightMatchCount = 0 
    totalMatchCount = 0 
    # these two counts are for the calculation of ~precision 
    rightGuessCount = 0
    totalGuessCount = 0

    i_hits = i_misses = i_false_alarms_inside = i_false_alarms_outside = 0

    # stops already accounted for inside some observation window 
    seen_stop_indeces = set() 

    # we look both back and forward in time in position dataframe 
    back = timeframe / 2
    forward = timeframe - back
    posTimestamps = posDF["time_stamp"] 

    for i in range(len(obsDF)): 
        obsRow = obsDF.iloc[i] 
        if obsRow["event"] not in talkingEvents: 
            continue 

        trueSubjects = obsRow["subject"] # in format like "12;13", seat numbers delimited by semicolons 
        assert(isinstance(trueSubjects, str) and trueSubjects != "") 
        # convert to "seat12" format to align with position data 
        trueSubjects = [ "seat" + trueSubject for trueSubject in trueSubjects.split(";") ]
        S = set(trueSubjects) # S is the target set here 

        # position rows strictly inside the window; a single combined mask avoids chained boolean indexing 
        timeframeCenter = obsRow["timestamp"] 
        inWindow = (timeframeCenter - back < posTimestamps) & (posTimestamps < timeframeCenter + forward) 
        posInTimeframe = posDF[inWindow] 

        subjSets = posInTimeframe["possibleSubjects"] 
        # set.union() returns a new set and leaves the original untouched, so update() is required here; 
        # moving rows carry NaN as stop index and are left out 
        seen_stop_indeces.update( posInTimeframe["stop_index"].dropna() )

        G = set() # populate Guess set G 
        for g in subjSets: # g is the individual guess set here, will be merged into big G 
            if isinstance(g, dict): 
                G.update(g)
            else: 
                # this means that no guess is in the guess set g
                assert np.isnan(g) 

        i_hits += len( S & G ) # number of hits = | S \intersect G | 
        i_misses += len( S - G ) 
        i_false_alarms_inside += len( G - S )

        for trueSubject in trueSubjects: # see if each true subject is included in the possible subject sets 
            # NOTE: only a repeat of the immediately preceding set is skipped, so a sequence of sets 
            # A, A, C, A, D is counted as A, C, A, D rather than as the unique sets A, C, D. 
            bookMarkedSet = None 
            for subjSet in subjSets: 
                # skip NaN (teacher is moving) and sets equal to the one just counted 
                if not isinstance(subjSet, dict) or subjSet == bookMarkedSet: 
                    continue 
                bookMarkedSet = subjSet
                totalGuessCount += len(subjSet) # the number of guesses is the number of guessed subjects 
                totalMatchCount += 1 # one match attempt for each counted set 
                if(trueSubject in subjSet): 
                    rightMatchCount += 1 # a correct match detected 
                    rightGuessCount += 1 # a correct guess detected 

    # Guesses of stops outside every window count as false alarms as well, to prevent 
    # encouraging the algo to generate numerous false stops just for random guessing 
    for event, stop_index, subjects in zip(posDF["event"], posDF["stop_index"], posDF["possibleSubjects"]): 
        # only check stops we did not check beforehand 
        if ("Stopping" in event) and (stop_index not in seen_stop_indeces): 
            i_false_alarms_outside += len(subjects)
            seen_stop_indeces.add(stop_index) # mark this stop as checked 

    f1 = rightMatchCount / totalMatchCount if totalMatchCount else np.nan
    f2 = rightGuessCount / totalGuessCount if totalGuessCount else np.nan
    return f1, f2, i_hits, i_misses, i_false_alarms_inside, i_false_alarms_outside


def getPercentages(posStops, obsStops, timeframe, epsilon=0.01): 

    """
    Measure how well the stops found in position data and in observation data agree. 

    A position stop (start, end) is widened to the window [start - timeframe/2, end + timeframe/2]; 
    it corresponds to an observation stop when the observation timestamp lies inside that window. 

    :param posStops: stops datamined from position data, formatted as [(start_stop_1, end_stop_1), (start_stop_2, end_stop_2), ... ] 
    :param obsStops: stops datamined from observation data, formatted as [start_stop_1, start_stop_2, ... ] 
    :param timeframe: timeframe parameter specified in triangulation model 
    :param epsilon: not used by the computation; kept so that existing callers keep working 
    :return: returns three percentages, in-both%, in-position-only%, and in-observation-only%; 
             all three are NaN when there is no stop at all 
    """

    assert(isinstance(posStops, list) and isinstance(obsStops, list)) 

    back = timeframe / 2
    forward = timeframe - back 
    # matching window of every position stop 
    windows = [(posStopStart - back, posStopEnd + forward) for posStopStart, posStopEnd in posStops] 

    # Every stop without a counterpart is counted, including those for which the other list is 
    # exhausted before a decision is reached (e.g. position stops after the last observation). 
    # number of stops only in position data, not in observation data 
    inPosCount = sum(1 for TFstart, TFend in windows 
                     if not any(TFstart <= obsStop <= TFend for obsStop in obsStops)) 
    # number of stops only in observation data, not in position data 
    inObsCount = sum(1 for obsStop in obsStops 
                     if not any(TFstart <= obsStop <= TFend for TFstart, TFend in windows)) 

    # matched stops are seen once from each side, hence the division by two 
    inBothCount = int((len(obsStops) + len(posStops) - inObsCount - inPosCount) / 2) 
    totalCount = inBothCount + inPosCount + inObsCount
    if totalCount == 0: # nothing to compare, percentages are undefined 
        return np.nan, np.nan, np.nan 

    # return the percentages 
    return inBothCount / totalCount, inPosCount / totalCount, inObsCount / totalCount 




In [1]:
def run_param_sweep(verbose=False, outputFileName = "", resolution="low"):
    """
    Sweep the stop-detection and triangulation parameters and tabulate how well position and observation data agree. 

    The input files are read from fixed paths relative to the working directory (see the read_csv calls below). 

    verbose: print progress, yes/no
    outputFileName: path of the csv file the results are written to; the empty string disables the export 
    resolution: takes value from {"low", "medium", "high", "fine_grained"}. Use "low" to test run code functionality. 
        Use "medium" to have fast, preliminary results. Use "high" to explore the search space more extensively. 
        "fine_grained" uses dense grids with the timeframe fixed to 10 seconds. 
    returns: dataframe with one row per tested parameter set (duration, radius, timeframe, range) 
    raises: ValueError if resolution is not one of the values above 
    """
    
    # (duration, radius, range, timeframe) grids per resolution level 
    parameterGrids = {
        "low": (np.arange(3, 30, step=1500), 
                np.arange(200, 2000, step=100000), 
                np.arange(100, 1500, step=50000), 
                np.arange(5, 19, step=100000)), 
        "high": (np.arange(3, 30, step=4), 
                 np.arange(200, 2000, step=200), 
                 np.arange(100, 1500, step=200), 
                 np.arange(5, 19, step=4)), 
        "medium": (np.arange(3, 31, step=9), 
                   np.arange(200, 2000, step=600), 
                   np.arange(100, 1500, step=600), 
                   np.arange(5, 20, step=9)), 
        "fine_grained": (np.arange(3, 31, step=3), 
                         np.arange(200, 2000, step=200), 
                         np.arange(100, 2000, step=200), 
                         [10]), # set timeframe to be 10 seconds, as discussed 
    }
    # validate before touching any file; the isinstance guard keeps unhashable values out of the lookup 
    if not isinstance(resolution, str) or resolution not in parameterGrids: 
        raise ValueError(f"Unknown resolution level {resolution}")
    duration_grid, radius_grid, range_grid, timeframe_grid = parameterGrids[resolution]

    # PLEASE LOOK FOR TODO'S AS THEY ARE NECESSARY CHANGES TO SETUP THIS CODE CHUNK 

    # read teacher position data
    teacherPos = pd.read_csv("output_files/teacher_position_sprint1_shou.csv", index_col=False) 
    objPos = pd.read_csv("raw data/seating_chart_x_y_seat_only_sprint1_shou.csv", index_col=False) 
    obsLog = pd.read_csv("output_files/observation_events.tsv", sep="\t", index_col=False) 

    # TODO: dataframe to hold the result of the sweep
    sweepDF = pd.DataFrame() 
    #sweepDF = pd.read_csv("parameter_sweep_master_sprint1_shou.csv", index_col=False) 

    # stops in the observation log do not depend on any swept parameter 
    obsStops = sd.getStopsFromObs(obsLog) 

    for duration in tqdm(duration_grid): 
        for radius in radius_grid: 

            # call stop detection utility, get starting and end timestamp for each stop with corresponding parameters (duration, radius)
            # this only depends on duration and radius, so it is done once per pair rather than once per timeframe 
            teacherStops = sd.getStops(teacherPos.chosen_X, teacherPos.chosen_Y, 
                                       teacherPos.time_stamp, teacherPos.periodID, 
                                       teacherPos.dayID, duration, radius) 

            events, centroids, stop_indeces = getStopEvent(teacherPos, teacherStops) 
            teacherPos["event"] = events # populate event column for teacher positon dataframe 
            teacherPos["centroid"] = centroids 
            teacherPos["stop_index"] = stop_indeces 

            for timeframe in timeframe_grid:

                inBoth, inPos, inObs = getPercentages(teacherStops, obsStops, timeframe) 

                for rng in range_grid: 
                    rng = int(rng)

                    # check if the current set of parameters have been calculated 
                    if sweepDF.shape[0] > 0:
                        alreadyDone = ( (sweepDF["duration"] == duration) 
                                        & (sweepDF["radius"] == radius) 
                                        & (sweepDF["range"] == rng) 
                                        & (sweepDF["timeframe"] == timeframe) ) 
                        # if yes, skip this set and go to the next 
                        if alreadyDone.any(): 
                            if verbose:
                                print("Parameter set", duration, radius, rng, "skipped")
                                print("###########################################")
                            continue 

                    # get the objects and their correpsonding distances to centroid within rng (range) 
                    teacherPos["possibleSubjects"] = getClosestObjs(teacherPos, objPos, rng)

                    (rightMatchPercentage, rightGuessPercentage, 
                     n_hits, n_misses, n_false_alarms_inside, 
                     n_false_alarms_outside) = calcTriangulationScoreAndPercentages(teacherPos, 
                                                                                    obsLog, 
                                                                                    len(teacherPos),
                                                                                    timeframe=timeframe) 

                    # update sweep dataframe with each parameter set we test for 
                    newRow = {"duration": [duration], 
                              "radius": [radius], 
                              "range": [rng], 
                              "perc_of_stops_in_both": [inBoth], 
                              "perc_of_stops_in_pos": [inPos], 
                              "perc_of_stops_in_obs": [inObs], 
                              "right_match_percentage": [rightMatchPercentage], 
                              "right_guess_percentage": [rightGuessPercentage],
                              "timeframe": [timeframe],
                              'n_hits': [n_hits],
                              'n_misses': [n_misses], 
                              'n_false_alarms_inside': [n_false_alarms_inside], 
                              'n_false_alarms_outside': [n_false_alarms_outside]
                             } 
                    sweepDF = pd.concat([sweepDF, pd.DataFrame(newRow)], ignore_index=True)

                    if verbose: 
                        print(f"Hyperparameter timeframe is: {timeframe}") 
                        print("Parameter set (duration, radius, range) is:", duration, radius, rng) 
                        print(f"n_hits is {n_hits}")
                        print(f"n_misses is {n_misses}")
                        print(f"n_false_alarms_inside is {n_false_alarms_inside}")
                        print(f"n_false_alarms_outside is {n_false_alarms_outside}")
                        print("Three percentages are (in both, in position only, in observation only):", inBoth, inPos, inObs) 
                        print("Right match percentage is", rightMatchPercentage) 
                        print("Right guess percentage is", rightGuessPercentage) 
                        print("###########################################")

    export = ( outputFileName != "" ) 
    if export:
        # best effort: a failed export must not throw away the computed results 
        try: 
            sweepDF.to_csv(outputFileName, index=False)
            if verbose:
                print("Changes to the data file has been saved to", outputFileName)
                print("###########################################")
        except Exception as e:
            print("WARNING: Cannot export data file, just returning the dataframe object ...")
            print("The error raised is: ", e)
            
    return sweepDF

# Step 1: Explore search space

In [2]:
def get_best_avg_score(df_score):
    """Return the mean, over all rows, of the per-row average of the match and guess percentages."""
    match_pct = df_score['right_match_percentage']
    guess_pct = df_score['right_guess_percentage']
    per_row_avg = (match_pct + guess_pct) / 2
    return np.mean(per_row_avg)

In [3]:
if __name__ == "__main__":
    # run the fine-grained sweep and write the result table to csv 
    df_ref = run_param_sweep(
        verbose=False,
        outputFileName="output_files/parameter_sweep_full_v1.csv",
        resolution="fine_grained",
    )

NameError: name 'pd' is not defined

In [None]:
if __name__ == "__main__":
    # load the results of a parameter sweep from csv 
    df_ref = pd.read_csv(
        'parameter-search-results.csv',
    )

In [None]:
# What hyperparams are most associated with accuracy?
if __name__ == "__main__":
    # per-row average of the match and guess percentages 
    df_ref['match_guess_avg'] = (
        df_ref['right_match_percentage']
        .add(df_ref['right_guess_percentage'])
        .div(2)
    )

In [None]:

if __name__ == "__main__":
    # Correlation of every swept parameter with every outcome column, computed
    # over the rows where both values are valid (masked_invalid flags NaN/inf).
    # NOTE(review): 'n_rewards' and 'n_penalties' are not among the columns written
    # by run_param_sweep; confirm that the loaded csv provides them.
    for y in ['right_match_percentage', 'right_guess_percentage', 'n_rewards', 'n_penalties']:
        print(f'\nCorrelating with outcome {y}')
        for x in ['duration', 'radius', 'range', 'timeframe']:
            xx = df_ref[x]
            yy = df_ref[y]
            xxm = np.ma.masked_invalid(xx)
            yym = np.ma.masked_invalid(yy)
            # keep a row only when neither value is masked
            msk = ~(xxm.mask | yym.mask)
            coefficient = np.corrcoef(xx[msk], yy[msk])[0][1]
            print(f'r({x}, {y}) = {round(coefficient, 3)}')
        

# Step 2: Fine-tune reward and penalty with regard to the size of the timeframe

## Additional code from Steven

In [None]:
if __name__ == "__main__": 
    # Single run of the pipeline with one fixed parameter set. 

    # read teacher position data
    teacherPos = pd.read_csv("teacher_position_sprint1_shou.csv", index_col=False) 
    objPos = pd.read_csv("seating_chart_x_y_sprint1_shou.csv", index_col=False) 
    obsLog = pd.read_csv("observation_distilled_sprint1_shou.tsv", sep="\t", index_col=False) 

    # parameter setting 
    duration = 15
    radius = 500
    rng = 2000

    # call stop detection utility, get starting and end timestamp for each stop 
    teacherStops = sd.getStops(teacherPos.chosen_X, teacherPos.chosen_Y, 
                            teacherPos.time_stamp, teacherPos.periodID, 
                            teacherPos.dayID, duration, radius) 

    # getStopEvent returns three lists; unpacking only two of them raises a ValueError 
    events, centroids, stop_indeces = getStopEvent(teacherPos, teacherStops) 
    teacherPos["event"] = events # populate event column for teacher positon dataframe 
    teacherPos["centroid"] = centroids 
    teacherPos["stop_index"] = stop_indeces 

    # get the objects and their correpsonding distances to centroid within rng (range) 
    subjects = getClosestObjs(teacherPos, objPos, rng)
    teacherPos["possibleSubjects"] = subjects 
