# Results Summary
## NOTE: Python uses 0-based indexing and MATLAB uses 1-based indexing. The results below refer to the Python DataFrame indices.
1. Session 1 Block 2: 1 extra pulse in trial 1. Fixed ✅
2. Session 1 Block 3: 4 extra pulses in trial 4. Fixed ✅
3. Session 1 Block 4: 2 extra pulses in trial 5. Fixed ✅
4. Session 2 Block 1: 1 extra pulse in trial 2. Fixed ✅. NOTE: trial 1 has the correct length but may need a closer look.
5. Session 2 Block 3: 4 extra pulses in trial 2. Fixed ✅.
6. Session 2 Block 5: 1 extra pulse in trial 3. Fixed ✅.
7. Session 2 Block 7: 1 extra pulse in trial 1. Not fixed ❌. Will fix if there's more time
8. Session 3 Block 1: 1 extra pulse in trial 6 and 11 extra pulses in trial 7. Fixed ✅.
9. Session 3 Block 3: 3 extra pulses in trial 1. Fixed ✅.
10. Session 3 Block 4: 1 extra pulse in trial 4. Fixed ✅. NOTE: trial 1 has the correct length but may need a closer look.
11. Session 3 Block 5: 1 extra pulse in trial 4. Fixed ✅.

## Notes:
1. Paths may differ; change them accordingly
2. This only works for extra pulses; I made no logic for missing pulses because it is harder to assume where a marker should be with high confidence than to use process of elimination to remove one.
3. This assumes we are using emo003: similarly to emo004, it appears to have a missing first pulse for each trial in the dataset. This can be changed if desired by amending `get_clips_from_behavioral()`.

In [5]:
# Imports
import pandas as pd
import math

In [23]:
def get_clips_from_behavioral(session, block, emo_version):
    """
    get_clips_from_behavioral() converts the video clips into a 1D list that matches the codes used in MATLAB.

    The .csv file contains information for the seven trials within the pilot study in the following form:
    Block | Videos                               |
    1     | [XXXX.mp4, XXXY.mp4, ... , XXYX.mp4] |
    .     |               .                      |
    .     |               .                      |
    .     |               .                      |
    7     | [XYXX.mp4, YXXX.mp4, ... , YYYX.mp4] |,

    which maps the videos that were shown to an individual during each block.

    What the code actually reads: the " Block" column (note the leading space) of the last
    six rows of the file (for block "7": the six rows before the final row). Each entry has
    its first two characters and its last character stripped and is then split on '|'.

    params:
        session (string): the session number.
        block (string): the block number.
        emo_version (string): the dataset number, used in the `emo00{emo_version}` folder name.

    return:
        clip_copy (list): the videos within a block where two empty strings indicate the start of the next trial.
    """
    # NOTE(review): the folder uses emo_version but the file name hard-codes "[emo003]" —
    # confirm whether the file name should follow emo_version as well.
    behavioral = pd.read_csv(f"/Users/melissaosheroff/Documents/Data/emo00{emo_version}/Session {session}/Raw/Behavioral/[emo003]-s0{session}-b0{block}.csv")

    n_rows = len(behavioral)
    if block == "7":
        # Block 7: skip the final row and keep the six rows before it
        behavioral = behavioral[n_rows - 7:n_rows - 1]
    else:
        behavioral = behavioral[n_rows - 6:n_rows]

    clip_copy = []
    for entry in behavioral[" Block"]:
        trial = entry[2:-1].split('|')

        # Some of the behaviorals are missing a 4 in the .mp4 of the last video
        last_video = trial[-1]
        if last_video and not last_video.endswith('4'):
            # use index 1 for the sets that do not include the first clip (004, 003)
            # NOTE(review): the first clip is only dropped inside this branch, i.e. only when
            # the trailing '4' was missing — confirm that this coupling is intended.
            trial = trial[1:-1] + [last_video + '4']

        # Two empty strings separate this trial from the next one
        clip_copy.extend(trial)
        clip_copy.extend(["", ""])

    return clip_copy

def get_latencies(session, block, latency_value):
    """
    get_latencies() gets the latencies recorded during the EEG trials as well as the code of each event.

    Latencies are used as a unit of time (seconds = latency / latency_value).
    The EEG recorded the beginning of each video as 1 pulse, the end of a trial as 2 pulses, and the crosshairs as 3 pulses.

    params:
        session (string): the session number.
        block (string): the block number.
        latency_value (int): the latency value (how many measurements are taken per second).

    return:
        latencies (pd.DataFrame): columns "latency", "code", "seconds" (latency / latency_value) and
            "delta_secs_observed" (initialised to 0.0, to be filled in by the caller).
    """
    # This file is from Step1. It is the csv created and stored as EEG.event_coded
    latencies = pd.read_csv(f"/Users/melissaosheroff/Documents/emo003/S{session}/B{block}/event_coded_new.csv")

    # Work on an explicit copy so the column assignments below never target a view of the raw frame
    latencies = latencies[["latency", "code"]].copy()

    # Add column which translates latencies to seconds
    latencies["seconds"] = latencies["latency"] / latency_value

    # Add a column which will be used to find difference between seconds of consecutive clips
    latencies["delta_secs_observed"] = 0.0

    return latencies

def get_clip_map():
    """
    Creates a mapping of the videos to the duration (sec).

    Reads the "Filename" and "duration" columns of the video template csv. If a file name
    occurs more than once, the last occurrence wins.

    return:
        clip_map (dict): a map in the form {video_name_1: duration_1, ...video_name_n: duration_n}.
    """
    template = pd.read_csv("/Users/melissaosheroff/Downloads/filtered_videos_18_emotions_mod.csv")
    # Pair the two columns position by position instead of looking rows up by index label
    return dict(zip(template["Filename"], template["duration"]))

def get_videos_from_behavioral(clip_copy, clip_map):
    """
    Build a dataframe that lists each clip of the block next to its duration in seconds.

    Entries of clip_copy that are not keys of clip_map (for example the "" trial
    separators) are given a duration of 0.

    params:
        clip_copy (array): the videos in order of which they occur in the block with ["", ""] inbetween each trial.
        clip_map (dict): the mapping of the video name to their duration in seconds.

    return:
        pd.DataFrame: columns "clip_behavioral" (the clips in order of occurrence) and
            "duration_behavioral" (their length, 0 when unknown).
    """
    durations = [clip_map.get(name, 0) for name in clip_copy]
    table = {"clip_behavioral": clip_copy, "duration_behavioral": durations}
    return pd.DataFrame(table)

In [28]:
def step_onepoint5(session, block, emo_version):
    """
    Creates dataframes which can be compared by looking at the expected duration of each clip next to
    the time observed between recorded pulses. This can be used to determine when there are extra
    pulses in the recording, but cannot be used to find missing ones.

    For every pulse except the last, delta_secs_observed is the time in seconds until the next pulse.
    2 seconds are subtracted from that gap unless the next pulse has code 2 or the gap is shorter
    than 2 seconds; in those cases the raw gap is stored. The last pulse keeps its initial 0.0.

    params:
        session (string): the session number.
        block (string): the block number.
        emo_version (string): the dataset (emosense version).

    return:
        latencies (pd.DataFrame): the recorded pulses with their time in seconds and delta_secs_observed.
        videos_behavioral (pd.DataFrame): connects the known video clips to the duration in seconds.
    """
    # 512 is passed as latency_value (measurements per second)
    latencies = get_latencies(session, block, 512)
    videos_behavioral = get_videos_from_behavioral(
        get_clips_from_behavioral(session, block, emo_version),
        get_clip_map(),
    )

    seconds = latencies["seconds"]
    codes = latencies["code"]

    # Find the differences between consecutive pulses in seconds
    for row in range(1, len(latencies)):
        gap = float(seconds[row]) - float(seconds[row - 1])

        # NOTE(review): the 2 is presumably a fixed 2-second pause before each clip — confirm
        shortened = gap - 2
        keep_raw_gap = codes[row] == 2 or shortened < 0

        latencies.loc[row - 1, "delta_secs_observed"] = gap if keep_raw_gap else shortened

    return latencies.reset_index(drop=True), videos_behavioral

In [29]:
def join_df(latency, videos_behavioral):
    """
    Create a dataframe that organizes the latencies and clip durations side by side.

    The two inputs are walked in parallel. A pulse is paired with the current behavioral row
    when that row has a non-zero duration or when the pulse code is 2 or 3. Otherwise the
    behavioral row is a zero-duration entry (trial separator) while the pulse is not an
    end-of-trial code, so the pulse is an extra one: it is emitted on its own row marked
    "IN TRIAL ^" and the behavioral position does not advance.

    While you have to visually see which of the pulses in the latencies is the extra,
    the sets are separated by 2 and 3 pulses which mark the end of a trial.

    Rows are read by position. The walk stops as soon as either input is exhausted, so a
    recording that ends on extra pulses (no closing 2/3 code) no longer raises.

    params:
        latency (pd.DataFrame): needs the columns "code" and "delta_secs_observed".
        videos_behavioral (pd.DataFrame): needs the columns "clip_behavioral" and "duration_behavioral".

    return:
        pd.DataFrame: columns "code", "clip_behavioral", "delta_secs_observed", "duration_behavioral".
    """
    columns = ["code", "clip_behavioral", "delta_secs_observed", "duration_behavioral"]

    codes = latency["code"].tolist()
    deltas = latency["delta_secs_observed"].tolist()
    clips = videos_behavioral["clip_behavioral"].tolist()
    durations = videos_behavioral["duration_behavioral"].tolist()

    # Collect plain tuples and build the frame once instead of concatenating per row
    rows = []
    latency_i = 0
    row = 0
    while row < len(durations) and latency_i < len(codes):
        code = codes[latency_i]

        if durations[row] != 0 or code in (2, 3):
            # Pulse and behavioral row belong together
            rows.append((code, clips[row], deltas[latency_i], durations[row]))
            row += 1
        else:
            # Extra pulse inside the trial: mark it and stay on the same behavioral row
            rows.append((code, "", deltas[latency_i], "IN TRIAL ^"))
        latency_i += 1

    return pd.DataFrame(rows, columns=columns)

In [30]:
# Removed index 0 (matlab's 1). NOTE Trial 2 has the correct number of clips, but the clip durations differ
# Arguments are (session, block, emo_version)
latency, videos_behavioral = step_onepoint5("1", "2", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,0800.mp4,15.681152,15.7
1,4.0,0528.mp4,3.123047,3.07
2,4.0,0322.mp4,10.355957,10.4
3,4.0,0413.mp4,2.018066,1.96
4,4.0,1157.mp4,7.844238,7.92
5,4.0,1300.mp4,4.87207,5.08
6,2.0,,19.509766,0.0
7,3.0,,38.45752,0.0
8,1.0,0173.mp4,21.504883,12.04
9,4.0,0017.mp4,15.980469,8.75


In [15]:
# Removed index 21 - 24 (22-25 in matlab)
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("1", "3", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,0246.mp4,15.176758,15.12
1,4.0,0626.mp4,11.058105,11.08
2,4.0,1356.mp4,15.050293,15.12
3,2.0,,10.45459,0.0
4,3.0,,31.955566,0.0
5,1.0,1856.mp4,9.34668,9.3
6,4.0,2113.mp4,15.173828,15.12
7,4.0,1580.mp4,8.246094,8.25
8,4.0,0292.mp4,3.123047,3.18
9,4.0,0388.mp4,4.989258,5.05


In [16]:
# Removed 35 & 36 (matlab 36, 37)
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("1", "4", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,0310.mp4,3.82666,3.78
1,4.0,1316.mp4,5.231934,5.2
2,4.0,0540.mp4,5.834961,5.87
3,4.0,0812.mp4,1.816895,1.8
4,4.0,1649.mp4,17.887207,17.9
5,4.0,1799.mp4,2.243164,2.4
6,2.0,,8.730469,0.0
7,3.0,,25.716797,0.0
8,1.0,0007.mp4,13.758789,13.7
9,4.0,0988.mp4,3.120605,3.21


In [21]:
# Removed 6 (Matlab 7) NOTE trial 1 matches in length but not in video durations
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("2", "1", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,0894.mp4,26.630859,15.12
1,4.0,1992.mp4,18.092285,10.0
2,4.0,1524.mp4,16.182617,8.84
3,4.0,0011.mp4,3.414551,2.1
4,2.0,,10.074707,0.0
5,3.0,,22.501465,0.0
6,1.0,0273.mp4,3.621582,3.6
7,4.0,1363.mp4,2.015625,2.0
8,4.0,1900.mp4,3.420898,3.36
9,4.0,0618.mp4,8.240234,8.3


In [126]:
# Removed indices 8-11 and 13.
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("2", "3", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,0032.mp4,15.077148,15.03
1,4.0,0412.mp4,27.130371,27.2
2,4.0,2145.mp4,4.956055,5.0
3,2.0,,5.190918,0.0
4,3.0,,32.54248,0.0
5,1.0,0399.mp4,6.133301,6.08
6,4.0,0591.mp4,14.568848,14.64
7,4.0,0472.mp4,5.231934,4.5
8,4.0,0206.mp4,8.144531,8.8
9,4.0,1159.mp4,4.486816,4.6


In [130]:
# Removed 19 (Matlab 20)
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("2", "5", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,1848.mp4,2.419922,2.48
1,4.0,1833.mp4,0.913086,0.9
2,4.0,0533.mp4,1.817383,1.8
3,4.0,1352.mp4,3.022949,3.04
4,4.0,1794.mp4,0.110352,0.15
5,4.0,1846.mp4,1.615723,1.53
6,4.0,1018.mp4,1.91748,2.0
7,4.0,0484.mp4,16.282715,16.2
8,4.0,0303.mp4,1.104492,1.2
9,2.0,,12.965332,0.0


In [165]:
# index 0 AND INCLUDE 45-48 (ML 46-49) FIXME
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("2", "7", "3")
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,1439.mp4,1.61377,1.6
1,4.0,1573.mp4,2.01709,1.96
2,4.0,2181.mp4,1.214355,1.2
3,4.0,0055.mp4,6.036133,6.1
4,4.0,1314.mp4,2.822266,2.8
5,4.0,2025.mp4,9.852051,9.8
6,4.0,1400.mp4,1.515625,1.54
7,4.0,0881.mp4,4.831055,4.88
8,4.0,1151.mp4,3.123047,3.06
9,4.0,1421.mp4,3.324219,3.3


In [170]:
# remove index 36 (matlab 37) and 45-55 (matlab 46-56)
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("3", "1", "3")
# Show every row of the joined dataframe instead of a truncated view
pd.set_option('display.max_rows', None)
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,1583.mp4,13.664551,13.68
1,4.0,0029.mp4,17.984863,17.96
2,4.0,2108.mp4,1.917969,2.0
3,4.0,0060.mp4,5.306641,5.34
4,2.0,,11.103516,0.0
5,3.0,,25.021973,0.0
6,1.0,0180.mp4,1.615234,1.6
7,4.0,1616.mp4,1.012207,1.0
8,4.0,1071.mp4,2.017578,2.04
9,4.0,1688.mp4,1.414062,1.4


In [183]:
# Removed index 1-3 (2-4 in matlab)
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("3", "3", "3")
# Show every row of the joined dataframe instead of a truncated view
pd.set_option('display.max_rows', None)
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,0134.mp4,3.423828,3.43
1,4.0,1065.mp4,6.73877,6.72
2,4.0,0347.mp4,6.035645,6.0
3,4.0,1533.mp4,1.816895,1.9
4,4.0,1535.mp4,3.022949,3.0
5,4.0,0788.mp4,15.049316,15.13
6,2.0,,5.869141,0.0
7,3.0,,31.754883,0.0
8,1.0,1935.mp4,1.414062,1.4
9,4.0,1666.mp4,14.970215,15.03


In [56]:
# Removed index 22 (matlab 23). Double check first trial
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("3", "4", "3")
# Show every row of the joined dataframe instead of a truncated view
pd.set_option('display.max_rows', None)
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,1000.mp4,13.469238,7.2
1,4.0,0194.mp4,11.561523,6.09
2,4.0,0820.mp4,5.23291,2.36
3,4.0,0469.mp4,10.255371,5.28
4,4.0,0574.mp4,11.058594,5.85
5,4.0,0858.mp4,9.753906,5.0
6,4.0,1091.mp4,18.19043,10.08
7,4.0,0462.mp4,5.005859,3.0
8,2.0,,6.622559,0.0
9,3.0,,25.214355,0.0


In [42]:
# Removed 29 (matlab 30)
# step_onepoint5 returns two dataframes (latencies, videos_behavioral), so unpack exactly two names
latency, videos_behavioral = step_onepoint5("3", "5", "3")
# Show every row of the joined dataframe instead of a truncated view
pd.set_option('display.max_rows', None)
display(join_df(latency, videos_behavioral))

Unnamed: 0,code,clip_behavioral,delta_secs_observed,duration_behavioral
0,1.0,1639.mp4,6.236328,6.3
1,4.0,1762.mp4,2.52002,2.4
2,4.0,1483.mp4,6.235352,6.3
3,4.0,1515.mp4,1.104492,1.2
4,2.0,,9.884766,0.0
5,3.0,,23.299805,0.0
6,1.0,1746.mp4,5.328125,5.28
7,4.0,2071.mp4,15.06543,15.08
8,4.0,2179.mp4,13.769531,13.8
9,4.0,0859.mp4,4.529297,4.5
