In [1]:
import os
import glob
import numpy as np
import pandas as pd
from scipy.stats import poisson_means_test
import plotly.graph_objects as go

# Helpful Functions

### Functions for plotting

Function to plot the heart rate over the course of the video.

In [2]:
def plot_heart_beat(df):
    """Plot the raw heart rate against video time.

    Parameters:
    - df (DataFrame): Needs a timedelta ``vidts`` column (converted to seconds
      for the x-axis) and an ``HR`` column (y-axis).
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df["vidts"].dt.total_seconds(),
            y=df["HR"],
            mode="lines",
            name="Heart Rate",
        )
    )
    fig.update_layout(
        title="Heart Rate",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # Axis titles use title.font instead of the deprecated `titlefont`
        # attribute, which newer plotly releases no longer accept.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title=dict(text="Heart Rate (bpm)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
    )
    fig.show()

Function to plot the heart rate distribution (data is for 1 video).

In [3]:
def plot_heat_beat_distribution(df):
    """Show a histogram of the ``HR`` column of ``df``.

    Parameters:
    - df (DataFrame): Needs an ``HR`` column.
    """
    # Both axes share the same title/tick font sizes.
    axis_fonts = dict(title_font=dict(size=20), tickfont=dict(size=20))

    fig = go.Figure()
    fig.add_trace(go.Histogram(x=df["HR"], nbinsx=26))
    fig.update_layout(
        title_text="Heart Rate Distribution",
        title_font_size=30,
        title_x=0.5,
        xaxis_title_text="Heart Rate (bpm)",
        yaxis_title_text="Frequency",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        xaxis=dict(axis_fonts),
        yaxis=dict(axis_fonts),
    )
    fig.show()

Function to mark the elevated heart rates on the heart rate plot.

In [4]:
def plot_elevated_heart_rates(df):
    """Plot the heart rate and mark the samples flagged as elevated in red.

    Parameters:
    - df (DataFrame): Needs ``HR`` and boolean ``elevated_hr`` columns. When a
      timedelta ``vidts`` column is present it is used (in seconds) for the
      x-axis so the plot matches its "Time (Seconds)" label; otherwise the
      frame's index is used.
    """
    # The axis is labelled in seconds, so prefer the video timestamp over the
    # positional index when it is available.
    if "vidts" in df.columns and pd.api.types.is_timedelta64_dtype(df["vidts"]):
        x_values = df["vidts"].dt.total_seconds()
    else:
        x_values = df.index.to_series()

    elevated = df["elevated_hr"]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_values, y=df["HR"], mode="lines", name="Heart Rate"))
    fig.add_trace(
        go.Scatter(
            x=x_values[elevated],
            y=df.loc[elevated, "HR"],
            mode="markers",
            name="Elevation HR",
            marker=dict(color="red"),
        )
    )
    fig.update_layout(
        title="Elevated Heart Rates",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title=dict(text="Heart Rate (bpm)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        legend=dict(
            x=0,
            y=1,
            bgcolor="rgba(0, 0, 0, 0)",
        ),
    )
    fig.show()

Function to plot the moving average of the heart rate.

In [5]:
def plot_moving_average_heart_rate(df):
    """Plot the moving-average heart rate against video time.

    Parameters:
    - df (DataFrame): Needs a timedelta ``vidts`` column and an ``average_HR``
      column.
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df["vidts"].dt.total_seconds(),
            y=df["average_HR"],
            mode="lines",
            name="Moving Average",
        )
    )
    fig.update_layout(
        title="Heart Rate Moving Average",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title=dict(text="Heart Rate (bpm)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
    )
    fig.show()

Function to mark the spiking heart rates on the moving average plot

In [6]:
def plot_spiking_heart_rates(df):
    """Plot the moving-average heart rate and mark the flagged spikes in red.

    Parameters:
    - df (DataFrame): Needs a timedelta ``vidts`` column, an ``average_HR``
      column and a boolean ``spiking_hr`` column.
    """
    # Compute the time axis and the spike mask once and reuse them for both traces.
    seconds = df["vidts"].dt.total_seconds()
    spikes = df["spiking_hr"]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=seconds, y=df["average_HR"], mode="lines", name="Moving Average"))
    fig.add_trace(
        go.Scatter(
            x=seconds[spikes],
            y=df.loc[spikes, "average_HR"],
            mode="markers",
            name="Spiking HR",
            marker=dict(color="red"),
        )
    )
    fig.update_layout(
        title="Spiking Heart Rate",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title=dict(text="Heart Rate (bpm)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        legend=dict(
            x=0,
            y=1,
            bgcolor="rgba(0, 0, 0, 0)",
        ),
    )
    fig.show()

Function to plot the flagged elevated heart rates

In [7]:
def plot_flagged_elevated_heart_rates(df):
    """Plot the elevated-heart-rate flag (0/1) against video time.

    Parameters:
    - df (DataFrame): Needs a timedelta ``vidts`` column and a boolean
      ``elevated_hr`` column.
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df["vidts"].dt.total_seconds(),
            y=df["elevated_hr"].astype(int),
            mode="lines",
            name="Elevated HR",
            marker=dict(color="red"),
        )
    )
    fig.update_layout(
        title="Elevated Heart Rates",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        # The trace is the 0/1 flag, not a heart rate in bpm.
        yaxis=dict(
            title=dict(text="Elevated HR Flag", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        legend=dict(
            x=0,
            y=1,
            bgcolor="rgba(0, 0, 0, 0)",
        ),
    )
    fig.show()

Function to plot the flagged spiking heart rates

In [8]:
def plot_flagged_spiking_heart_rates(df):
    """Plot the spiking-heart-rate flag (0/1) against video time.

    Parameters:
    - df (DataFrame): Needs a timedelta ``vidts`` column and a boolean
      ``spiking_hr`` column.
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df["vidts"].dt.total_seconds(),
            y=df["spiking_hr"].astype(int),
            mode="lines",
            name="Spiking HR",
            marker=dict(color="red"),
        )
    )
    fig.update_layout(
        title="Spiking Heart Rates",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        # The trace is the 0/1 flag, not a heart rate in bpm.
        yaxis=dict(
            title=dict(text="Spiking HR Flag", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        legend=dict(
            x=0,
            y=1,
            bgcolor="rgba(0, 0, 0, 0)",
        ),
    )
    fig.show()

Function to plot the flagged intersection events

In [9]:
def plot_flagged_intersection_events(flagged_video_data):
    """Plot the intersection flag (0/1) against video time.

    Parameters:
    - flagged_video_data (DataFrame): Needs a timedelta index (converted to
      seconds for the x-axis) and a boolean ``intersection`` column.
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=flagged_video_data.index.total_seconds(),
            y=flagged_video_data["intersection"].astype(int),
            mode="lines",
            name="Intersection",
        )
    )
    fig.update_layout(
        title="Intersection Events",
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title=dict(text="Intersection Flag", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
    )
    fig.show()

Function to plot the distribution of heart rate anomalies

In [10]:
def _plot_anomaly_density(at_intersection, not_at_intersection):
    """Overlay the probability-density histograms of two anomaly-count samples."""
    fig = go.Figure()
    for values, name in (
        (at_intersection, "At Intersection"),
        (not_at_intersection, "Not at Intersection"),
    ):
        fig.add_trace(
            go.Histogram(
                x=values,
                nbinsx=25,
                histnorm="probability density",
                name=name,
            )
        )
    fig.update_layout(
        # The plotted values are per-row sums of the participants' anomaly
        # flags, not heart rates in bpm.
        xaxis_title_text="Flagged Participants per Time Step",
        yaxis_title_text="Probability Density",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        legend=dict(
            x=0,
            y=1,
            bgcolor="rgba(0, 0, 0, 0)",
        ),
    )
    fig.show()


def plot_heart_rate_anomalies(n1, n2, intersection_true_elevations, intersection_false_elevations, intersection_true_spikes, intersection_false_spikes):
    """Print the two sample sizes and plot the anomaly distributions.

    Two figures are shown, one for elevations and one for spikes; each overlays
    the "At Intersection" and "Not at Intersection" samples as
    probability-density histograms.

    Parameters:
    - n1: Sample size of the at-intersection group (printed only).
    - n2: Sample size of the not-at-intersection group (printed only).
    - intersection_true_elevations / intersection_false_elevations: Elevation
      anomaly values at / not at intersections.
    - intersection_true_spikes / intersection_false_spikes: Spike anomaly
      values at / not at intersections.
    """
    print(f'The sample size of heart rate anomalies at intersections is {n1}')
    print(f'The sample size of heart rate anomalies not at intersections is {n2}')

    print("Elevation")
    _plot_anomaly_density(intersection_true_elevations, intersection_false_elevations)

    print("Spikes")
    _plot_anomaly_density(intersection_true_spikes, intersection_false_spikes)

<div>

### Finding elevated and lower heart rates

Looks for the periods where the heart rate is significantly elevated.

- An elevated heart rate is defined as a heart rate that is above a certain threshold. 
- The threshold was set to $\mu + 2\sigma$, where $\mu$ is the mean and $\sigma$ is the standard deviation of a Gaussian function fit to the driver's heart rate distribution.
- The Gaussian function is defined by its mean (μ) and standard deviation (σ). The mean represents the center of the distribution, while the standard deviation determines the spread or dispersion of the data points around the mean.
- Formally: $f(x) = \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{1}{2}(\frac{x-\mu}{\sigma})^2}$
- Example:

<!-- Centered image with reduced size -->
<div style="text-align:center">
    <img src="Normal_Distribution.png" style="width:50%;"/>
</div>
(Image by Inductiveload - self-made, Mathematica, Inkscape, Public Domain, <a href="https://commons.wikimedia.org/w/index.php?curid=3817954">Link</a>)

- $threshold = \mu + 2\sigma$
- Data is **flagged when:** the heart rate is above the threshold, and **unflagged when:** the heart rate is below the threshold.

In [11]:
class Guassian:
    """A single Gaussian described by the mean and standard deviation of some data.

    The parameters are taken from the data's own ``.mean()`` and ``.std()``
    methods, so the degrees-of-freedom convention follows the input type
    (NumPy arrays and pandas Series use different defaults).
    """

    def __init__(self, df = None):
        # data the distribution was fitted to (None until fitted)
        self.df = df
        # fitted mean and standard deviation (None until fitted)
        self.mu = None
        self.sigma = None

        if df is not None:
            self.fit_guassian_distribution(df)

    def fit_guassian_distribution(self, df):
        """Fit the distribution to ``df`` and return ``(mu, sigma)``.

        The data is also stored on the instance so the plotting methods work
        after a manual fit, not only when data is passed to the constructor.
        """
        self.df = df
        self.mu = df.mean()
        self.sigma = df.std()
        return self.mu, self.sigma

    def get_probability(self, x):
        """Return the Gaussian probability density evaluated at ``x``."""
        return 1/(np.sqrt(2*np.pi)*self.sigma)*np.exp(-1/2*((x-self.mu)/self.sigma)**2)

    def log_likelihood(self, x):
        """Return the summed log-density of ``x`` under the fitted Gaussian.

        Computed directly in log space: taking ``log`` of the density underflows
        to ``-inf`` for points far from the mean.
        """
        z = (np.asarray(x) - self.mu) / self.sigma
        return np.sum(-0.5 * z**2 - np.log(self.sigma) - 0.5 * np.log(2 * np.pi))

    def _plot_distribution(self, title, threshold=None):
        """Plot the data histogram, the fitted curve, the mean and an optional threshold line."""
        if self.df is None or self.mu is None or self.sigma is None:
            raise ValueError("The distribution must be fitted to data before plotting.")

        # evaluate the fitted curve across the observed data range
        x = np.linspace(self.df.min(), self.df.max(), 100)
        y = self.get_probability(x)
        peak = self.get_probability(self.mu)

        fig = go.Figure()
        fig.add_trace(go.Histogram(x=self.df, nbinsx=25, histnorm='probability density', name='Heart Rate Distribution'))
        fig.add_trace(go.Scatter(x=x, y=y, mode="lines", name='Gaussian Distribution'))
        fig.add_trace(go.Scatter(x=[self.mu, self.mu], y=[0, peak], mode='lines', name='Mean'))
        if threshold is not None:
            # vertical marker drawn to the same height as the mean line
            fig.add_trace(go.Scatter(x=[threshold, threshold], y=[0, peak], mode='lines', name='Threshold'))
        fig.update_layout(
            title=title,
            title_font_size=30,
            title_x=0.5,
            plot_bgcolor="rgba(0,0,0,0)",
            paper_bgcolor="rgba(0,0,0,0)",
            # title.font replaces the deprecated `titlefont` attribute.
            xaxis=dict(
                title=dict(text="Heart Rate (bpm)", font=dict(size=20)),
                tickfont=dict(size=20),
            ),
            yaxis=dict(
                title=dict(text="Probability Density", font=dict(size=20)),
                tickfont=dict(size=20),
            ),
            legend=dict(
                x=0,
                y=1,
                bgcolor="rgba(0, 0, 0, 0)",
            ),
        )
        fig.show()

    def plot_guassian_distribution(self):
        """Plot the fitted Gaussian over the histogram of the data."""
        self._plot_distribution("Heart Rate Distribution")

    def plot_guassian_distribution_with_threshold(self, elevation):
        """Plot the fitted Gaussian over the data with a vertical line at ``elevation``."""
        self._plot_distribution("Heart Rate Distribution with Threshold", threshold=elevation)

In [12]:
def find_and_flag_elevated_heart_rate(df, plot_and_print=False, n_sigma=2):
    """Flag the samples whose heart rate exceeds ``mu + n_sigma * sigma``.

    A single Gaussian is fitted to the ``HR`` column (missing values are
    ignored for the fit) and a boolean ``elevated_hr`` column is added to
    ``df`` in place.

    Parameters:
    - df (DataFrame): Needs an ``HR`` column; it is modified in place.
    - plot_and_print (bool, optional): Whether to show the diagnostic plots and
      prints. Default is False.
    - n_sigma (float, optional): Number of standard deviations above the mean
      used as the threshold. Default is 2.
    Returns:
    - df (DataFrame): The same frame, with the ``elevated_hr`` column added.
    """
    # Fit a single Gaussian to the observed heart rates. Missing samples are
    # dropped so one NaN cannot turn the threshold into NaN and silently
    # disable the flagging.
    hr = df["HR"].dropna().to_numpy()
    guassian = Guassian(hr)

    if plot_and_print:
        print('👗 Fitting a Guassian plot to the data')
        guassian.plot_guassian_distribution()

    # calculate the elevation threshold
    elevation_threshold = guassian.mu + n_sigma * guassian.sigma

    if plot_and_print:
        print(f"Mean: {guassian.mu}, Sigma: {guassian.sigma}")
        print(f"Flagging elevated heart rate above: {elevation_threshold}")
        print(f"🏔️Plotting the distribution with the elevation threshold")
        guassian.plot_guassian_distribution_with_threshold(elevation_threshold)

    # flag every sample above the threshold (NaN heart rates compare as False)
    df["elevated_hr"] = df["HR"] > elevation_threshold

    if plot_and_print:
        print(f'⛳️Flagging {df["elevated_hr"].sum()} elevated heart rate events')
        plot_elevated_heart_rates(df)

    return df

<br>

### Finding heart rate spikes and falls

Looks for periods where the heart rate spikes significantly. 
- A "spike" is defined as the point at which the heart rate's rate of change exceeds a certain threshold. 
- A moving average is calculated for the heart rate data. The moving average is calculated using a window of 5 seconds.
- The threshold is set to the 90th percentile of all slope values.
- Data is **flagged when**: the slope exceeds the threshold, and **unflagged when:** the slope is below this threshold.

In [13]:
def find_and_flag_spiking_heart_rate(df, plot_and_print=False, slope_threshold=0.90, window=50):
    """Flag the samples where the smoothed heart rate rises unusually fast.

    Adds three columns to ``df`` in place: ``average_HR`` (rolling mean of
    ``HR``), ``average_HR_slope`` (first difference of ``average_HR``) and the
    boolean ``spiking_hr`` (slope strictly above the ``slope_threshold``
    quantile of all slopes).

    Parameters:
    - df (DataFrame): Needs an ``HR`` column; it is modified in place.
    - plot_and_print (bool, optional): Whether to show the diagnostic plots and
      prints. Default is False.
    - slope_threshold (float, optional): Quantile of the slope values used as
      the spiking threshold. Default is 0.90.
    - window (int, optional): Rolling-mean window in samples. Default is 50
      (intended as 5 seconds — assumes 10 samples per second, TODO confirm).
    Returns:
    - df (DataFrame): The same frame, with the three columns added.
    """
    # moving average over `window` samples
    df["average_HR"] = df["HR"].rolling(window=window).mean()

    if plot_and_print:
        print('🚶‍♂️‍➡️Plotting the moving average of the heart rate')
        plot_moving_average_heart_rate(df)
        # Slopes ABOVE the quantile are flagged, i.e. the steepest
        # (1 - slope_threshold) share of them.
        print(f"Finding the steepest {(1 - slope_threshold) * 100:g}% of slopes")

    # slope = sample-to-sample change of the moving average
    df["average_HR_slope"] = df["average_HR"].diff()

    # calculate the spiking threshold
    activation_threshold = df["average_HR_slope"].quantile(slope_threshold)

    # mark all the spikes (NaN slopes compare as False)
    df["spiking_hr"] = df["average_HR_slope"] > activation_threshold

    if plot_and_print:
        print(f'Spiking threshold is {activation_threshold}')
        print(f'🏁Flagging {df["spiking_hr"].sum()} spiking heart rate events')
        plot_spiking_heart_rates(df)

    return df

<br>

### Processing Each Participant's Data for the Video

In [14]:
def process_participant_data_for_video(video_data_path, show_all=False, plot_and_print=True):
    """
    Process participant data for a video.
    Every file in the directory whose name contains "_data" is read as one
    participant's CSV, flagged for elevated and spiking heart rates, and
    collected.
    Parameters:
    - video_data_path (str): The directory of the video data, ending with a path separator.
    - show_all (bool, optional): Whether to plot every participant. When False, at most the
      first participant (in sorted file order) is plotted. Default is False.
    - plot_and_print (bool, optional): Whether to plot and print the data. Default is True.
    Returns:
    - participants (dict): A dictionary containing processed data for each participant.
    - flagged_participant_data (DataFrame): A DataFrame containing flagged data for each participant,
      indexed by the first participant's ``vidts``.
    Raises:
    - FileNotFoundError: If no participant file is found under ``video_data_path``.
    """
    participants = {}
    flagged_participant_data = pd.DataFrame()

    # Sorted so that "the first participant" (the one that gets plotted and
    # whose timestamps become the index) does not depend on directory order.
    for participant in sorted(glob.glob(video_data_path + "*")):
        # Test the file name only: a "_data" somewhere in the directory path
        # must not turn every file into a participant file.
        file_name = os.path.basename(participant)
        if "_data" not in file_name:
            continue

        participant_id = os.path.splitext(file_name)[0]

        if plot_and_print:
            print(f"\n📥 Processing participant {participant_id}")

        # import data
        df = pd.read_csv(participant)
        df["vidts"] = pd.to_timedelta(df["vidts"], unit="s")

        # plot the heart rate
        if plot_and_print:
            print("\n📈 Plotting the heart rate over time:")
            plot_heart_beat(df)
            print("\n📊 Plotting the heart rate distribution:")
            plot_heat_beat_distribution(df)

        # find and flag the elevated and spiking heart rates
        df = find_and_flag_elevated_heart_rate(df, plot_and_print)
        df = find_and_flag_spiking_heart_rate(df, plot_and_print)

        # add to the dictionary
        participants[participant_id] = df

        if plot_and_print:
            # plot flagged data
            print("\n🚩 Plotting flagged data:")
            plot_flagged_elevated_heart_rates(df)
            plot_flagged_spiking_heart_rates(df)
            print(f'{"-" * 50}\n\n')

        # unless every participant was requested, stop plotting after the first one
        if not show_all:
            plot_and_print = False

        # add the flagged data as columns
        flagged_participant_data[participant_id + "_elevated_hr"] = df["elevated_hr"]
        flagged_participant_data[participant_id + "_spiking_hr"] = df["spiking_hr"]

    if not participants:
        raise FileNotFoundError(f"No participant '*_data*' files found under {video_data_path!r}")

    # index the flags by the first participant's video timestamps
    first_participant = next(iter(participants.values()))
    flagged_participant_data.index = first_participant["vidts"]

    return participants, flagged_participant_data

<br>

### Video Data: Manual Labels -> Time Series Data

Converts a time range in the format 'MM:SS-MM:SS' into a tuple of pandas Timedelta objects.

In [15]:
def parse_timedelta_range(time_range):
    """Convert a ``'start-end'`` label into a ``(start, end)`` tuple of Timedeltas.

    Each side is read as ``MM:SS`` (zero hours are prepended); a side that
    already has an hours part (``H:MM:SS``) is used as is. Whitespace around
    the range or around either side is ignored.

    Raises:
    - ValueError: If ``time_range`` is not of the form ``'start-end'``.
    """
    parts = str(time_range).strip().split("-")
    if len(parts) != 2:
        raise ValueError(f"Expected a time range like 'MM:SS-MM:SS', got {time_range!r}")

    def _to_timedelta(text):
        text = text.strip()
        # MM:SS has a single colon and needs the hours prepended
        if text.count(":") < 2:
            text = "00:" + text
        return pd.to_timedelta(text)

    start, end = parts
    return (_to_timedelta(start), _to_timedelta(end))

Extracts and categorizes the video labels.

In [16]:
def extract_and_flag_video_events(video_data_path, flagged_video_data, plot_and_print=True):
    """
    Extracts the labelled video events from a CSV file and flags them in a DataFrame.
    Each CSV row has a "Timestamp:" range and one column per event type; a
    non-empty event cell marks every sample inside that time range (inclusive).
    Parameters:
    - video_data_path (str): The file path of the CSV file containing video data.
    - flagged_video_data (DataFrame): The DataFrame where the flagged video data will be stored.
      Its index must be comparable with the parsed Timedelta range; it is modified in place.
    - plot_and_print (bool, optional): Whether to plot the intersection flag. Default is True.
    Returns:
    - flagged_video_data (DataFrame): The updated DataFrame with the boolean columns
      "intersection", "traffic_conflict" and "left_turn" added.
    """
    # CSV label column -> flag column written to the DataFrame
    event_columns = {
        "Intersections:": "intersection",
        "Traffic Conflict:": "traffic_conflict",
        "Left turn:": "left_turn",
    }

    # Load the CSV file
    data = pd.read_csv(video_data_path)

    # every sample starts unflagged
    for flag_column in event_columns.values():
        flagged_video_data[flag_column] = False

    for _, row in data.iterrows():
        # a row without a time range cannot be placed on the timeline
        if pd.isna(row["Timestamp:"]):
            continue

        start, end = parse_timedelta_range(row["Timestamp:"])
        in_range = (flagged_video_data.index >= start) & (flagged_video_data.index <= end)

        for label_column, flag_column in event_columns.items():
            if pd.notna(row[label_column]):
                flagged_video_data.loc[in_range, flag_column] = True

    if plot_and_print:
        print("🚦 Plotting Intersection events")
        plot_flagged_intersection_events(flagged_video_data)

    return flagged_video_data

<br>

### E Test

Performs an E-test on flagged video data to determine if there is a significant difference in heart rate during certain flagged conditions. 
- Statistical significance is determined by a p-value less than 0.05.
- Separates the data into two groups based on the flag condition. (e.g. elevated heart rate vs. normal heart rate)
- Calculates the sum of heart rate anomalies for each group.
- Performs an E-test.
- If the p-value from the e-test is less than 0.05, it indicates a significant difference in heart rate during the flagged condition.

In [17]:
def etest(flagged_video_data, flag_id, hr_anomaly, alpha=0.05):
    """Run the Poisson means E-test for one road condition and one anomaly type.

    Sample 1 is the rows where ``flag_id`` is True; sample 2 is the rows where
    none of the road-condition flags is True. For each sample the event count
    is the total of all columns whose name contains ``hr_anomaly`` and the
    sample size is the number of rows.

    Parameters:
    - flagged_video_data (DataFrame): Boolean flag columns per row.
    - flag_id (str): Name of the road-condition column defining sample 1.
    - hr_anomaly (str): Substring selecting the anomaly columns (e.g. "elevated_hr").
    - alpha (float, optional): Significance level. Default is 0.05.
    Returns:
    - (statistic, p_value): The E-test result, or ``(nan, nan)`` when either
      sample has no rows.
    """
    # find the data with the condition
    sample_1_data = flagged_video_data[flagged_video_data[flag_id]].filter(like=hr_anomaly).sum(axis=1)

    # find the data without any condition (only flags that exist in the frame are considered)
    road_condition_flags = [
        flag for flag in ('intersection', 'traffic_conflict', 'left_turn')
        if flag in flagged_video_data.columns
    ]
    sample_2_data = flagged_video_data[~flagged_video_data[road_condition_flags].any(axis=1)].filter(like=hr_anomaly).sum(axis=1)

    n1, n2 = len(sample_1_data), len(sample_2_data)
    if n1 == 0 or n2 == 0:
        # poisson_means_test requires positive sample sizes; report instead of crashing
        print(f'Cannot test {hr_anomaly} during/within a(n) {flag_id}: sample sizes are {n1} and {n2}.')
        return np.nan, np.nan

    t_stat, p_val = poisson_means_test(int(sample_1_data.sum()), n1, int(sample_2_data.sum()), n2)

    if p_val < alpha:
        print(f'There is a significant difference between a {hr_anomaly.replace("_hr", " heart rate")} during/within a(n) {flag_id}: The p-value is {p_val}. ')
    elif not np.isnan(p_val):
        print(f'There is no significant difference between {hr_anomaly.replace("_hr", " heart rate")} during/within a(n) {flag_id}: The p-value is {p_val}. ')
    return t_stat, p_val

Performs an E-test for every condition.

In [18]:
def determine_statistical_significance(flagged_video_data):
    """Run the E-test for both heart-rate anomaly types at intersections.

    E-test: https://github.com/scipy/scipy/blob/v1.14.1/scipy/stats/_hypotests.py#L151-L333

    Returns:
    - dict: Keys "intersection_elevated_p_val" and "intersection_spiking_p_val";
      each value is whatever ``etest`` returned for that anomaly type.
    """
    # Only the intersection condition is tested; the traffic_conflict tests are disabled.
    results = {}
    for anomaly in ("elevated", "spiking"):
        results[f"intersection_{anomaly}_p_val"] = etest(
            flagged_video_data, "intersection", f"{anomaly}_hr"
        )
    return results

Plots the distribution of heart rate anomalies for each condition to visually ensure that the data is normally distributed, and the variances are equal.

In [19]:
def checking_t_test_assumptions(df, video_number, plot_and_print=True):
    """Split the per-row anomaly counts by intersection flag and optionally plot them.

    Parameters:
    - df (DataFrame): Needs a boolean ``intersection`` column plus the columns
      whose names contain "elevated_hr" / "spiking_hr".
    - video_number: Only used in the printed heading.
    - plot_and_print (bool, optional): Whether to print and plot. Default is True.
    """
    at_intersection = df["intersection"]
    away_from_intersection = ~at_intersection

    # per-row totals over all matching flag columns
    elevation_counts = df.filter(like="elevated_hr").sum(axis=1)
    spike_counts = df.filter(like="spiking_hr").sum(axis=1)

    # counts at intersections
    elevations_at = elevation_counts[at_intersection]
    spikes_at = spike_counts[at_intersection]

    # counts away from intersections
    elevations_away = elevation_counts[away_from_intersection]
    spikes_away = spike_counts[away_from_intersection]

    # sample sizes of the combined anomaly series
    n1 = len(elevations_at + spikes_at)
    n2 = len(elevations_away + spikes_away)

    if not plot_and_print:
        return

    print(f"📊 Plotting the distribution of heart rate anomalies at intersections and not at intersections for video {video_number}")
    plot_heart_rate_anomalies(n1, n2, elevations_at, elevations_away, spikes_at, spikes_away)

<br>

---

# Results

In [20]:
# show all
show_all = False

# videos
videos = [2,5,7,10,11,12,14]

# the video whose plots are always shown as the worked example
example_video = 2

# per-video frames, concatenated once after the loop (avoids re-copying the
# accumulated frame on every iteration)
flagged_frames = []
for video in videos:
    print(f'{"-" * 50}')
    print(f'Processing the data for Video {video}')
    print(f'{"-" * 50}')

    # plots are shown for the example video, or for every video when show_all is set
    plot_and_print = show_all or video == example_video

    # set the path to the video data (joining with "" keeps the trailing separator)
    video_dir = os.path.join(".", "data", "Video_Data", f"Video_{video}")
    video_data_path = os.path.join(video_dir, "")

    # process the participant data for the video
    _, flagged_process_participant_data_for_video = process_participant_data_for_video(video_data_path, plot_and_print=plot_and_print)

    # extract and flag the video labels
    video_file_path = os.path.join(video_dir, f"Video{video}.csv")
    flagged_process_participant_data_for_video = extract_and_flag_video_events(video_file_path, flagged_process_participant_data_for_video, plot_and_print=plot_and_print)

    flagged_process_participant_data_for_video["video"] = video
    flagged_frames.append(flagged_process_participant_data_for_video)

    # e-test
    checking_t_test_assumptions(flagged_process_participant_data_for_video, video, plot_and_print=plot_and_print)

    # The function returns a dict of (statistic, p-value) pairs; unpacking the
    # dict itself would only yield its key strings.
    significance = determine_statistical_significance(flagged_process_participant_data_for_video)
    _, intersection_elevated_p_val = significance["intersection_elevated_p_val"]
    _, intersection_spiking_p_val = significance["intersection_spiking_p_val"]

    print()

# one dataframe to store the data for all the videos
all_flagged_data = pd.concat(flagged_frames, axis=0) if flagged_frames else pd.DataFrame()

--------------------------------------------------
Processing the data for Video 2
--------------------------------------------------

📥 Processing participant 09_data

📈 Plotting the heart rate over time:



📊 Plotting the heart rate distribution:


👗 Fitting a Guassian plot to the data


Mean: 77.79913342961893, Sigma: 5.02471210351651
Flagging elevated heart rate above: 87.84855763665195
🏔️Plotting the distribution with the elevation threshold


⛳️Flagging 255 elevated heart rate events


🚶‍♂️‍➡️Plotting the moving average of the heart rate


Finding the steepest 90.0% of slopes
Spiking threshold is 0.18000000000000682
🏁Flagging 808 spiking heart rate events



🚩 Plotting flagged data:


--------------------------------------------------


🚦 Plotting Intersection events


📊 Plotting the distribution of heart rate anomalies at intersections and not at intersections for video 2
The sample size of heart rate anomalies at intersections is 3440
The sample size of heart rate anomalies not at intersections is 5561
Elevation


Spikes


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 5.439483255299706e-14. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 5.163991388575154e-05. 

--------------------------------------------------
Processing the data for Video 5
--------------------------------------------------
There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 1.0132952889392555e-16. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 0.0026214864525893056. 

--------------------------------------------------
Processing the data for Video 7
--------------------------------------------------
There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 2.536903688318643e-09. 
There is a significant difference between a spiking

### Exploring the Data

In [26]:
def _plot_anomaly_timeline(intersection_true, intersection_false, anomaly, colors):
    """Overlay time histograms of one anomaly type at and away from intersections.

    Parameters:
    - intersection_true / intersection_false (DataFrame): Rows with and without
      the intersection flag; both need a timedelta index.
    - anomaly (str): "Elevated" or "Spiking"; selects the ``<anomaly>_hr``
      columns and is used in the trace names and y-axis title.
    - colors (tuple): Bar colours for the (intersection, no intersection) traces.
    """
    column_key = f"{anomaly.lower()}_hr"
    at_color, away_color = colors

    fig = go.Figure()

    # histogram of the anomaly counts while at an intersection
    fig.add_trace(
        go.Histogram(
            x=intersection_true.index.total_seconds(),
            y=intersection_true.filter(like=column_key).sum(axis=1),
            name=f"{anomaly} HR (Intersection)",
            marker=dict(color=at_color),
            histfunc="sum",
            nbinsx=50,  # Adjust the number of bins for desired granularity
        )
    )

    # histogram of the anomaly counts away from intersections
    fig.add_trace(
        go.Histogram(
            x=intersection_false.index.total_seconds(),
            y=intersection_false.filter(like=column_key).sum(axis=1),
            name=f"{anomaly} HR (No Intersection)",
            marker=dict(color=away_color),
            histfunc="sum",
            nbinsx=50,  # Adjust the number of bins for desired granularity
            opacity=0.75,  # To make the bars slightly transparent for overlap
        )
    )

    fig.update_layout(
        barmode="overlay",  # This allows the histograms to overlay
        title_font_size=30,
        title_x=0.5,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        # title.font replaces the deprecated `titlefont` attribute.
        xaxis=dict(
            title=dict(text="Time (Seconds)", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        yaxis=dict(
            title=dict(text=f"Sum of {anomaly} Heart Rates", font=dict(size=20)),
            tickfont=dict(size=20),
        ),
        showlegend=True,
        legend=dict(
            x=0,
            y=1,
            bgcolor="rgba(0, 0, 0, 0)",
        ),
    )
    fig.show()


for video in all_flagged_data["video"].unique():
    # filter the flagged data for the video
    flagged_video_data = all_flagged_data[all_flagged_data["video"] == video]

    intersection_true = flagged_video_data[flagged_video_data["intersection"]]
    intersection_false = flagged_video_data[~flagged_video_data["intersection"]]

    # first figure: elevated heart rates
    print(f"Elevated Heart Rate Anomalies During Intersection Events, Video {video}")
    _plot_anomaly_timeline(intersection_true, intersection_false, "Elevated", ("red", "orange"))

    # second figure: spiking heart rates
    print(f"Spiking Heart Rate Anomalies During Intersection Events, Video {video}")
    _plot_anomaly_timeline(intersection_true, intersection_false, "Spiking", ("green", "blue"))

Elevated Heart Rate Anomalies During Intersection Events, Video 2


Spiking Heart Rate Anomalies During Intersection Events, Video 2


Elevated Heart Rate Anomalies During Intersection Events, Video 5


Spiking Heart Rate Anomalies During Intersection Events, Video 5


Elevated Heart Rate Anomalies During Intersection Events, Video 7


Spiking Heart Rate Anomalies During Intersection Events, Video 7


Elevated Heart Rate Anomalies During Intersection Events, Video 10


Spiking Heart Rate Anomalies During Intersection Events, Video 10


Elevated Heart Rate Anomalies During Intersection Events, Video 11


Spiking Heart Rate Anomalies During Intersection Events, Video 11


Elevated Heart Rate Anomalies During Intersection Events, Video 12


Spiking Heart Rate Anomalies During Intersection Events, Video 12


Elevated Heart Rate Anomalies During Intersection Events, Video 14


Spiking Heart Rate Anomalies During Intersection Events, Video 14


In [22]:
# Duration of one iteration (one row of the flagged data), taken as the gap
# between the first two index entries.
# NOTE(review): assumes the index is evenly spaced so that this single gap is
# representative of every row — confirm against how the data was resampled.
first_stamp, second_stamp = all_flagged_data.index[0], all_flagged_data.index[1]
iteration_length = second_stamp - first_stamp
iteration_length

Timedelta('0 days 00:00:00.099977970')

In [23]:
# Every column whose name contains "elevated_hr".
elevated_flags = all_flagged_data.filter(like="elevated_hr")

# Number of elevated heart rate anomalies.
# diff() leaves NaN in the first row; those gaps are filled from the first row
# of the data before summing over all rows and columns.
# NOTE(review): presumably the flags are boolean, so each anomaly contributes
# two changes (its start and its end), hence the division by 2 — confirm.
elevated_changes = elevated_flags.diff().fillna(all_flagged_data.iloc[0])
heart_rate_elevations = elevated_changes.sum().sum() / 2
print(f'The sum of elevated heart rate anomalies is {heart_rate_elevations}')

# Number of iterations (rows, summed across columns) flagged as elevated.
iterations_with_elevated_heart_rate = elevated_flags.sum().sum()
print(f'The sum of iterations with elevated heart rate is {iterations_with_elevated_heart_rate}')

# Average duration of one elevated episode:
# iterations per anomaly multiplied by the length of one iteration.
average_elevated_duration = (iterations_with_elevated_heart_rate / heart_rate_elevations) * iteration_length
print(f'The average length of time the heart rate is elevated is {average_elevated_duration.total_seconds()} seconds')

The sum of elevated heart rate anomalies is 27757.0
The sum of iterations with elevated heart rate is 33309
The average length of time the heart rate is elevated is 0.119975 seconds


In [24]:
# Every column whose name contains "spiking_hr".
spiking_flags = all_flagged_data.filter(like="spiking_hr")

# Number of spiking heart rate anomalies.
# diff() leaves NaN in the first row; those gaps are filled from the first row
# of the data before summing over all rows and columns.
# NOTE(review): presumably the flags are boolean, so each anomaly contributes
# two changes (its start and its end), hence the division by 2 — confirm.
spiking_changes = spiking_flags.diff().fillna(all_flagged_data.iloc[0])
heart_rate_spikes = spiking_changes.sum().sum() / 2
print(f'The sum of spiking heart rate anomalies is {heart_rate_spikes}')

# Number of iterations (rows, summed across columns) flagged as spiking.
iterations_with_spiking_heart_rate = spiking_flags.sum().sum()
print(f'The sum of iterations with spiking heart rate is {iterations_with_spiking_heart_rate}')

# Average duration of one spiking episode:
# iterations per anomaly multiplied by the length of one iteration.
average_spiking_duration = (iterations_with_spiking_heart_rate / heart_rate_spikes) * iteration_length
print(f'The average length of time the heart rate is spiking is {average_spiking_duration.total_seconds()} seconds')

The sum of spiking heart rate anomalies is 3903.0
The sum of iterations with spiking heart rate is 71798
The average length of time the heart rate is spiking is 1.839154 seconds


In [25]:
# Number of intersection events.
# NOTE(review): presumably "intersection" is a boolean flag, so diff() marks
# every state change and each event contributes two changes (start and end),
# hence the division by 2 — confirm the dtype.
intersection_flags = all_flagged_data["intersection"]
# diff() leaves NaN in the first row. Fill it with the column's own first value
# (a scalar). The whole first row must not be used here: it is a Series keyed
# by column name, which cannot align with this Series' time index, so the NaN
# would stay unfilled and an intersection already active on the first row
# would lose its opening edge.
intersection_changes = intersection_flags.diff().fillna(intersection_flags.iloc[0])
intersections = intersection_changes.sum() / 2
print(f'The total amount of intersections is {intersections}')

# Number of iterations (rows) during which an intersection is flagged.
iterations_with_intersections = intersection_flags.sum()
print(f'The sum of iterations with intersections is {iterations_with_intersections}')

# Average number of iterations per intersection; multiplying by the length of
# one iteration expresses the printed value in seconds.
average_iteration_for_intersection = iterations_with_intersections / intersections
print(f'The average time for an intersection is {average_iteration_for_intersection * iteration_length.total_seconds()}')

The total amount of intersections is 170.0
The sum of iterations with intersections is 26772
The average time for an intersection is 15.744613199999998
