In [73]:
import glob
import numpy as np
import pandas as pd
from scipy.stats import poisson_means_test
import plotly.graph_objects as go
from sklearn.mixture import GaussianMixture
from statsmodels.tsa.arima.model import ARIMA

# Helpful Functions

### Functions for plotting

Function to plot the heart rate over the course of the video.

In [74]:
def plot_heart_beat(df):
    # plot the heart rate
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df["vidts"].dt.total_seconds(), y=df['HR'], mode='lines', name='Heart Rate'))
    fig.update_layout(
        title="Heart Rate",
        title_font_size=20,
        xaxis=dict(
            title="Time(Seconds)",
            titlefont_size=10,
            tickfont_size=10,
        ),
        yaxis=dict(
            title="Heart Rate (estimated bpm)",
            titlefont_size=10,
            tickfont_size=10,
        ),
    )
    fig.show()

Function to plot the heart rate distribution (data is for 1 video).

In [75]:
def plot_heat_beat_distribution(df):
    # plot a distribution of the heart rate
    fig = go.Figure(data=[go.Histogram(x=df["HR"], nbinsx=26)])
    fig.update_layout(
        title_text="Heart Rate Distribution",  # title of plot
        xaxis_title_text="Heart Rate",  # xaxis label
        yaxis_title_text="Frequency",  # yaxis label
        title_font=dict(size=20),
        xaxis=dict(
            title_font=dict(size=10),
            tickfont=dict(size=10),
        ),
        yaxis=dict(
            title_font=dict(size=10),
            tickfont=dict(size=10),
        ),
    )
    fig.show()

<div>

### Finding elevated and lower heart rates

Looks for the periods where the heart rate is signifigantly elevated.

- An elevated heart rate is defined as a heart rate that is above a certain threshold. 
- The threshold was set to $\mu + \sigma$, where $\mu$ is the mean and $\sigma$ is the standard deviation of a Gaussian function fit to the drivers heart rate distribution.
- The Gaussian function is defined by its mean (μ) and standard deviation (σ). The mean represents the center of the distribution, while the standard deviation determines the spread or dispersion of the data points around the mean.
- Formally: $f(x) = \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{1}{2}(\frac{x-\mu}{\sigma})^2}$
- Example:

<!-- Centered image -->
<div style="text-align:center">
    <img src="images/Normal_Distribution.png" />
</div>

- $threshold = \mu + \sigma$
- Data is **flagged when:** the heart rate is above the threshold, and **unflagged when:** the heart rate is below the threshold.

In [76]:
class Guassian:
    def __init__(self, df = None):
        self.df = df
        self.mu = None
        self.sigma = None

        if df is not None:
            self.fit_guassian_distribution(df)

    def fit_guassian_distribution(self, df):
        self.mu = df.mean()
        self.sigma = df.std()
        return self.mu, self.sigma

    def get_probability(self, x):
        return 1/(np.sqrt(2*np.pi)*self.sigma)*np.exp(-1/2*((x-self.mu)/self.sigma)**2)

    def log_likelihood(self, x):
        return np.log(self.get_probability(x)).sum()

    def plot_guassian_distribution(self):
        # plot the gaussian distribution with the data
        x = np.linspace(self.df.min(), self.df.max(), 100)
        y = self.get_probability(x)

        fig = go.Figure()
        fig.add_trace(go.Histogram(x=self.df, nbinsx=25, histnorm='probability density', name='Heart Rate Distribution'))
        fig.add_trace(go.Scatter(x=x, y=y, mode = "lines", name='Gaussian Distribution'))
        fig.add_trace(go.Scatter(x=[self.mu, self.mu], y=[0, self.get_probability(self.mu)], mode='lines', name='Mean'))
        fig.update_layout(
            title="Gaussian Distribution",
            title_font_size=20,
            xaxis=dict(
                title="Heart Rate",
                titlefont_size=10,
                tickfont_size=10,
            ),
            yaxis=dict(
                title="Probability Density",
                titlefont_size=10,
                tickfont_size=10,
            ),
        )
        fig.show()

    def plot_guassian_distribution_with_threshold(self, elevation):
        # plot the gaussian distribution with the data
        x = np.linspace(self.df.min(), self.df.max(), 100)
        y = self.get_probability(x)

        fig = go.Figure()
        fig.add_trace(go.Histogram(x=self.df, nbinsx=25, histnorm='probability density', name='Heart Rate Distribution'))
        fig.add_trace(go.Scatter(x=x, y=y, mode = "lines", name='Gaussian Distribution'))
        fig.add_trace(go.Scatter(x=[self.mu, self.mu], y=[0, self.get_probability(self.mu)], mode='lines', name='Mean'))
        fig.add_trace(go.Scatter(x=[elevation, elevation], y=[0, self.get_probability(elevation)], mode='lines', name='Threshold'))
        fig.update_layout(
            title="Gaussian Distribution",
            title_font_size=20,
            xaxis=dict(
                title="Heart Rate",
                titlefont_size=10,
                tickfont_size=10,
            ),
            yaxis=dict(
                title="Probability Density",
                titlefont_size=10,
                tickfont_size=10,
            ),
        )
        fig.show()

In [77]:
def find_and_flag_elevated_heart_rate(df, plot_and_print=False):

    # fit a gaussian mixture model to the data
    hr = df['HR'].to_numpy()
    guassian = Guassian(hr)

    if plot_and_print:
        print('Fitting a Guassian plot to the data')
        guassian.plot_guassian_distribution()

    # calculate the elevation threshold
    mean = guassian.mu
    std = guassian.sigma
    elevation_threshold = mean + 2*std

    if plot_and_print:
        guassian.plot_guassian_distribution_with_threshold(elevation_threshold)

        print(f"Flagging elevated heart rate above {elevation_threshold}")

    # grab the vidts if the heart rate is above the elevation threshold
    df["elevated_hr"] = df["HR"] > elevation_threshold

    if plot_and_print:
        print(f'Flagging {df["elevated_hr"].sum()} elevated heart rate events')

        # plot the heart rate with the elevated heart rate flagged
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df.index, y=df['HR'], mode='lines', name='Heart Rate'))
        fig.add_trace(go.Scatter(x=df[df["elevated_hr"]].index, y=df[df["elevated_hr"]]["HR"], mode='markers', name='Elevation HR', marker=dict(color='red')))
        fig.update_layout(
            title="Elevated Heart Rates",
            title_font_size=20,
            xaxis=dict(
                title="Time",
                titlefont_size=10,
                tickfont_size=10,
            ),
            yaxis=dict(
                title="Heart Rate",
                titlefont_size=10,
                tickfont_size=10,
            ),
        )
        fig.show()

    return df

<br>

### Finding heart rate spikes and falls

Looks for looking for periods where the heart rate spikes significantly. 
- A "spike" is defined as the point at which the heart rates rate of change exceeds a certain threshold. 
- A moving average is calculated for the heart rate data. The moving average is calculated using a window of 5 seconds.
- The threshold is set to the 90th percentile of all slope values.
- Data is **flagged when**: the slope exceeds the threshold, and **unflagged when:** the slope is below this threshold.

In [78]:
def find_and_flag_spiking_heart_rate(df, plot_and_print=False, slope_threshold=0.90):

    # moving average over 5 seconds
    df["average_HR"] = df["HR"].rolling(window=50).mean()

    if plot_and_print:
        print('Plotting the moving average of the heart rate')
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df["vidts"].dt.total_seconds(), y=df["average_HR"], mode='lines', name='Moving Average'))
        fig.update_layout(
            title="Heart Rate Moving Average",
            title_font_size=20,
            xaxis=dict(
                title="Time(Seconds)",
                titlefont_size=10,
                tickfont_size=10,
            ),
            yaxis=dict(
                title="Heart Rate",
                titlefont_size=10,
                tickfont_size=10,
            ),
        )
        fig.show()

        print('Finding the slope of the moving average')

    # find the slope
    df["average_HR_slope"] = df["average_HR"].diff()

    if plot_and_print:
        print(f'Setting the activation threshold to the steepest {slope_threshold*100}% of slopes')

    # calculate the spiking threshold
    activation_threshold = df["average_HR_slope"].quantile(slope_threshold)        

    # mark all the spikes
    df["spiking_hr"] = df["average_HR_slope"] > activation_threshold


    if plot_and_print:
        print(f'Spiking threshold is {activation_threshold}')

        # plot the heart rate with color coded spiking heart rate
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df["vidts"].dt.total_seconds(), y=df["average_HR"], mode='lines', name='Moving Average'))
        # color the spiking heart rate
        fig.add_trace(go.Scatter(x=df[df["spiking_hr"]]["vidts"].dt.total_seconds(), y=df[df["spiking_hr"]]["average_HR"], mode='markers', name='Spiking HR', marker=dict(color='red')))
        fig.update_layout(
            title="Spiking Heart Rate",
            title_font_size=20,
            xaxis=dict(
                title="Time(Seconds)",
                titlefont_size=10,
                tickfont_size=10,
            ),
            yaxis=dict(
                title="Heart Rate",
                titlefont_size=10,
                tickfont_size=10,
            ),
        )
        fig.show()

    return df

<br>

### Processing Each Participant's Data for the Video

In [79]:
def process_participant_data_for_video(video_data_path, show_all=False, plot_and_print=True):
    """
    Process participant data for a video.
    Parameters:
    - video_data_path (str): The path to the video data.
    - show_all (bool, optional): Whether to show all plots and prints. Default is False.
    - plot_and_print (bool, optional): Whether to plot and print the data. Default is True.
    Returns:
    - participants (dict): A dictionary containing processed data for each participant.
    - flagged_participant_data (DataFrame): A DataFrame containing flagged data for each participant.
    """
    participants = {}
    flagged_participant_data = pd.DataFrame()

    # loop through all participants
    for participant in glob.glob(video_data_path + "*"):
        if "_data" in participant:
            participant_id = participant.replace(video_data_path, "").replace(".csv", "")

            if plot_and_print:
                print(f'\n\n{"-" * 50}')
                print(f"Processing participant {participant_id}")
                print(f'{"-" * 50}\n')

            # import data
            df = pd.read_csv(participant)
            df["vidts"] = pd.to_timedelta(df["vidts"], unit="s")

            # plot the heart rate
            if plot_and_print:
                print("\n📈 Plotting the distribution of heart rate over time:")
                plot_heart_beat(df)
                print("\n📊 Plotting the heart rate distribution:")
                plot_heat_beat_distribution(df)

            # find and flag the elevated heart rate
            df = find_and_flag_elevated_heart_rate(df, plot_and_print)

            # flag the spiking heart rate
            df = find_and_flag_spiking_heart_rate(df, plot_and_print)

            # add to the dictionary
            participants[participant_id.replace(".csv", "")] = df

            if plot_and_print:
                print(f'{"-" * 50}\n\n')

            if not show_all:
                plot_and_print = False

            # add the flagged data as columns
            flagged_participant_data[participant_id + "_elevated_hr"] = df["elevated_hr"]
            flagged_participant_data[participant_id + "_spiking_hr"] = df["spiking_hr"]

    flagged_participant_data.index = participants[list(participants.keys())[0]]["vidts"]

    return participants, flagged_participant_data

### Video Data: Manual Labels -> Time Series Data

Converts a time range in the format 'HH:MM-HH:MM' into a tuple of pandas Timedelta objects.

In [80]:
def parse_timedelta_range(time_range):
    start, end = time_range.split("-")
    return (pd.to_timedelta('00:' + start), pd.to_timedelta('00:' + end))

Extracts and catagories the video labels.

In [81]:
def extract_and_flag_video_events(video_data_path, flagged_video_data):
    """
    Extracts intersection events from a CSV file and flags them in a DataFrame.
    Parameters:
    - video_data_path (str): The file path of the CSV file containing video data.
    - flagged_video_data (DataFrame): The DataFrame where the flagged video data will be stored.
    Returns:
    - flagged_video_data (DataFrame): The updated DataFrame with intersection events flagged.
    """

    # Load the CSV file
    data = pd.read_csv(video_data_path)

    # Initialize lists for each type of event
    intersections = []

    # Iterate through each row in the dataframe to categorize and convert data
    for _, row in data.iterrows():
        time_range = parse_timedelta_range(row["Timestamp:"])

        if pd.notna(row["Intersections:"]):
            intersections.append(time_range)
        if pd.notna(row["Traffic Conflict:"]):
            intersections.append(time_range)
        if pd.notna(row["Left turn:"]):
            intersections.append(time_range)

    # adding data to video_2_participant_data
    flagged_video_data["intersection"] = False
    flagged_video_data["traffic_conflict"] = False
    flagged_video_data["left_turn"] = False

    for start, end in intersections:
        flagged_video_data.loc[
            (flagged_video_data.index >= start) & (flagged_video_data.index <= end),
            "intersection",
        ] = True

    for start, end in intersections:
        flagged_video_data.loc[
            (flagged_video_data.index >= start) & (flagged_video_data.index <= end),
            "traffic_conflict",
        ] = True

    for start, end in intersections:
        flagged_video_data.loc[
            (flagged_video_data.index >= start) & (flagged_video_data.index <= end),
            "left_turn",
        ] = True

    return flagged_video_data

### E Test

Performs a E-test on flagged video data to determine if there is a significant difference in heart rate during certain flagged conditions. 
- Statistical significance is determined by a p-value less than 0.05.
- Separates the data into two groups based on the flag condition. (e.g. elevated heart rate vs. normal heart rate)
- Calculates the sum of heart rate anomalies for each group.
- Performs a e-test.
- If the p-value from the e-test is less than 0.05, it indicates a significant difference in heart rate during the flagged condition.

In [82]:
def etest(flagged_video_data, flag_id, hr_anomaly):
    # find the data with the condition
    sample_1_data = flagged_video_data[flagged_video_data[flag_id]].filter(like=hr_anomaly).sum(axis=1)

    # find the data without any condition
    road_condition_flags = ['intersection', 'traffic_conflict', 'left_turn']
    sample_2_data = flagged_video_data[~flagged_video_data[road_condition_flags].any(axis=1)].filter(like=hr_anomaly).sum(axis=1)

    t_stat, p_val = poisson_means_test(
        sample_1_data.sum(), sample_1_data.count(), sample_2_data.sum(), sample_2_data.count()
    )

    if p_val < 0.05:
        print(f'There is a significant difference between a {hr_anomaly.replace("_hr", " heart rate")} during/within a(n) {flag_id}: The p-value is {p_val}. ')
    elif not np.isnan(p_val):
        print(f'There is no significant difference between {hr_anomaly.replace("_hr", " heart rate")} during/within a(n) {flag_id}: The p-value is {p_val}. ')
    return t_stat, p_val

Performs a t test for every condition.

In [83]:
def determine_statistical_significance(flagged_video_data):
    intersection_elevated_p_val = etest(flagged_video_data, "intersection", "elevated_hr")
    intersection_spiking_p_val = etest(flagged_video_data, "intersection", "spiking_hr")
    traffic_conflict_elevated_p_val = etest(flagged_video_data, "traffic_conflict", "elevated_hr")
    traffic_conflict_spiking_p_val = etest(flagged_video_data, "traffic_conflict", "spiking_hr")

    return {
        "intersection_elevated_p_val": intersection_elevated_p_val,
        "intersection_spiking_p_val": intersection_spiking_p_val,
        "traffic_conflict_elevated_p_val": traffic_conflict_elevated_p_val,
        "traffic_conflict_spiking_p_val": traffic_conflict_spiking_p_val,
    }

Plots the distribution of heart rate anomalies for each condition to visually ensure that the data is normally distributed, and the variances are equal.

In [84]:
def checking_t_test_assumptions(df, video_number):
    # series of intersections
    intersection = df["intersection"]

    # Sum of heart rate anomalies
    elevated_hr = df.filter(like="elevated_hr").sum(axis=1)
    spiking_hr = df.filter(like="spiking_hr").sum(axis=1)

    # Sum of heart rate anomalies at intersections
    intersection_true_elevations = elevated_hr[intersection]
    intersection_true_spikes = spiking_hr[intersection]
    intersection_true_anomalies = intersection_true_elevations + intersection_true_spikes 

    # Sum of heart rate anomalies not at intersections
    intersection_false_spikes = spiking_hr[~intersection]
    intersection_false_elevations = elevated_hr[~intersection]
    intersection_false_anomalies = intersection_false_elevations + intersection_false_spikes 

    # sample size
    n1 = len(intersection_true_anomalies)
    n2 = len(intersection_false_anomalies)

    print(f'The sample size of heart rate anomalies at intersections is {n1}')
    print(f'The sample size of heart rate anomalies not at intersections is {n2}')

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(
            x=intersection_true_elevations,
            nbinsx=25,
            histnorm="probability density",
            name="At Intersection",
        )
    )
    fig.add_trace(
        go.Histogram(
            x=intersection_false_elevations,
            nbinsx=25,
            histnorm="probability density",
            name="Not at Intersection",
        )
    )
    fig.update_layout(
        # title_text="Probability Density of Heart Rate Elevation, Video " + str(video_number),
        xaxis_title_text="Heart Rate",
        yaxis_title_text="Probability Density",
    )
    fig.show()

    # fig = go.Figure()
    # fig.add_trace(
    #     go.Box(
    #         y=intersection_true_elevations,
    #         name="At Intersection",
    #     )
    # )
    # fig.add_trace(
    #     go.Box(
    #         y=intersection_false_elevations,
    #         name="Not at Intersection",
    #     )
    # )
    # fig.update_layout(
    #     title_text="Heart Rate Elevation Box Plot",
    #     yaxis_title_text="Sum of Heart Rate Elevations",
    #     title_font=dict(size=20),
    #     xaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    #     yaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    # )
    # fig.show()

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(
            x=intersection_true_spikes,
            nbinsx=25,
            histnorm="probability density",
            name="At Intersection",
        )
    )
    fig.add_trace(
        go.Histogram(
            x=intersection_false_spikes,
            nbinsx=25,
            histnorm="probability density",
            name="Not at Intersection",
        )
    )
    fig.update_layout(
        # title_text="Probability Density of Heart Rate Spikes, Video " + str(video_number),
        xaxis_title_text="Heart Rate",
        yaxis_title_text="Probability Density",
    )
    fig.show()

    # fig = go.Figure()
    # fig.add_trace(
    #     go.Box(
    #         y=intersection_true_spikes,
    #         name="At Intersection",
    #     )
    # )
    # fig.add_trace(
    #     go.Box(
    #         y=intersection_false_spikes,
    #         name="Not at Intersection",
    #     )
    # )
    # fig.update_layout(
    #     title_text="Heart Rate Stimuli Box Plot",
    #     yaxis_title_text="Sum of Heart Rate Stimuli",
    #     title_font=dict(size=20),
    #     xaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    #     yaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    # )
    # fig.show()

    # fig = go.Figure()
    # fig.add_trace(
    #     go.Histogram(
    #         x=intersection_true_anomalies,
    #         nbinsx=25,
    #         histnorm="probability density",
    #         name="At Intersection",
    #     )
    # )
    # fig.add_trace(
    #     go.Histogram(
    #         x=intersection_false_anomalies,
    #         nbinsx=25,
    #         histnorm="probability density",
    #         name="Not at Intersection",
    #     )
    # )
    # fig.update_layout(
    #     title_text="Sum of Heart Rate Anomalies",
    #     xaxis_title_text="Heart Rate",
    #     yaxis_title_text="Probability Density",
    #     title_font=dict(size=20),
    #     xaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    #     yaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    # )
    # fig.show()

    # fig = go.Figure()
    # fig.add_trace(
    #     go.Box(
    #         y=intersection_true_anomalies,
    #         name="At Intersection",
    #     )
    # )
    # fig.add_trace(
    #     go.Box(
    #         y=intersection_false_anomalies,
    #         name="Not at Intersection",
    #     )
    # )
    # fig.update_layout(
    #     title_text="Heart Rate Anomalies Box Plot",
    #     yaxis_title_text="Sum of Heart Rate Anomalies",
    #     title_font=dict(size=20),
    #     xaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    #     yaxis=dict(
    #         title_font=dict(size=10),
    #         tickfont=dict(size=10),
    #     ),
    # )
    # fig.show()

---

# Results

In [85]:
# videos
videos = [2,5,7,10,11,12,14]

# one dataframe to store the data for all the videos
all_videos_data = pd.DataFrame()
for video in videos:
    print(f'{"-" * 50}')
    print(f'Processing the data for Video {video}')
    print(f'{"-" * 50}')
    _, flagged_process_participant_data_for_video = process_participant_data_for_video(f"./data/Video_Data/Video_{video}\\", plot_and_print=False)

    # Extract the video labels
    video_file_path = f"./data/Video_Data/Video_{video}/Video{video}.csv"
    # extract and flag the video data
    flagged_process_participant_data_for_video = extract_and_flag_video_events(video_file_path, flagged_process_participant_data_for_video)

    # add the video data to the all videos data
    all_videos_data = pd.concat([all_videos_data, flagged_process_participant_data_for_video])

    # e-test
    checking_t_test_assumptions(flagged_process_participant_data_for_video, video)
    (
        intersection_elevated_p_val,
        intersection_spiking_p_val,
        traffic_conflict_elevated_p_val,
        traffic_conflict_spiking_p_val
    ) = determine_statistical_significance(flagged_process_participant_data_for_video)

    print()

# print(f'{"-" * 50}')
# print(f"Processing the all video data")
# print(f'{"-" * 50}')
# checking_t_test_assumptions(all_videos_data)
# determine_statistical_significance(all_videos_data)

--------------------------------------------------
Processing the data for Video 2
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 3739
The sample size of heart rate anomalies not at intersections is 5262


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 4.901912792272318e-12. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 4.2890097670922075e-05. 
There is a significant difference between a elevated heart rate during/within a(n) traffic_conflict: The p-value is 4.901912792272318e-12. 
There is a significant difference between a spiking heart rate during/within a(n) traffic_conflict: The p-value is 4.2890097670922075e-05. 

--------------------------------------------------
Processing the data for Video 5
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 3130
The sample size of heart rate anomalies not at intersections is 5871


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 5.472000082898588e-15. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 0.0013914598985017936. 
There is a significant difference between a elevated heart rate during/within a(n) traffic_conflict: The p-value is 5.472000082898588e-15. 
There is a significant difference between a spiking heart rate during/within a(n) traffic_conflict: The p-value is 0.0013914598985017936. 

--------------------------------------------------
Processing the data for Video 7
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 5160
The sample size of heart rate anomalies not at intersections is 3841


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 4.8538152338445614e-11. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 6.279729109046054e-07. 
There is a significant difference between a elevated heart rate during/within a(n) traffic_conflict: The p-value is 4.8538152338445614e-11. 
There is a significant difference between a spiking heart rate during/within a(n) traffic_conflict: The p-value is 6.279729109046054e-07. 

--------------------------------------------------
Processing the data for Video 10
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 3641
The sample size of heart rate anomalies not at intersections is 5360


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 1.4114474149658955e-19. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 4.5756330706370046e-21. 
There is a significant difference between a elevated heart rate during/within a(n) traffic_conflict: The p-value is 1.4114474149658955e-19. 
There is a significant difference between a spiking heart rate during/within a(n) traffic_conflict: The p-value is 4.5756330706370046e-21. 

--------------------------------------------------
Processing the data for Video 11
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 3610
The sample size of heart rate anomalies not at intersections is 5391


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 0.0. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 1.5925394818566597e-23. 
There is a significant difference between a elevated heart rate during/within a(n) traffic_conflict: The p-value is 0.0. 
There is a significant difference between a spiking heart rate during/within a(n) traffic_conflict: The p-value is 1.5925394818566597e-23. 

--------------------------------------------------
Processing the data for Video 12
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 4051
The sample size of heart rate anomalies not at intersections is 4950


There is no significant difference between elevated heart rate during/within a(n) intersection: The p-value is 0.9973101603900965. 
There is a significant difference between a spiking heart rate during/within a(n) intersection: The p-value is 0.0010030225304248131. 
There is no significant difference between elevated heart rate during/within a(n) traffic_conflict: The p-value is 0.9973101603900965. 
There is a significant difference between a spiking heart rate during/within a(n) traffic_conflict: The p-value is 0.0010030225304248131. 

--------------------------------------------------
Processing the data for Video 14
--------------------------------------------------
The sample size of heart rate anomalies at intersections is 4620
The sample size of heart rate anomalies not at intersections is 4381


There is a significant difference between a elevated heart rate during/within a(n) intersection: The p-value is 1.3133903928087657e-08. 
There is no significant difference between spiking heart rate during/within a(n) intersection: The p-value is 0.19359704717556891. 
There is a significant difference between a elevated heart rate during/within a(n) traffic_conflict: The p-value is 1.3133903928087657e-08. 
There is no significant difference between spiking heart rate during/within a(n) traffic_conflict: The p-value is 0.19359704717556891. 



In [86]:
# how long is an interation
iteration_length = all_videos_data.index[1] - all_videos_data.index[0]
iteration_length

Timedelta('0 days 00:00:00.099977970')

In [87]:
# sum of elevated heart rate anomalies
heart_rate_elevations = all_videos_data.filter(like='elevated_hr').diff().fillna(all_videos_data.iloc[0]).sum().sum() / 2
# the sum of iteratoins where the heart rate is elevated
iterations_with_elevated_heart_rate = all_videos_data.filter(like="elevated_hr").sum().sum()

# average length of time the heart rate is elevated
(iterations_with_elevated_heart_rate / heart_rate_elevations) * iteration_length


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



Timedelta('0 days 00:00:00.119975725')

In [88]:
# sum of spiking heart rate anomalies
heart_rate_spikes = all_videos_data.filter(like='spiking_hr').diff().fillna(all_videos_data.iloc[0]).sum().sum() / 2
# the sum of iteratoins where the heart rate is spiking
iterations_with_spiking_heart_rate = all_videos_data.filter(like="spiking_hr").sum().sum()

# average length of time the heart rate is spiking
(iterations_with_spiking_heart_rate / heart_rate_spikes) * iteration_length


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



Timedelta('0 days 00:00:01.839154058')

In [89]:
# sum of intersections
intersections = all_videos_data["intersection"].diff().fillna(all_videos_data.iloc[0]).sum() / 2
# the some of iterations where there is an intersection
iterations_with_intersections = all_videos_data["intersection"].sum()

# average length of time there is an intersection
average_iteration_for_intersection = iterations_with_intersections / intersections

In [90]:
# convert the iteration length to seconds
print(f'The average time for an intersection is {average_iteration_for_intersection * iteration_length.total_seconds()}')

The average time for an intersection is 14.630665586387433
