In [1]:
%matplotlib widget
#%matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.patheffects as mpe
from IPython.display import display
import ipywidgets as widgets
import pandas as pd
import numpy as np
import matplotlib
import sys
from scipy import stats
import math

In [2]:
# Full width cells
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
# Load our data from file
# The path is relative, so it is resolved against the notebook's working directory.
LocationAllData = "../data-cleanup/merged-cleaned-csv/driving_data_merged_1_to_26.csv"
# `df` is the raw frame; later cells copy, filter and aggregate it.
df = pd.read_csv(LocationAllData)

In [4]:
# Create a new column which assigns each row to a specific distance window.
# The values in each window are averaged later on

# Decide which distance interval to average over
distanceWindowWidth = 50

def assignDistanceWindow(distance):
    """Return the centre of the distance window that `distance` belongs to.

    Windows are `distanceWindowWidth` wide and centred on multiples of that
    width, so window k covers [k*w - w/2, k*w + w/2).

    Half-way values are always rounded up. The built-in round() rounds ties to
    the nearest even integer instead, which puts both 75 and 125 into window
    100 while window 50 receives neither 25 nor 75, i.e. the windows would not
    all have the same size.

    distance: distance traveled, in the same unit as `distanceWindowWidth`.
    Returns the window centre as an int.
    """
    distanceWindowIndex = math.floor(distance / distanceWindowWidth + 0.5)
    return distanceWindowWidth * distanceWindowIndex

# Add a new column which contains the distance window each row belongs to
df["distanceWindow"] = df["distanceTraveled"].apply(assignDistanceWindow)

How to apply multiple functions at once to a grouped data frame. In our case we want both the mean and standard deviation!
https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#applying-multiple-functions-at-once

In [5]:
# Extract the data we need into a new data frame.
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only adds a stray "None" to the cell output.
df.info()

# needed_data = df[["Attempt nr", "evisID", "userID", "distanceWindow", "distanceTraveled",
#                   "speed", "currentStateOfCharge", "yPosition"]].copy()

needed_data = df.copy()

needed_data = needed_data.rename(columns={"yPosition": "roadHeight"})

### Exclude users who failed to understand the task fully ###

# The ids of the users we want to exclude from the study, with the reason for each
excluded_user_ids = [
    16,  # Diff - Didn't understand the blue line
    25,  # Guess - Seemed like the participant didn't understand that going at a lower speed was allowed
    5,   # Guess - Participant didn't use the Range Estimate number in the dashboard
    10,  # Diff - Participant didn't use bars at all, only focused on the speed and energy usage
    # 15,  # Guess - Hard to judge, tried to regain energy by braking quite a bit, realized it didn't work
]
# Keep every row whose user is not on the exclusion list
needed_data = needed_data[~needed_data["userID"].isin(excluded_user_ids)]

### Comments ###
"""
Diff + COPE1
4, 8, 18, 26 - Hovered the blue line when entering the highway
"""
# IDEA: Plot the difference between these two groups? Difference in tactic between them

# Test to exclude line riders
# needed_data = needed_data[
#     (needed_data["userID"] != 4) & 
#     (needed_data["userID"] != 8) & 
#     (needed_data["userID"] != 18) &
#     (needed_data["userID"] != 26)
# ]

# Test: Exclude least energy users
# needed_data = needed_data[
#     (needed_data["userID"] != 2)  & 
#     (needed_data["userID"] != 6)  & 
#     (needed_data["userID"] != 12) &
#     (needed_data["userID"] != 14) &
#     (needed_data["userID"] != 20) & 
#     (needed_data["userID"] != 22) & 
#     (needed_data["userID"] != 24) 
# ]

### Useful values needed later on ###

# The distinct user ids that remain after the exclusions
all_user_ids = needed_data["userID"].unique()
# How many users that is
number_of_users = len(all_user_ids)

### Individual driver based calculations ###

# For each attempt, EVIS and driver: average every column inside each distance window
individual_keys = ["Attempt nr", "evisID", "userID", "distanceWindow"]
average_individual = needed_data.groupby(individual_keys).mean().reset_index()
# One group per attempt, EVIS and driver, i.e. one group per individual line to plot
average_individual_grouped = average_individual.groupby(individual_keys[:-1])

### Group based calculations ###

# For every attempt, EVIS and distance window: mean and standard deviation across
# the drivers. agg is used to apply multiple functions at once!
# groupby does not modify its input, so no copy of average_individual is needed.
average_group_based = (
    average_individual
    .groupby(["Attempt nr", "evisID", "distanceWindow"], as_index=False)[["speed", "currentStateOfCharge", "roadHeight"]]
    .agg({"speed": ["mean", "std"], "currentStateOfCharge": ["mean", "std"], "roadHeight": "mean"})
)
# agg creates a tree like (column, function) header. Replace it with flat column names.
# (The former reindex(columns=sorted(...)) call was dropped: its result was never
# assigned, so it had no effect.)
average_group_based.columns = ["Attempt nr", "evisID", "distanceWindow", "speed mean", "speed std", "currentStateOfCharge mean", "currentStateOfCharge std", "roadHeight"]
# Finally re-group them based on how we want to plot group based mean
average_group_based_grouped = average_group_based.groupby(["Attempt nr", "evisID"])

### Average road height calc ###

# One road height per distance window: take the two columns we need from the
# group based frame and average roadHeight over all attempts and EVIS groups.
average_road_height = (
    average_group_based.loc[:, ["distanceWindow", "roadHeight"]]
    .groupby(["distanceWindow"], as_index=False)
    .mean()
)
# No extra grouping is needed afterwards: the whole frame is plotted as a single line

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 282604 entries, 0 to 282603
Data columns (total 17 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   Attempt nr                282604 non-null  int64  
 1   userID                    282604 non-null  int64  
 2   evisID                    282604 non-null  object 
 3   timeStamp                 282604 non-null  float64
 4   currentStateOfCharge      282604 non-null  float64
 5   energyConsumed            282604 non-null  float64
 6   energyUsage               282604 non-null  float64
 7   guesstimatedDistanceLeft  282604 non-null  float64
 8   speed                     282604 non-null  float64
 9   distanceTraveled          282604 non-null  float64
 10  throttlePosition          282604 non-null  float64
 11  breakPosition             282604 non-null  float64
 12  steeringWheelRot          282604 non-null  float64
 13  xPosition                 282604 non-null  f

None

# Interactive plot for speed over distance

In [6]:
# Plot window presets
plt.style.use("default")
# Text sizes applied on top of the default style
params = {
    'font.size': 18,
    'axes.labelsize': 18,
    'axes.titlesize': 20,
    'legend.fontsize': 15,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18
}
plt.rcParams.update(params)

# Colour-cycle colours per EVIS, and the colour of the road height line
colorGuessOMeter, colorDiffCOPE1, colorRoadHeight = "C0", "C1", "k"
# Fill and border colours per EVIS
color_diff, color_diff_border = "#FFA85C", "#E06900"
color_guess, color_guess_border = "#3F9CDE", "#1B679D"

# Notebook-wide flag: True while the individual attempt lines are toggled off
individual_attempts_are_hidden = False


def didUserMakeIt(group, goalDistance=8000):
    """Tell whether a driver reached the goal.

    group: the rows of one driver for one attempt; only the "distanceWindow"
        column is read and its last row is taken as the end of the run.
    goalDistance: the distance window that counts as the goal (8000 by default).

    Returns True when the last distance window is at or beyond the goal. Using
    ">=" rather than "==" keeps a run that ends in a window past the goal from
    being reported as failed. An empty group counts as not having made it.
    """
    windows = group["distanceWindow"]
    if len(windows) == 0:
        return False
    return bool(windows.iloc[-1] >= goalDistance)

# Class for the individual attempt line
class IndividualAttemptLine:
    """One driver's speed curve for a single attempt.

    Draws three artists on the given axes: the speed line, a round marker at
    the last data point and a text label with the driver id on top of it.
    The artists are layered line < marker < label, and every driver gets its
    own block of three z-orders starting at `draw_order_base`.
    """
    # alpha ("a") and line width ("w") of the speed line
    line_highlight = {"a": 1.0, "w": 2.5}
    line_normal = {"a": 0.4, "w": 2.0}
    # alpha of the end point marker
    marker_highlight = 1.0
    marker_normal = 0.7
    # added to the z-order of a highlighted driver so it is drawn above the others
    highlight_zorder_offset = 500

    def __init__(self, ax, name, group):
        """Create the artists.

        ax: the axes to draw on.
        name: group key (attempt nr, evisID, userID).
        group: the driver's rows; "distanceWindow" and "speed" are plotted.
        """
        self.group = group
        self.driver_id = name[2]
        self.draw_order_base = self.driver_id * 3
        self.is_highlighted = False
        uses_guess_o_meter = name[1] == "GuessOMeter"
        self.color = color_guess if uses_guess_o_meter else color_diff
        self.color_strong = color_guess_border if uses_guess_o_meter else color_diff_border

        # Where the user ended up
        end_distance = group["distanceWindow"].iloc[-1]
        end_speed = group["speed"].iloc[-1]

        # Individual Attempt
        self.line = ax.plot(group["distanceWindow"], group["speed"],
                            color=self.color, zorder=self._zorder(0),
                            linewidth=self.line_normal["w"], alpha=self.line_normal["a"])[0]

        # Marking for where the user ended up
        self.marker = ax.plot(end_distance, end_speed,
                              color=self.color, zorder=self._zorder(1), marker="o", markersize=25,
                              alpha=self.marker_normal)[0]

        # Mark the endpoint with the user id as well
        self.text = ax.text(end_distance, end_speed,
                            s=self.driver_id, color="k", size="small", zorder=self._zorder(2), clip_on=True,
                            horizontalalignment='center', verticalalignment='center')

    def _zorder(self, layer):
        """Return the z-order of layer 0 (line), 1 (marker) or 2 (label).

        The same formula is used when the artists are created and when the
        highlight is toggled, so switching a highlight off restores exactly the
        z-orders the artists were created with.
        """
        lift = self.highlight_zorder_offset if self.is_highlighted else 0
        return lift + self.draw_order_base + layer

    def highlight(self, value):
        """Switch the highlight of this driver on (True) or off (False)."""
        self.is_highlighted = value

        line_style = self.line_highlight if self.is_highlighted else self.line_normal
        self.line.set_alpha(line_style["a"])
        self.line.set_linewidth(line_style["w"])
        self.line.set_zorder(self._zorder(0))

        # Use path effect to see overlaps more clearly
        if self.is_highlighted:
            path_effects = [
                mpe.Stroke(linewidth=5, foreground=self.color_strong),
                mpe.Stroke(foreground='white',alpha=1),
                mpe.Normal()
            ]
        else:
            path_effects = []
        self.line.set_path_effects(path_effects)

        self.marker.set_alpha(self.marker_highlight if self.is_highlighted else self.marker_normal)
        self.marker.set_zorder(self._zorder(1))
        self.marker.set_path_effects(path_effects)

        self.text.set_zorder(self._zorder(2))

        # If we highlight and the rest of the lines are hidden -> show it
        if self.is_highlighted:
            self.text.set_alpha(1)
        # If we remove highlight and the rest are hidden -> hide it
        if not self.is_highlighted and individual_attempts_are_hidden:
            self.setHide(True)

    def setHide(self, hide, force=False):
        """Hide (True) or show (False) the three artists.

        A highlighted line is left untouched unless `force` is set to True.
        """
        if self.is_highlighted and not force:
            return

        self.line.set_alpha(0 if hide else self.line_normal["a"])
        self.marker.set_alpha(0 if hide else self.marker_normal)
        self.text.set_alpha(0 if hide else 1)

    def showIfFailed(self):
        """Show this attempt if the driver did not reach the goal, otherwise force-hide it."""
        if didUserMakeIt(self.group):
            self.setHide(True, True)
        else:
            self.setHide(False)
        
class MeanWithSTD:
    """Mean speed line of one EVIS group with a hatched band of +- one standard deviation."""

    def __init__(self, ax, name, group):
        """Draw the mean line and the band on `ax`.

        name: group key (attempt nr, evisID).
        group: rows with the columns "distanceWindow", "speed mean" and "speed std".
        """
        uses_guess_o_meter = name[1] == "GuessOMeter"
        self.color = color_guess if uses_guess_o_meter else color_diff
        self.attempt = name[0]
        self.std_alpha = 0.4
        self.color_strong = color_guess_border if uses_guess_o_meter else color_diff_border

        distance = group["distanceWindow"]
        mean_speed = group["speed mean"]
        speed_std = group["speed std"]

        (self.average_line,) = ax.plot(distance, mean_speed,
                                       color=self.color, zorder=4, linewidth=3, alpha=1)

        # A coloured outline around the line makes overlaps easier to see
        self.average_line.set_path_effects([
            mpe.Stroke(linewidth=5, foreground=self.color_strong),
            mpe.Stroke(foreground='white',alpha=1),
            mpe.Normal()
        ])

        # The Guess-o-meter band is drawn below the other group's band
        self.zorder = 1 if uses_guess_o_meter else 2
        self.std_band = ax.fill_between(
            distance,
            mean_speed + speed_std,
            mean_speed - speed_std,
            color=self.color, alpha=self.std_alpha, zorder=self.zorder, hatch="//")

    def setHide(self, hide):
        """Hide (True) or show (False) both the mean line and the band."""
        line_alpha, band_alpha = (0, 0) if hide else (1, self.std_alpha)
        self.average_line.set_alpha(line_alpha)
        self.std_band.set_alpha(band_alpha)

In [7]:
# Plot lines
whichAttempt = 1

# Individual attempts plot. The figure is created inside an Output widget.
output = widgets.Output()
with output:
    fig, ax = plt.subplots(figsize=(14, 7))

ax.set(
    xlabel="Distance [m]",
    ylabel="Speed [km/h]",
    title="Speed over distance traveled during attempt #" + str(whichAttempt),
)
ax.grid(True)

### Draw the plot objects ###

# One transparent line per driver for the chosen attempt, with a dot marking
# where the attempt ended. Keyed by user id.
driving_lines_class_dict = {}
for name, group in average_individual_grouped:
    attempt_nr, _, driver_id = name
    if attempt_nr == whichAttempt:
        driving_lines_class_dict[driver_id] = IndividualAttemptLine(ax, name, group)

# The average line with +- standard deviation for the chosen attempt. Keyed by evisID.
mean_std_class_dict = {}
for name, group in average_group_based_grouped:
    attempt_nr, evis_id = name
    if attempt_nr == whichAttempt:
        mean_std_class_dict[evis_id] = MeanWithSTD(ax, name, group)

### t-test visualization ###

# Every distance window centre from the start up to 8000
sampled_distances = np.arange(0, 8000 + 1, distanceWindowWidth)

# The averaged speed of every individual during the chosen attempt
speed_columns = ["userID", "Attempt nr", "evisID", "distanceWindow", "speed"]
attempt_speeds = average_individual.loc[average_individual["Attempt nr"] == whichAttempt, speed_columns]

# Diff + COPE1: one row per user, one column per distance window, speeds rounded to one decimal
individual = attempt_speeds[attempt_speeds["evisID"] == "DiffAndCOPE1"]
individual_pivot = individual.pivot(index="userID", columns="distanceWindow", values="speed")
a1_s1_diff = individual_pivot.round(1)

# Guess-o-meter: same layout
individual = attempt_speeds[attempt_speeds["evisID"] == "GuessOMeter"]
individual_pivot = individual.pivot(index="userID", columns="distanceWindow", values="speed")
a1_s1_guess = individual_pivot.round(1)

# IDEA: Check for a statistical difference using a t-test at each distance window.
# If there's a difference, paint the background in a particular color to highlight that "here we have a statistical difference!!!"

# stat_different = []
# for dWindow in sampled_distances:
#     section1d = a1_s1_diff[[dWindow]]
#     section1d = section1d.dropna()
    
#     section1g = a1_s1_guess[[dWindow]]
#     section1g = section1g.dropna()
    
#     s,t = stats.ttest_ind(section1d, section1g)
    
#     stat_different.append(t[0] < 0.05)

# # Column which tells us if there is a statistical difference between the two groups at each distance window!
# stat_df = pd.DataFrame(data=stat_different, index=sampled_distances, columns=["Statistical significant difference"])
    
# for index,row in stat_df.iterrows():
#     # If there's a statistical difference here, draw a vertical area!
#     if(row["Statistical significant difference"] == True):
#         ax.axvspan(index-distanceWindowWidth/2,index+distanceWindowWidth/2, alpha=0.3, color="green", linewidth=0)
        
### Road height ###

# Plot road height on a secondary y axis that shares the distance axis
ax2 = ax.twinx()
ax2.set_ylim(-10, 30)
ax2.grid(True, linestyle="--", linewidth=2)
ax2.set_ylabel('Road height [m]', color=colorRoadHeight)
road_height_plot, = ax2.plot(average_road_height["distanceWindow"], average_road_height["roadHeight"],
                        color=colorRoadHeight, zorder=2, linewidth=2, linestyle="--", alpha=1, label="Road height")

# Marker for where the 110 km/h signs are
vline = ax.axvline(x=3000, color="k", linestyle="--", linewidth=4, label="110 km/h sign")

# Create a legend. The patches use color_guess / color_diff, the colours the
# attempt and mean lines are drawn with, so the legend matches the plot
# (the "C0" / "C1" cycle colours are different shades).
first_patch = patches.Patch(color=color_guess, label="Guess-o-meter")
second_patch = patches.Patch(color=color_diff, label="COPE1 + diff")
ax.legend(handles=[first_patch, second_patch, road_height_plot, vline], loc="upper left").set_zorder(2000)

ax.set_yticks(np.arange(0,101,10))

# Draw the primary axes above the secondary one, and hide its background patch
# so that what is drawn on the secondary axes stays visible
ax.set_zorder(1)
ax.patch.set_visible(False)

In [8]:
### Widgets for updating the graph ###

def highlightLine(x, lines=driving_lines_class_dict):
    """Checkbox observer: highlight the attempt line of the user named in the checkbox description.

    x: the change passed by `observe`; `x.owner.description` holds the user id
        and `x.new` the new checkbox value.
    lines: bound when this cell runs, so that the checkboxes keep controlling
        the lines of this plot even after a later cell rebinds
        `driving_lines_class_dict`.
    """
    driver_id = int(x.owner.description)
    attempt_line = lines.get(driver_id)
    # A user without a line in this plot has nothing to highlight
    if attempt_line is not None:
        attempt_line.highlight(x.new)

# Allow user to highlight individual attempts
checkboxes_guess = [widgets.Label("Guess-o-meter")]
checkboxes_copediff = [widgets.Label("COPE1 + diff")]
for user_id in all_user_ids:
    checkbox = widgets.Checkbox(description=str(user_id), value=False)
    checkbox.observe(highlightLine, "value")
    # Odd ids are listed under Guess-o-meter, even ids under COPE1 + diff
    # NOTE(review): assumes the EVIS was assigned by id parity - confirm against the data
    if user_id % 2:
        checkboxes_guess.append(checkbox)
    else:
        checkboxes_copediff.append(checkbox)

container_guess_checkboxes = widgets.VBox(checkboxes_guess)
container_diffcope_checkboxes = widgets.VBox(checkboxes_copediff)
checkboxes = widgets.VBox([container_guess_checkboxes, container_diffcope_checkboxes])

# Allow user to update the range of each axis.
# The axes are bound as default arguments when this cell runs, so the sliders
# keep acting on this plot even after a later cell rebinds `ax` / `ax2`.
def updateXRange(x, axis=ax):
    """Slider observer: set the x limits to the selected range, widened by 200 on both sides."""
    margin = 200
    axis.set_xlim(x.new[0] - margin, x.new[1] + margin)

def updateYRange(y, axis=ax):
    """Slider observer: set the left y limits to the selected range."""
    margin = 0
    axis.set_ylim(y.new[0] - margin, y.new[1] + margin)

def updateYRangeSecondary(y, axis=ax2):
    """Slider observer: set the right y limits to the selected range, widened by 1 on both sides."""
    margin = 1
    axis.set_ylim(y.new[0] - margin, y.new[1] + margin)

def createRangeSlider(min, max, step, desc):
    """Build an integer range slider over [min, max] that starts with the full range selected."""
    return widgets.IntRangeSlider(
        value=[min, max],
        min=min,
        max=max,
        step=step,
        description=desc
    )

range_slider_x = createRangeSlider(0,8000, 100, "X Range")
range_slider_x.observe(updateXRange,"value")

range_slider_y = createRangeSlider(0,140,10, "Y Range (left)")
range_slider_y.observe(updateYRange,"value")

range_slider_y_secondary = createRangeSlider(-10,40,1, "Y Range (Right)")
range_slider_y_secondary.observe(updateYRangeSecondary,"value")

# Allow user to select what to show.
# The plot objects are bound as default arguments when this cell runs, so the
# controls keep acting on this plot even after a later cell rebinds
# `driving_lines_class_dict` / `mean_std_class_dict`.
def toggleIndividualAttempts(show, lines=driving_lines_class_dict):
    """Checkbox observer: show or hide every individual attempt line."""
    global individual_attempts_are_hidden # This is needed to change global variables inside a function
    individual_attempts_are_hidden = not show.new
    for attempt_line in lines.values():
        attempt_line.setHide(individual_attempts_are_hidden)

show_individual_attempts_checkbox = widgets.Checkbox(description="Show individual attempts", value=True)
show_individual_attempts_checkbox.observe(toggleIndividualAttempts, "value")

# Show failed attempts
def showFailedAttempts(show, lines=driving_lines_class_dict):
    """Button callback: show only the attempts that did not reach the goal. `show` is the clicked button."""
    for attempt_line in lines.values():
        attempt_line.showIfFailed()

show_failed_attempts = widgets.Button(description="Show failed attempts")
show_failed_attempts.on_click(showFailedAttempts)

# TODO: Show successful attempts

# TODO: Show average speed

def toggleMeanSTD(show, mean_plots=mean_std_class_dict):
    """Checkbox observer: show or hide the mean lines and their standard deviation bands."""
    for mean_plot in mean_plots.values():
        mean_plot.setHide(not show.new)

toggle_mean_std = widgets.Checkbox(description="Show mean & std", value=True)
toggle_mean_std.observe(toggleMeanSTD, "value")

# TODO: Show standard deviation

# Layout: plot and user checkboxes on top, range sliders below, visibility controls last
box_2 = widgets.HBox([range_slider_x, range_slider_y, range_slider_y_secondary])
box_3 = widgets.HBox([show_individual_attempts_checkbox, toggle_mean_std, show_failed_attempts])
box_1 = widgets.HBox([output, checkboxes])
widgets.VBox([box_1, box_2, box_3])

VBox(children=(HBox(children=(Output(), VBox(children=(VBox(children=(Label(value='Guess-o-meter'), Checkbox(v…

In [9]:
# Plot lines
whichAttempt = 2

# Individual attempts plot. The figure is created inside an Output widget.
output = widgets.Output()
with output:
    fig, ax = plt.subplots(figsize=(14, 7))

ax.set(
    xlabel="Distance [m]",
    ylabel="Speed [km/h]",
    title="Speed over distance traveled during attempt #" + str(whichAttempt),
)
ax.grid(True)

### Draw the plot objects ###

# One transparent line per driver for the chosen attempt, with a dot marking
# where the attempt ended. Keyed by user id.
driving_lines_class_dict = {}
for name, group in average_individual_grouped:
    attempt_nr, _, driver_id = name
    if attempt_nr == whichAttempt:
        driving_lines_class_dict[driver_id] = IndividualAttemptLine(ax, name, group)

# The average line with +- standard deviation for the chosen attempt. Keyed by evisID.
mean_std_class_dict = {}
for name, group in average_group_based_grouped:
    attempt_nr, evis_id = name
    if attempt_nr == whichAttempt:
        mean_std_class_dict[evis_id] = MeanWithSTD(ax, name, group)

### t-test visualization ###

# Every distance window centre from the start up to 8000
sampled_distances = np.arange(0, 8000 + 1, distanceWindowWidth)

# The averaged speed of every individual during the chosen attempt
speed_columns = ["userID", "Attempt nr", "evisID", "distanceWindow", "speed"]
attempt_speeds = average_individual.loc[average_individual["Attempt nr"] == whichAttempt, speed_columns]

# Diff + COPE1: one row per user, one column per distance window, speeds rounded to one decimal
individual = attempt_speeds[attempt_speeds["evisID"] == "DiffAndCOPE1"]
individual_pivot = individual.pivot(index="userID", columns="distanceWindow", values="speed")
a1_s1_diff = individual_pivot.round(1)

# Guess-o-meter: same layout
individual = attempt_speeds[attempt_speeds["evisID"] == "GuessOMeter"]
individual_pivot = individual.pivot(index="userID", columns="distanceWindow", values="speed")
a1_s1_guess = individual_pivot.round(1)

# IDEA: Check for a statistical difference using a t-test at each distance window.
# If there's a difference, paint the background in a particular color to highlight that "here we have a statistical difference!!!"

# stat_different = []
# for dWindow in sampled_distances:
#     section1d = a1_s1_diff[[dWindow]]
#     section1d = section1d.dropna()
    
#     section1g = a1_s1_guess[[dWindow]]
#     section1g = section1g.dropna()
    
#     s,t = stats.ttest_ind(section1d, section1g)
    
#     stat_different.append(t[0] < 0.05)

# # Column which tells us if there is a statistical difference between the two groups at each distance window!
# stat_df = pd.DataFrame(data=stat_different, index=sampled_distances, columns=["Statistical significant difference"])
    
# for index,row in stat_df.iterrows():
#     # If there's a statistical difference here, draw a vertical area!
#     if(row["Statistical significant difference"] == True):
#         ax.axvspan(index-distanceWindowWidth/2,index+distanceWindowWidth/2, alpha=0.3, color="green", linewidth=0)
        
### Road height ###

# Plot road height on a secondary y axis that shares the distance axis
ax2 = ax.twinx()
ax2.set_ylim(-10, 30)
ax2.grid(True, linestyle="--", linewidth=2)
ax2.set_ylabel('Road height [m]', color=colorRoadHeight)
road_height_plot, = ax2.plot(average_road_height["distanceWindow"], average_road_height["roadHeight"],
                        color=colorRoadHeight, zorder=2, linewidth=2, linestyle="--", alpha=1, label="Road height")

# Marker for where the 110 km/h signs are
vline = ax.axvline(x=3000, color="k", linestyle="--", linewidth=4, label="110 km/h sign")

# Create a legend. The patches use color_guess / color_diff, the colours the
# attempt and mean lines are drawn with, so the legend matches the plot
# (the "C0" / "C1" cycle colours are different shades).
first_patch = patches.Patch(color=color_guess, label="Guess-o-meter")
second_patch = patches.Patch(color=color_diff, label="COPE1 + diff")
ax.legend(handles=[first_patch, second_patch, road_height_plot, vline], loc="upper left").set_zorder(2000)

ax.set_yticks(np.arange(0,101,10))

# Draw the primary axes above the secondary one, and hide its background patch
# so that what is drawn on the secondary axes stays visible
ax.set_zorder(1)
ax.patch.set_visible(False)

In [10]:
### Widgets for updating the graph ###

def highlightLine(x, lines=driving_lines_class_dict):
    """Checkbox observer: highlight the attempt line of the user named in the checkbox description.

    x: the change passed by `observe`; `x.owner.description` holds the user id
        and `x.new` the new checkbox value.
    lines: bound when this cell runs, so that the checkboxes keep controlling
        the lines of this plot even if `driving_lines_class_dict` is rebound
        afterwards.
    """
    driver_id = int(x.owner.description)
    attempt_line = lines.get(driver_id)
    # A user without a line in this plot has nothing to highlight
    if attempt_line is not None:
        attempt_line.highlight(x.new)

# Allow user to highlight individual attempts
checkboxes_guess = [widgets.Label("Guess-o-meter")]
checkboxes_copediff = [widgets.Label("COPE1 + diff")]
for user_id in all_user_ids:
    checkbox = widgets.Checkbox(description=str(user_id), value=False)
    checkbox.observe(highlightLine, "value")
    # Odd ids are listed under Guess-o-meter, even ids under COPE1 + diff
    # NOTE(review): assumes the EVIS was assigned by id parity - confirm against the data
    if user_id % 2:
        checkboxes_guess.append(checkbox)
    else:
        checkboxes_copediff.append(checkbox)

container_guess_checkboxes = widgets.VBox(checkboxes_guess)
container_diffcope_checkboxes = widgets.VBox(checkboxes_copediff)
checkboxes = widgets.VBox([container_guess_checkboxes, container_diffcope_checkboxes])

# Allow user to update the range of each axis.
# The axes are bound as default arguments when this cell runs, so the sliders
# keep acting on this plot even if `ax` / `ax2` are rebound afterwards.
def updateXRange(x, axis=ax):
    """Slider observer: set the x limits to the selected range, widened by 200 on both sides."""
    margin = 200
    axis.set_xlim(x.new[0] - margin, x.new[1] + margin)

def updateYRange(y, axis=ax):
    """Slider observer: set the left y limits to the selected range."""
    margin = 0
    axis.set_ylim(y.new[0] - margin, y.new[1] + margin)

def updateYRangeSecondary(y, axis=ax2):
    """Slider observer: set the right y limits to the selected range, widened by 1 on both sides."""
    margin = 1
    axis.set_ylim(y.new[0] - margin, y.new[1] + margin)

def createRangeSlider(min, max, step, desc):
    """Build an integer range slider over [min, max] that starts with the full range selected."""
    return widgets.IntRangeSlider(
        value=[min, max],
        min=min,
        max=max,
        step=step,
        description=desc
    )

range_slider_x = createRangeSlider(0,8000, 100, "X Range")
range_slider_x.observe(updateXRange,"value")

range_slider_y = createRangeSlider(0,140,10, "Y Range (left)")
range_slider_y.observe(updateYRange,"value")

range_slider_y_secondary = createRangeSlider(-10,40,1, "Y Range (Right)")
range_slider_y_secondary.observe(updateYRangeSecondary,"value")

# Allow user to select what to show.
# The plot objects are bound as default arguments when this cell runs, so the
# controls keep acting on this plot even if `driving_lines_class_dict` /
# `mean_std_class_dict` are rebound afterwards.
def toggleIndividualAttempts(show, lines=driving_lines_class_dict):
    """Checkbox observer: show or hide every individual attempt line."""
    global individual_attempts_are_hidden # This is needed to change global variables inside a function
    individual_attempts_are_hidden = not show.new
    for attempt_line in lines.values():
        attempt_line.setHide(individual_attempts_are_hidden)

show_individual_attempts_checkbox = widgets.Checkbox(description="Show individual attempts", value=True)
show_individual_attempts_checkbox.observe(toggleIndividualAttempts, "value")

# Show failed attempts
def showFailedAttempts(show, lines=driving_lines_class_dict):
    """Button callback: show only the attempts that did not reach the goal. `show` is the clicked button."""
    for attempt_line in lines.values():
        attempt_line.showIfFailed()

show_failed_attempts = widgets.Button(description="Show failed attempts")
show_failed_attempts.on_click(showFailedAttempts)

# TODO: Show successful attempts

# TODO: Show average speed

def toggleMeanSTD(show, mean_plots=mean_std_class_dict):
    """Checkbox observer: show or hide the mean lines and their standard deviation bands."""
    for mean_plot in mean_plots.values():
        mean_plot.setHide(not show.new)

toggle_mean_std = widgets.Checkbox(description="Show mean & std", value=True)
toggle_mean_std.observe(toggleMeanSTD, "value")

# TODO: Show standard deviation

# Layout: plot and user checkboxes on top, range sliders below, visibility controls last
box_2 = widgets.HBox([range_slider_x, range_slider_y, range_slider_y_secondary])
box_3 = widgets.HBox([show_individual_attempts_checkbox, toggle_mean_std, show_failed_attempts])
box_1 = widgets.HBox([output, checkboxes])
widgets.VBox([box_1, box_2, box_3])

VBox(children=(HBox(children=(Output(), VBox(children=(VBox(children=(Label(value='Guess-o-meter'), Checkbox(v…

In [11]:
# Trying new way of plotting the data: mean energy usage per distance window during attempt 1

is_attempt_1 = average_individual["Attempt nr"] == 1

# Diff + COPE1 drivers: one row per distance window, one column per user
new = average_individual[is_attempt_1 & (average_individual["evisID"] == "DiffAndCOPE1")]
#new = new.pivot(index="distanceWindow", columns="userID", values="speed")
new2 = new.pivot(index="distanceWindow", columns="userID", values="energyUsage")

# Guess-o-meter drivers, same layout
e_g = average_individual[is_attempt_1 & (average_individual["evisID"] == "GuessOMeter")]
e_g = e_g.pivot(index="distanceWindow", columns="userID", values="energyUsage")

#display(e_g.T.describe())
display(e_g)

# Last valid index label of every column in `new`; only used by the commented-out plot below
test = new.apply(pd.Series.last_valid_index)

# NOTE(review): this rebinds the notebook-level names `fig` and `ax` that the plots above also use
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(average_road_height.set_index("distanceWindow"), color="k", alpha=.6, label="Road height")
# Mean over the users of each group for every distance window
ax.plot(e_g.T.mean(), color="C0", alpha=.6, label="Guess-o-meter")
ax.plot(new2.T.mean(), color="C1", alpha=.6, label="Diff + COPE1")

ax.legend()
# Label through the Axes object instead of the pyplot "current axes" state.
# The trailing ";" keeps the Text(...) repr out of the cell output.
ax.set_xlabel("Distance [m]")
ax.set_ylabel("Power usage [kW]");
#ax.grid(True)
#ax.plot(test, marker="o", color="C1")

userID,1,3,7,9,11,13,15,17,19,21,23
distanceWindow,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,6.166424,4.999244,8.792993,4.388670,4.238992,2.009864,9.834271,15.679236,3.938086,2.312919,4.597740
50,14.314134,14.688637,17.935670,11.864701,5.505697,4.193317,12.107422,5.458806,4.800301,3.316201,9.621351
100,8.197425,10.155466,5.440780,11.240207,12.472710,5.650452,9.927969,22.826982,3.123125,5.092519,8.722411
150,3.059192,4.290610,5.252220,6.396047,4.328262,6.332382,3.195898,10.442840,15.971125,7.561269,5.589581
200,3.169777,3.311614,3.415577,9.710180,7.796997,7.258615,2.987181,1.185493,3.246132,9.517035,4.643012
...,...,...,...,...,...,...,...,...,...,...,...
7800,,,,0.628068,,7.620034,,,,0.631903,
7850,,,,1.489340,,7.519666,,,,1.409926,
7900,,,,2.838712,,8.257968,,,,2.878247,
7950,,,,4.305899,,9.317534,,,,5.044734,


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Power usage [kW]')

In [12]:
# Default barplot settings
width, capsize = 0.22, 5
# Fill and border colours per EVIS
color_diff, color_diff_border = "#FFA85C", "#E06900"
color_guess, color_guess_border = "#3F9CDE", "#1B679D"

plt.style.use("default")
# Only the sizes listed here are overridden; font.size and axes.labelsize are
# left as set by the style
params = {
    'axes.titlesize': 15,
    'legend.fontsize': 12,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13
}
plt.rcParams.update(params)

In [13]:
# Count the number of successes and fails in each attempt and each group

# All the successful attempts: the rows that lie in the 8000 distance window
s_df = average_individual.loc[average_individual["distanceWindow"].eq(8000)]

# Successes split by attempt (A1 / A2) and by EVIS (diff + COPE1 / guess-o-meter)
s_a1_diff = s_df.loc[s_df["Attempt nr"].eq(1) & s_df["evisID"].eq("DiffAndCOPE1")]
s_a1_guess = s_df.loc[s_df["Attempt nr"].eq(1) & s_df["evisID"].eq("GuessOMeter")]
s_a2_diff = s_df.loc[s_df["Attempt nr"].eq(2) & s_df["evisID"].eq("DiffAndCOPE1")]
s_a2_guess = s_df.loc[s_df["Attempt nr"].eq(2) & s_df["evisID"].eq("GuessOMeter")]

# All successes of each attempt
s_a1 = pd.concat([s_a1_diff, s_a1_guess])
s_a2 = pd.concat([s_a2_diff, s_a2_guess])

# The rows of an attempt that belong to users who are not among its successes,
# grouped by user ID so that one row per user can be picked
f_a1 = average_individual.loc[
    average_individual["Attempt nr"].eq(1) & ~average_individual["userID"].isin(s_a1["userID"])
]
f_a1_group = f_a1.groupby("userID")

f_a2 = average_individual.loc[
    average_individual["Attempt nr"].eq(2) & ~average_individual["userID"].isin(s_a2["userID"])
]
f_a2_group = f_a2.groupby("userID")

# The last row of each user, i.e. one row per failed attempt
fails_a1 = f_a1_group.tail(1)
fails_a2 = f_a2_group.tail(1)

f_a1_diff = fails_a1.loc[fails_a1["evisID"].eq("DiffAndCOPE1")]
f_a1_guess = fails_a1.loc[fails_a1["evisID"].eq("GuessOMeter")]
f_a2_diff = fails_a2.loc[fails_a2["evisID"].eq("DiffAndCOPE1")]
f_a2_guess = fails_a2.loc[fails_a2["evisID"].eq("GuessOMeter")]

# Summary table with the number of successes and fails
success_frames = [s_a1_diff, s_a1_guess, s_a2_diff, s_a2_guess]
fail_frames = [f_a1_diff, f_a1_guess, f_a2_diff, f_a2_guess]
s2_df = pd.DataFrame(data={
    "attempt nr": [1, 1, 2, 2],
    "evis": ["diff + COPE1", "guess-o-meter", "diff + COPE1", "guess-o-meter"],
    "number of successes": [len(frame) for frame in success_frames],
    "number of fails": [len(frame) for frame in fail_frames]
})


display(s2_df)



#s,t = stats.ttest_ind(section1d, section1g)

Unnamed: 0,attempt nr,evis,number of successes,number of fails
0,1,diff + COPE1,6,5
1,1,guess-o-meter,3,8
2,2,diff + COPE1,11,0
3,2,guess-o-meter,8,3


In [14]:
# Count the average distance traveled in each attempt and group
final_distance = needed_data.groupby(["Attempt nr", "evisID", "userID"], as_index=False)
# The last row of every user in every attempt
final_datapoint = final_distance.tail(1)
final_datapoint_grouped = final_datapoint.groupby(["Attempt nr", "evisID"])
final_datapoint_info = final_datapoint_grouped.describe()

#display(final_datapoint_info["distanceTraveled"])
# Summary statistics of the final distance, indexed by (Attempt nr, evisID)
f_d_d = final_datapoint_info["distanceTraveled"]

# Plot bar chart
fig_bars, ax_bars = plt.subplots()

evis = ["COPE1 + Diff", "Guess-o-meter"]
x_pos = np.arange(2)
width = 0.4

def _distanceBar(x, attempt, evis_id, label, color, edgecolor, hatch=None):
    """Draw the mean +- std bar of one (attempt, EVIS) group at position x.

    The group is looked up by its (attempt, evisID) label instead of by row
    position, so the bar does not depend on the order of the rows in f_d_d.
    """
    summary = f_d_d.loc[(attempt, evis_id)]
    return ax_bars.bar(x, summary["mean"], yerr=summary["std"], width=width, label=label,
                       color=color, hatch=hatch, edgecolor=edgecolor, capsize=capsize)

# Attempt #1 bars are hatched, attempt #2 bars are plain
bar1 = _distanceBar(x_pos[0]-width/2, 1, "DiffAndCOPE1", "COPE1 + diff attempt #1", color_diff, color_diff_border, hatch="//")
bar3 = _distanceBar(x_pos[0]+width/2, 1, "GuessOMeter", "Guess-o-meter attempt #1", color_guess, color_guess_border, hatch="//")
bar2 = _distanceBar(x_pos[1]-width/2, 2, "DiffAndCOPE1", "COPE1 + diff attempt #2", color_diff, color_diff_border)
bar4 = _distanceBar(x_pos[1]+width/2, 2, "GuessOMeter", "Guess-o-meter attempt #2", color_guess, color_guess_border)

# Tick per attempt, set through the Axes object instead of the pyplot state
ax_bars.set_xticks(x_pos)
ax_bars.set_xticklabels(('A1', 'A2'))
ax_bars.legend(handles=[bar1,bar3,bar2,bar4], loc="best")

ax_bars.yaxis.grid(True)
ax_bars.set_ylabel("Distance [m]")
# The trailing ";" keeps the Text(...) repr out of the cell output
ax_bars.set_title("Average distance traveled");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Average distance traveled')

### Averages and STD over specific sections. A1 & A2 combined
S1 = [0m, 3000m] The 40 km/h road  
S2 = [3000m, 6350m] Start of 110 km/h until the final downhill  
S3 = [6350m, 8000m] Final downhill till goal

In [15]:
# Road sections S1, S2 and S3 as [lower, upper] distance bounds in meters;
# each pair is passed to getMeanAndSTD as its `window` argument
sections = [[0, 3000], [3000, 6350], [6350, 8000]]

# Independent copy of average_individual; getMeanAndSTD reads it as a global
df_3 = average_individual.copy()

def getMeanAndSTD(window, evis, attempt):
    """Summarise the drives of one EVIS group and attempt inside a distance span.

    window  -- [lower, upper] bounds on the "distanceWindow" column; the lower
               bound is exclusive and the upper bound inclusive
    evis    -- value matched against the "evisID" column
    attempt -- value matched against the "Attempt nr" column

    Reads the global dataframe `df_3` and returns `describe()` of the matching
    rows (count, mean, std, ... for each described column).
    """
    lower_bound = window[0]
    upper_bound = window[1]
    distance_windows = df_3["distanceWindow"]

    # Build one boolean mask per criterion, then combine them
    inside_span = (distance_windows > lower_bound) & (distance_windows <= upper_bound)
    right_evis = df_3["evisID"] == evis
    right_attempt = df_3["Attempt nr"] == attempt

    return df_3[inside_span & right_evis & right_attempt].describe()

def getMeanAndSTDForSections(sections):
    """Collect mean and std of the speed for every section, EVIS group and attempt.

    sections -- list of [lower, upper] distance spans, each forwarded to getMeanAndSTD

    Returns one dataframe with the columns "mean" and "std" and one row per
    combination, labelled "a<attempt>_s<section>_<diff|guess>_speed" (attempt and
    section numbers start at 1). For each section the rows are ordered:
    diff attempt 1, diff attempt 2, guess attempt 1, guess attempt 2.

    Like the pd.concat call it ends with, it raises ValueError when `sections`
    is empty.
    """
    # (evisID value in the data, short tag used in the row label)
    evis_groups = (("DiffAndCOPE1", "diff"), ("GuessOMeter", "guess"))
    attempts = (1, 2)

    mean_std_data_frames = []
    for section_nr, section in enumerate(sections, start=1):
        for evis_id, tag in evis_groups:
            for attempt in attempts:
                summary = getMeanAndSTD(section, evis_id, attempt)
                # Single .loc selection (no chained indexing), then transpose so
                # that "mean" and "std" become the columns of a one-row frame
                speed_row = summary.loc[["mean", "std"], ["speed"]].T
                speed_row.index = ["a" + str(attempt) + "_s" + str(section_nr) + "_" + tag + "_speed"]
                mean_std_data_frames.append(speed_row)

    return pd.concat(mean_std_data_frames)
    
# One row per (attempt, section, EVIS) with the columns "mean" and "std"
section_data = getMeanAndSTDForSections(sections)

### Plot the bar chart with mean speeds over given road sections ###

fig_bars, ax_bars = plt.subplots(figsize=(8, 5))

# NOTE(review): `params` is not assigned in this cell; the only visible assignment is in
# the "Average & STD over sections A1" cell further down - confirm it is also defined
# earlier in the notebook, otherwise this line fails on a fresh kernel.
matplotlib.rcParams.update(params)

# Display names of the two EVIS groups (not used by the plotting code in this cell)
evis = ["COPE1 + Diff", "Guess-o-meter"]
x_pos = np.arange(len(sections))
width = 0.22

# One entry per bar inside a section group, listed in legend order:
# (row label template in section_data, x offset in bar widths, legend label, fill, border, hatch)
# Attempt 1 bars are hatched, attempt 2 bars are plain.
bar_specs = [
    ("a1_s{}_diff_speed", -1, "COPE1 + diff attempt #1", color_diff, color_diff_border, "//"),
    ("a1_s{}_guess_speed", 0, "Guess-o-meter attempt #1", color_guess, color_guess_border, "//"),
    ("a2_s{}_diff_speed", 1, "COPE1 + diff attempt #2", color_diff, color_diff_border, None),
    ("a2_s{}_guess_speed", 2, "Guess-o-meter attempt #2", color_guess, color_guess_border, None),
]

# Plot each section separately. Rows are looked up by label rather than by
# position, so the plot does not depend on the row order of section_data.
for section_index in range(len(sections)):
    section_bars = []
    for row_template, offset, bar_label, fill, border, hatch in bar_specs:
        row_label = row_template.format(section_index + 1)
        section_bars.append(
            ax_bars.bar(x_pos[section_index] + offset * width,
                        section_data.loc[row_label, "mean"], yerr=section_data.loc[row_label, "std"],
                        width=width, label=bar_label, color=fill, hatch=hatch, edgecolor=border, capsize=capsize)
        )

# Handles of the last section, used for the legend (same order as bar_specs)
bar1, bar3, bar2, bar4 = section_bars

# The four bars of a group span x - 1.5*width .. x + 2.5*width, so its centre is x + width/2
section_names = ["S{}".format(section_index + 1) for section_index in range(len(sections))]
plt.xticks(x_pos + width / 2, section_names)
plt.yticks(np.arange(0,101,10))

ax_bars.legend(handles=[bar1,bar3,bar2,bar4], loc="best")

ax_bars.yaxis.grid(True)
ax_bars.set_ylabel("Speed [km/h]")
ax_bars.set_title("Average speed at section S1, S2, S3 for both EVIS at attempt #1 & #2")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Average speed at section S1, S2, S3 for both EVIS at attempt #1 & #2')

### Find out if there's a statistically significant difference in average speed over certain distances between the groups
Calculate each driver's average speed over the distance windows within the given range.   
Perform a t-test between the two groups using these per-driver average speeds

In [16]:
#### TODO: Perform the test over all the sections comparing group A and B for each attempt
# TODO: Compare the difference within a group for each section between attempt 1 and 2

# NOTE(review): `whichAttempt` is not assigned in this cell (the cells below use the
# name `which_attempt` instead); it has to come from a cell further up - confirm.

# All the averaged values for each individual: keep the columns we need and
# the rows of the attempt under test
speed_columns = average_individual[["userID","Attempt nr","evisID","distanceWindow","speed"]]
attempt_speeds = speed_columns[speed_columns["Attempt nr"] == whichAttempt]

# One table per EVIS group: rows are distance windows, columns are users, values are speeds
individual = attempt_speeds[attempt_speeds["evisID"] == "DiffAndCOPE1"]
speed_table_diff = individual.pivot(index="distanceWindow", columns="userID", values="speed")

individual = attempt_speeds[attempt_speeds["evisID"] == "GuessOMeter"]
speed_table_guess = individual.pivot(index="distanceWindow", columns="userID", values="speed")

### Enter a section that you wish to test the statistical difference between the two groups ###

# NOT Statistically significant
#section = [0,3000]
#section = [3000,6350]
#section = [3200,3700]
#section = [6350, 8000]

# Statistically significant
#section = [7150, 8000] 

section = [3100,3800]


# Get the mean speeds for each user over the given section
# (label slicing with .loc includes both end points of the section)
mean_speeds_guess = speed_table_guess.loc[section[0]:section[1]].mean().dropna()
mean_speeds_diff = speed_table_diff.loc[section[0]:section[1]].mean().dropna()

#stats.normaltest(mean_speeds_guess.values)
# Independent two-sample t-test on the per-user mean speeds of the two groups
s,p = stats.ttest_ind(mean_speeds_guess, mean_speeds_diff)

print("Section: " + str(section))
print("p-value: " + str(p))
# p-value as a percentage with one decimal
prob_percent = round(p*1000)/10
# NOTE(review): a p-value is the probability of a difference at least this large
# if the group means were equal, not the probability that the means are equal.
# The wording below is left unchanged; consider rephrasing it.
print("Probability that the means are the same: " + str(prob_percent) + "%")

if p < 0.05:
    print("There's a statistical difference!")
else:
    print("No statistical difference, has to be < 5.0%")

print("\nMean speeds for each individual over the given section")
print("Guess-o-meter")
display(mean_speeds_guess)
print("Differentiated driving range + COPE1")
display(mean_speeds_diff)

Section: [3100, 3800]
p-value: 0.551532339603967
Probability that the means are the same: 55.2%
No statistical difference, has to be < 5.0%

Mean speeds for each individual over the given section
Guess-o-meter


userID
1     48.200050
3     45.806072
7     56.210778
9     72.430760
11    46.339700
13    51.119432
15    77.613463
17    47.721242
19    51.510444
21    37.091573
23    33.991695
dtype: float64

Differentiated driving range + COPE1


userID
2     30.209329
4     66.215755
6     41.357630
8     63.917231
12    37.465446
14    30.228723
18    53.921206
20    52.745297
22    51.203628
24    48.315000
26    56.253114
dtype: float64

# Average & STD over sections A1

In [17]:
# Attempt 1
which_attempt = 1

# Distance bounds of the three road sections
section_1, section_2, section_3 = [0, 3000], [3000, 6350], [6350, 8000]

# describe() tables per section, one set for each EVIS group
s1_diff, s2_diff, s3_diff = [
    getMeanAndSTD(bounds, "DiffAndCOPE1", which_attempt)
    for bounds in (section_1, section_2, section_3)
]
s1_guess, s2_guess, s3_guess = [
    getMeanAndSTD(bounds, "GuessOMeter", which_attempt)
    for bounds in (section_1, section_2, section_3)
]

# Mean and std of the speed, in section order S1, S2, S3
diff_mean = [table["speed"]["mean"] for table in (s1_diff, s2_diff, s3_diff)]
diff_std = [table["speed"]["std"] for table in (s1_diff, s2_diff, s3_diff)]

guess_mean = [table["speed"]["mean"] for table in (s1_guess, s2_guess, s3_guess)]
guess_std = [table["speed"]["std"] for table in (s1_guess, s2_guess, s3_guess)]

# Reset the matplotlib style, then apply the font sizes used for the figure
plt.style.use("default")
params = {
    'font.size': 15, 'axes.labelsize': 15, 'axes.titlesize': 13,
    'legend.fontsize': 12, 'xtick.labelsize': 15, 'ytick.labelsize': 15,
}
matplotlib.rcParams.update(params)

fig_bars, ax_bars = plt.subplots()

evis = ["COPE1 + Diff", "Guess-o-meter"]
x_pos = np.arange(3)
width = 0.35

# Two hatched bars per section: COPE1 + diff on the left, guess-o-meter on the right
ax_bars.bar(
    x_pos, diff_mean, yerr=diff_std,
    width=width, capsize=10, hatch="//",
    color=color_diff, edgecolor=color_diff_border, label="COPE1 + diff",
)
ax_bars.bar(
    x_pos + width, guess_mean, yerr=guess_std,
    width=width, capsize=10, hatch="//",
    color=color_guess, edgecolor=color_guess_border, label="Guess-o-meter",
)

# Section names centred between the two bars of each pair
ax_bars.set_xticks(x_pos + width / 2)
ax_bars.set_xticklabels(('S1', 'S2', 'S3'))
ax_bars.set_yticks(np.arange(0, 101, 10))

#fig_bars.legend(bbox_to_anchor=(1.1, 0.9))
ax_bars.legend(loc="best")

ax_bars.yaxis.grid(True)
ax_bars.set_ylabel("Speed [km/h]")
ax_bars.set_title("Average speed at section S1, S2, S3 for both EVIS")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Average speed at section S1, S2, S3 for both EVIS')

# Average & STD over sections A2

In [18]:
# Attempt 2
which_attempt = 2

# Distance bounds of the three road sections
section_1, section_2, section_3 = [0, 3000], [3000, 6350], [6350, 8000]

# describe() tables per section, one set for each EVIS group
s1_diff, s2_diff, s3_diff = [
    getMeanAndSTD(bounds, "DiffAndCOPE1", which_attempt)
    for bounds in (section_1, section_2, section_3)
]
s1_guess, s2_guess, s3_guess = [
    getMeanAndSTD(bounds, "GuessOMeter", which_attempt)
    for bounds in (section_1, section_2, section_3)
]

# Mean and std of the speed, in section order S1, S2, S3
diff_mean = [table["speed"]["mean"] for table in (s1_diff, s2_diff, s3_diff)]
diff_std = [table["speed"]["std"] for table in (s1_diff, s2_diff, s3_diff)]

guess_mean = [table["speed"]["mean"] for table in (s1_guess, s2_guess, s3_guess)]
guess_std = [table["speed"]["std"] for table in (s1_guess, s2_guess, s3_guess)]

# Reset the matplotlib style, then apply the font sizes used for the figure
plt.style.use("default")
params = {
    'font.size': 15, 'axes.labelsize': 15, 'axes.titlesize': 13,
    'legend.fontsize': 12, 'xtick.labelsize': 15, 'ytick.labelsize': 15,
}
matplotlib.rcParams.update(params)

fig_bars, ax_bars = plt.subplots()

evis = ["COPE1 + Diff", "Guess-o-meter"]
x_pos = np.arange(3)
width = 0.35

# Two plain (empty hatch string) bars per section: COPE1 + diff on the left, guess-o-meter on the right
ax_bars.bar(
    x_pos, diff_mean, yerr=diff_std,
    width=width, capsize=10, hatch="",
    color=color_diff, edgecolor=color_diff_border, label="COPE1 + diff",
)
ax_bars.bar(
    x_pos + width, guess_mean, yerr=guess_std,
    width=width, capsize=10, hatch="",
    color=color_guess, edgecolor=color_guess_border, label="Guess-o-meter",
)

# Section names centred between the two bars of each pair
ax_bars.set_xticks(x_pos + width / 2)
ax_bars.set_xticklabels(('S1', 'S2', 'S3'))
ax_bars.set_yticks(np.arange(0, 101, 10))

#fig_bars.legend(bbox_to_anchor=(1.1, 0.9))
ax_bars.legend(loc="best")

ax_bars.yaxis.grid(True)
ax_bars.set_ylabel("Speed [km/h]")
ax_bars.set_title("Average speed at section S1, S2, S3 for both EVIS")


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Average speed at section S1, S2, S3 for both EVIS')