In [1]:
# Libraries
%matplotlib widget
#%matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.patheffects as mpe
from IPython.display import display
import ipywidgets as widgets
import pandas as pd
import numpy as np
import matplotlib
import sys
from scipy import stats
import math

In [2]:
# Full width cells: inject a CSS rule so the notebook container uses the whole browser width.
# display/HTML come from the public IPython.display module; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Load data and clean it

## Read .csv & rename columns

In [54]:
# Location of the merged and cleaned driving data CSV
LocationAllData = "../data-cleanup/merged-cleaned-csv/driving_data_merged_1_to_28.csv"

# Raw CSV header -> column name used by the rest of the notebook
column_renames = {
    "Attempt nr": "attempt",
    "userID": "user_id",
    "evisID": "evis_id",
    "timeStamp": "timestamp",
    "currentStateOfCharge": "current_soc",
    "energyConsumed": "energy_consumed",
    "energyUsage": "power_usage",
    "distanceTraveled": "distance_traveled",
    "throttlePosition": "throttle_position",
    "breakPosition": "break_position",
    "steeringWheelRot": "wheel_rot",
    "yPosition": "road_height",
}

# Read the file and apply the new column names in one step
df = pd.read_csv(LocationAllData).rename(columns=column_renames)

## Assign *distance window*
Add a new column called `distance_window`. It is used to group data points so that averages can be calculated over distance.

In [55]:
# Width of the distance interval that data points are averaged over
distanceWindowWidth = 50

def assignDistanceWindow(distance):
    """Snap a travelled distance to a multiple of distanceWindowWidth.

    The distance is divided by the window width, rounded with Python's
    built-in round() (so exact halfway cases go to the even index), and
    scaled back up to a distance value.
    """
    return round(distance / distanceWindowWidth) * distanceWindowWidth

# Add a new column holding the distance window that each row belongs to
df["distance_window"] = df["distance_traveled"].map(assignDistanceWindow)

## Exclusion of participants

Here we should exclude participants who did not execute the task properly, e.g. by not using the dashboard at all or by misunderstanding the task.  
List of participants I think should be excluded, with the reason attached:

* id:5 - Guess - Participant didn't use the Range Estimate number in the dashboard  
* id:10 - Diff - Participant didn't use bars at all, only focused on the speed and energy usage  
* id:16 - Diff - Didn't understand the blue line  
* id:25 - Guess - Seemed like the participant didn't understand that going at a lower speed was allowed

In [53]:
# Ids (user_id) of the participants we want to exclude from the study
# NOTE(review): id 15 is listed here but has no documented reason in the list above - confirm.
exclusion_list = [10, 15, 5, 25, 16]
# Flag the rows that belong to an excluded participant, then keep all the others
is_excluded = df["user_id"].isin(exclusion_list)
df_excluded = df[~is_excluded]

# Calculation of averages and STD

## Calculate mean over distance for all attempts separately
I.e. a new data frame is created in which each attempt performed by a single participant is averaged over each distance window.  
This results in fewer rows, but with a column of fixed distances.

In [33]:
# One row per (attempt, evis, participant, distance window), holding the mean of the remaining columns
# NOTE(review): this averages the unfiltered df, not df_excluded - confirm that is intended.
grouping_keys = ["attempt", "evis_id", "user_id", "distance_window"]
average_individual = (
    df.groupby(grouping_keys)
    .mean()
    .reset_index()
)

## Calculate mean and STD of desired variables for the 4 different groups
These are the four defined groups
1. Diff + COPE1 - Attempt #1
2. Diff + COPE1 - Attempt #2
3. Guess-o-meter - Attempt #1
4. Guess-o-meter - Attempt #2



In [32]:
# Group the per-participant averages by attempt, evis and distance window
average_groups = average_individual.groupby(
    ["attempt", "evis_id", "distance_window"], as_index=False
)

# Mean and standard deviation for speed, state of charge and consumed energy;
# only the mean for road height
group_aggregations = {
    "speed": ["mean", "std"],
    "current_soc": ["mean", "std"],
    "energy_consumed": ["mean", "std"],
    "road_height": "mean",
}
group_columns = ["speed", "current_soc", "road_height", "energy_consumed"]

average_group_based = average_groups[group_columns].agg(group_aggregations)

# Count successful attempts 

In [104]:
# Group based on attempt, evis and user id.
each_user = average_individual.groupby(["attempt", "evis_id", "user_id"])

# Create a new df with the last data point of each attempt: one row per
# (attempt, evis, participant) group. This previously read from s_df, which is
# only defined in a later cell and would yield a single row overall instead of
# one row per group. GroupBy.tail(1) already returns a DataFrame with the
# original index, so no pd.concat wrapper is needed.
each_user_final_data = each_user.tail(1)

each_user_final_data

Unnamed: 0,attempt,evis_id,user_id,distance_window,timestamp,current_soc,energy_consumed,power_usage,guesstimatedDistanceLeft,speed,distance_traveled,throttle_position,break_position,wheel_rot,xPosition,road_height,zPosition
95,1,DiffAndCOPE1,2,4750,345.870368,0.003465,1.198095,21.238237,-1.0,122.305602,4743.907859,1.000015,0.0,0.001101,496.360019,4.331654,-1486.36491
208,1,DiffAndCOPE1,4,5600,390.986344,0.005871,1.201622,37.608485,-1.0,99.179912,5592.209697,0.925794,0.0,0.004242,1092.62001,7.094248,-2073.061858
369,1,DiffAndCOPE1,6,8000,602.510747,0.076932,1.13504,11.53311,-1.0,60.608147,7991.847824,0.709299,0.0,-0.006751,384.681581,-5.5313,-4004.636206
530,1,DiffAndCOPE1,8,8000,607.853099,0.091116,1.134222,3.64409,-1.0,72.308482,7987.400118,0.670077,0.0,-0.002103,384.235915,-5.542229,-4008.305658
630,1,DiffAndCOPE1,10,4950,386.069381,0.011953,1.209225,14.40315,-1.0,41.039886,4946.493568,0.729153,0.0,0.000653,629.452628,3.781182,-1622.623485
791,1,DiffAndCOPE1,12,8000,739.36688,0.061866,1.256998,2.729032,-1.0,33.535548,7991.027557,0.503906,0.0,-0.001838,384.341643,-5.530853,-4005.180211
893,1,DiffAndCOPE1,14,5050,360.176888,0.006309,1.224791,1.584491,-1.0,25.867628,5048.805423,0.315639,0.0,-0.00012,701.415658,3.836373,-1693.354411
1004,1,DiffAndCOPE1,16,5500,383.804935,0.011865,1.195134,45.736984,-1.0,104.393812,5497.357143,0.994546,0.0,0.007866,1025.917724,5.989568,-2003.985066
1165,1,DiffAndCOPE1,18,8000,697.992404,0.041091,1.176636,4.688327,-1.0,67.400075,7993.405934,0.691412,0.0,-0.00504,385.150822,-5.529033,-4004.041629
1326,1,DiffAndCOPE1,20,8000,619.354238,0.058458,1.177708,66.30738,-1.0,81.755188,7990.664955,1.000015,0.0,-0.007543,387.70045,-5.535756,-4008.505136


In [None]:
# Count the number of successes and fails in each attempt and each group.
# All lookups use the renamed columns (attempt, evis_id, user_id, distance_window);
# the original CSV headers no longer exist on average_individual.

# All the successful attempts, i.e. those with a data point in distance window 8000
s_df = average_individual[average_individual["distance_window"] == 8000]

s_a1_diff = s_df[(s_df["attempt"] == 1) & (s_df["evis_id"] == "DiffAndCOPE1")] # Successes in A1 using diff + COPE1
s_a1_guess = s_df[(s_df["attempt"] == 1) & (s_df["evis_id"] == "GuessOMeter")] # Successes in A1 using guess-o-meter
s_a2_diff = s_df[(s_df["attempt"] == 2) & (s_df["evis_id"] == "DiffAndCOPE1")] # Successes in A2 using diff + COPE1
s_a2_guess = s_df[(s_df["attempt"] == 2) & (s_df["evis_id"] == "GuessOMeter")] # Successes in A2 using guess-o-meter

# Create new dataframes with the successes for the two attempts
s_a1 = pd.concat([s_a1_diff, s_a1_guess])
s_a2 = pd.concat([s_a2_diff, s_a2_guess])

# Keep only the rows of participants who did NOT succeed in the given attempt,
# then group by user id so each participant can be reduced to a single row
f_a1 = average_individual[(average_individual["attempt"] == 1) & ~average_individual["user_id"].isin(s_a1["user_id"].values)]
f_a1_group = f_a1.groupby("user_id")

f_a2 = average_individual[(average_individual["attempt"] == 2) & ~average_individual["user_id"].isin(s_a2["user_id"].values)]
f_a2_group = f_a2.groupby("user_id")

# Last row of each group -> exactly one row per failing participant
fails_a1 = f_a1_group.tail(1)
fails_a2 = f_a2_group.tail(1)

f_a1_diff = fails_a1[fails_a1["evis_id"] == "DiffAndCOPE1"]
f_a1_guess = fails_a1[fails_a1["evis_id"] == "GuessOMeter"]
f_a2_diff = fails_a2[fails_a2["evis_id"] == "DiffAndCOPE1"]
f_a2_guess = fails_a2[fails_a2["evis_id"] == "GuessOMeter"]

# Create a new dataframe with the number of successes and fails
s2_df = pd.DataFrame(data={
    "attempt nr": [1, 1, 2, 2],
    "evis": ["diff + COPE1", "guess-o-meter", "diff + COPE1", "guess-o-meter"],
    "number of successes": [len(s_a1_diff), len(s_a1_guess), len(s_a2_diff), len(s_a2_guess)],
    "number of fails": [len(f_a1_diff), len(f_a1_guess), len(f_a2_diff), len(f_a2_guess)]
})


display(s2_df)