# Data Analysis for 3DUI Honours Project Rotation Controllers
_By Steven Rybicki_

## Introduction

In [27]:
import json
import os
import math
import csv
import numpy as np 
import pandas as pd 
from datetime import datetime
from scipy import stats
from collections import defaultdict, namedtuple

In [117]:
SUS_STAGES = [
    13,
    18,
    23
]

TASK_STAGES = [
    12,
    17,
    22
]

TRAINING_STAGES = [
    10,
    15,
    20
]

controllerOptions = [["discrete", "twoaxis", "arcball"], ["discrete", "arcball", "twoaxis"], ["twoaxis", "discrete", "arcball"], ["twoaxis", "arcball", "discrete"], ["arcball", "discrete", "twoaxis"], ["arcball", "twoaxis", "discrete"]]
modelOptions = [["models/mrt_model.json", "models/mrt_model_16a.json", "models/mrt_model_23a.json"], ["models/mrt_model.json", "models/mrt_model_23a.json", "models/mrt_model_16a.json"], ["models/mrt_model_16a.json", "models/mrt_model.json", "models/mrt_model_23a.json"], ["models/mrt_model_16a.json", "models/mrt_model_23a.json", "models/mrt_model.json"], ["models/mrt_model_23a.json", "models/mrt_model.json", "models/mrt_model_16a.json"], ["models/mrt_model_23a.json", "models/mrt_model_16a.json", "models/mrt_model.json"]]


MRT_STAGE = 5

ORIENTATION_TASK_QUAT = {
    "0_0": [0.25881904510252074,0,0,0.9659258262890683],
    "0_1": [0,0,0.7071067811865475,0.7071067811865476],
    "0_2": [0.3314135740355918,0.4619397662556433,0.19134171618254486,0.8001031451912656],
    "0_3": [0.2185080122244105,0.21850801222441052,0.6724985119639573,0.6724985119639574],
    "1_0": [0.04799966634373737,0.7856544802373238,0.11573312388031406,-0.6058456187435991],
    "1_1": [-0.9124677834418226,0.3563393717894529,0.16246581417319952,-0.11844684680693215],
    "1_2": [0.060676044258967846,0.7425536630010398,-0.09767856431944423,-0.6598419305328469]
}

INSPECTION_TASK_QUAT = {
    "2_0":[0.6228684391534324,-0.47348903397844655,0.3567527821077567,-0.5104610608725213],
    "2_1":[-0.533966817396014,-0.5651774758681182,-0.4601635612922124,-0.428606294342702],
    "2_2":[-0.05030822616001109,0.9948785161788547,-0.08766878450590904,0.00006818834234121639],
    "2_3":[-0.000049693290986342606,0.7673132076626328,0.00004187190621293386,0.6412725139313251],
    "2_4":[0.04175826956285454,0.7751479845620929,0.018942573730028438,-0.6301135039442686]
}

sus_questions = [
    "I think that I would like to use this system frequently",
    "I found the system unnecessarily complex",
    "I thought the system was easy to use",
    "I think that I would need the support of a technical person to be able to use this system",
    "I found the various functions in this system were well integrated",
    "I thought there was too much inconsistency in this system",
    "I would imagine that most people would learn to use this system very quickly",
    "I found the system very cumbersome to use",
    "I felt very confident using the system",
    "I needed to learn a lot of things before I could get going with this system",
]

mrt_answers =\
    [[1, 3],
    [1, 4],
    [2, 4],
    [2, 3],
    [1, 3],
    [1, 4],
    [2, 4],
    [2, 3],
    [2, 4],
    [1, 4],
    [3, 4],
    [2, 3],
    [1, 2],
    [2, 4],
    [2, 3],
    [1, 4],
    [2, 4],
    [2, 3],
    [1, 3],
    [1, 4],
    [2, 4],
    [2, 3],
    [1, 4],
    [1, 3]]
    
CONFOUNDING_CAT_COLS = [
    "controller_choice",
    "model_choice",
    "Biological Sex",
    "Faculty",
    "Major",
    "What type of computer software do you own or use?  [Word processing]",
    "What type of computer software do you own or use?  [Statistics programs]",
    "What type of computer software do you own or use?  [Games]",
    "What type of computer software do you own or use?  [Art/drawing]",
    "Do you participate in any extra-curricular sports?",
    "Do you own any video game systems?",
    "Do you own a computer?",
    "How long have you owned/been using a computer?",
    "Frequency of use [Using a computer]",
    "Frequency of use [Purchasing software]",
    "Frequency of use [Use the internet]",
    "Frequency of use [Use any video game systems]",
    "Frequency of use [Play boardgames]",
    "Frequency of use [Use maps]",
    "Frequency of use [Use 3D Modelling Software (e.g. Google SketchUp, Maya, Blender, Autodesk)]",
    "Proficiency or Skill [Using maps]",
    "Proficiency or Skill [Using computers]",
    "Proficiency or Skill [Playing video games]",
    "Proficiency or Skill [3D Modelling Software (e.g. Google SketchUp, Maya, Blender, Autodesk)]",
    "Level of Degree"
] 

CONFOUNDING_NUM_COLS = [
    "Age",
    "How many science courses (or modules) have you taken in the past year? (Please don't include maths courses in this total)",
    "How many maths courses (or modules) have you taken in the past year?",
    "What was your final Matric mark for Maths?",
    "What was your final Matric mark for English?",
]
    
CONTROLLERS = ["twoaxis","arcball","discrete"]
directory = "/Users/stevenrybicki/testing-interface/data/study"
factor_file = "/Users/stevenrybicki/testing-interface/data/study/.info.csv"

In [118]:
Event = namedtuple('Event', ["meta", "data"])
EventMeta = namedtuple('EventMeta', ["uuid","pipeline_index","participant_number", "date"])
EventData = namedtuple('EventData', ["raw"])
MRTData = namedtuple('MRTData', ["marks", "score"])
SUSData = namedtuple('SUSData', ["marks", "adj_marks", "score", "total", "controller", "text"])
TaskData = namedtuple('TaskData', ["meta", "info", "score"]);
TaskMeta = namedtuple('TaskMeta', ["type", "num", "repetition", "date"])
TaskInfo = namedtuple('TaskInfo', ["controller", "group", "index", "model", "rotation", "quaternion"])
TaskScore = namedtuple('TaskScore', ["time", "accuracy"])
Experiment = namedtuple('Experiment', ["num", "controllers", "models", "controller_choice", "model_choice"])

In [119]:
def inner_prod(q_1, q_2):
    s = 0
    for a,b in zip(q_1, q_2):
        s += a*b
    return s

def dist(q_1, q_2):
    return math.acos(2 * (inner_prod(q_1, q_2)**2) - 1)

def mult(q_1, q_2):
    x1, y1, z1, w1 = q_1
    x2, y2, z2, w2 = q_2
    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
    z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
    return (x, y, z, w)

def conj(q):
    x, y, z, w = q
    return (-x, -y, -z, w)

def rotate(vec, q):
    q_2 = vec + (0.0,)
    return mult(mult(q, q_2), conj(q))[:-1]

def gen_experiment(num):
    """
    Determine the order in which controllers and models were presented to a participant with a given number
    """
    controllerChoiceNum = num % len(controllerOptions)
    modelChoiceNum = int(num / len(modelOptions)) % len(modelOptions)
    controllerChoice = controllerOptions[controllerChoiceNum]
    modelChoice = modelOptions[modelChoiceNum]
    return Experiment(num, controllerChoice, modelChoice, controllerChoiceNum, modelChoiceNum)

def paths(directory):
    """
    Gives all the paths in the directory that don't begin with "."
    """
    return list(map(lambda x: "{0}/{1}".format(directory, x), filter(lambda x: x[0] != ".", os.listdir(directory))))

def parse_path(path):
    """
    Takes a path, reads it in, gives back the JSON
    """
    data = None
    with open(path) as f:
        file_data = "\n".join(f.readlines()).strip() # remove final newline
        file_data = file_data[1:-1] # remove quotation signs
        data = json.loads(file_data)
    return data

def data(directory):
    """
    Gets the parsed json data for each file in the directory
    """
    return list(map(lambda x: parse_path(x), paths(directory)))

def get_meta_from(event):
    return event[2]

def get_num_from(event):
    """
    Get the participant number from a specific event log
    """
    meta = get_meta_from(event)
    if "participant_number" in meta:
        return meta["participant_number"]
    else:
        return 0

def get_stage_from(event):
    """
    Get the stage number from a specific event log
    """
    return get_meta_from(event)["pipeline_index"]

def get_date_from(event):
    """
    Get the event from a specific event log
    """
    return parse_date(event[1])

def parse_date(js_date):
    """
    Example input date: "2015-09-03T20:03:09.429Z"
    This is then transformed to
    2015-09-03 20:03:09

    Which is then made into a python datetime object
    Gives all the paths in the directory that don't begin with "."
    """
    template = "%Y-%m-%d %H:%M:%S"
    formatted_date = js_date.replace("T"," ").replace("Z","").split(".")[0]
    return datetime.strptime(formatted_date, template)

def build_people(data, factors):
    """
    From parsed json data, get a list of people
    """
    people = dict()
    for datum in data:
        for event in datum:
            num = get_num_from(event)
            if num:
                if num not in people:
                    people[num] = Person(num)
                people[num].add_event(event)
    
    for num in factors:
        if num in people:
            people[num].set_factors(factors[num])
    return people

def get_factors(path):
    """
    Parse the factors file
    """
    factors = dict()
    with open(path) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            num = int(row["Participant no"])
            factors[num] = row
    return factors
    
def process():
    full_data = data(directory)
    factors = get_factors(factor_file)
    people = build_people(full_data, factors)
    return people

def check_valid(people):
    entries = []
    for num in people:
        person = people[num]
        entries.append(person.validate())
    return entries

class Person:
    def __init__(self, num):
        self.num = num
        self.experiment = gen_experiment(num)
        self.raw_events = []
        self.events = []
        self.mrt_results = []
        self.sus_results = []
        self.tasks = defaultdict(lambda : defaultdict(dict))
        self.training_tasks = defaultdict(lambda : defaultdict(dict))
        self.factors = dict()
        self.event_processors = [
            (
                lambda event_meta, event_data:
                    event_data[0] == "mrt test results",
                self.process_mrt
            ),
            (
                lambda event_meta, event_data:
                    event_data[0] == "sus evaluation results",
                self.process_sus
            ),
            (
                lambda event_meta, event_data:
                    event_data[0] == "saving user task",
                self.process_task
            )
        ]
        
    def set_factors(self, factors):
        self.factors = factors

    def process_mrt(self, event_meta, event_data):
        template = "test_{0}_{1}"
        data = event_data[1]
        marks = []
        for i, answers in enumerate(mrt_answers):
            answer_1 = template.format(i, answers[0])
            answer_2 = template.format(i, answers[1])
            if all([((answer in data) and data[answer]) for answer in (answer_1, answer_2)]):
                marks.append(True)
            else:
                marks.append(False)
        mrt_data = MRTData(marks, marks.count(True))
        self.mrt_results.append(mrt_data)
        return mrt_data

    def process_sus(self, event_meta, event_data):
        controller = event_data[1]
        # Load events
        data = event_data[2]
        marks = [None]*10
        adj_marks = [None]*10
        for key in data:
            if data[key] and key != "text":
               num, answer = [int(x) for x in key.split("_")]
               marks[num] = answer
        # Calculate score
        score = 0
        total = 0
        for i in range(len(marks)):
            if marks[i] != None:
                if i % 2 == 0:
                    adj_marks[i] = marks[i]
                else:
                    adj_marks[i] = 5 - (marks[i] + 1)
                score += adj_marks[i]
                total += 4
        score *= 2.5
        total *= 2.5

        sus_data = SUSData(marks, adj_marks, score, total, controller, data["text"] if "text" in data else "")
        self.sus_results.append(sus_data)
        return sus_data

    def process_task(self, event_meta, event_data):
        group = event_data[1]
        index = event_data[2]
        rotation = event_data[3]
        quaternion = event_data[4]

        stage = event_meta.pipeline_index
        task_type = None
        repetition = None
        container = None
        if stage in TASK_STAGES:
            task_type = "task"
            repetition = TASK_STAGES.index(stage)
            container = self.tasks
        elif stage in TRAINING_STAGES:
            task_type = "training"
            repetition  = TRAINING_STAGES.index(stage)
            container = self.training_tasks
        controller = self.experiment.controllers[repetition]
        model = self.experiment.models[repetition]
        num = len(container[controller])
        time = (event_meta.date - self.events[-1].meta.date).total_seconds()
        accuracy = None
        key = "{0}_{1}".format(group, index)
        if group < 2:
            model_quat = ORIENTATION_TASK_QUAT[key]
            accuracy = dist(model_quat, quaternion)
        else:
            if group > 2:
                accuracy = 0 # ignore these for training data
            else:
                ref_quat = INSPECTION_TASK_QUAT[key]
                ref_vec = (0,0,1)
                start_vec = rotate(ref_vec, ref_quat)
                end_vec = rotate(ref_vec, quaternion)
                accuracy = math.acos(inner_prod(start_vec, end_vec))
                if accuracy > math.pi / 2.0:
                    accuracy -= math.pi / 2.0
        score = TaskScore(time, accuracy) # TODO
        info = TaskInfo(controller, group, index, model, rotation, quaternion)
        meta = TaskMeta(task_type, num, repetition, event_meta.date)
        task_data = TaskData(meta, info, score)
        container[controller][group][index] = task_data
        return task_data

    def add_event(self, event):
        self.raw_events.append(event)
        event_meta = EventMeta(
                event[2]["uuid"],
                event[2]["pipeline_index"],
                event[2]["participant_number"],
                parse_date(event[1]))

        event_data = EventData(event[3:])
        for processor in self.event_processors:
            if processor[0](event_meta, event[3:]):
                event_data = processor[1](event_meta, event[3:])
                break

        processed_event = Event(event_meta, event_data)
        self.events.append(processed_event)

    def events_at_stage(self, stage):
        return filter(lambda event: event.meta.pipeline_index == stage, self.events)

    def mrt(self):
        return self.mrt_results[0]

    def sus(self, controller):
        return list(filter(lambda data: data.controller == controller, self.sus_results))[0]

    def validate(self):
        num_mrt = len(self.mrt_results)
        sus_keys = {result.controller for result in self.sus_results}
        num_sus = len(self.sus_results)
        totals_sus = [result.total for result in self.sus_results]
        valid_elms = (
          num_mrt == 1,
          sus_keys == set(CONTROLLERS),
          num_sus == 3,
          all(total == 100 for total in totals_sus),
        )
        valid = all(valid_elms)
        return valid
    


In [122]:
def get_df(people):
    """
    Given a list of people, get dataframes to represent them
    """
    people_data = []
    people_indices = []
    task_data = []
    sus_data = []
    for num in people:
        person = people[num]
        people_indices.append(num)
        person_data = {
            "mrt_score": person.mrt().score,
            "controller_choice": person.experiment.controller_choice, 
            "model_choice": person.experiment.model_choice,
            "valid": person.validate()
        }
        
        for factor in person.factors:
            person_data[factor] = person.factors[factor]
        
        people_data.append(person_data)
        for controller in person.tasks:
            for group in person.tasks[controller]:
                for index in person.tasks[controller][group]:
                    task = person.tasks[controller][group][index]
                    task_data.append({
                            "person_num": num,
                            "time": task.score.time,
                            "accuracy": task.score.accuracy,
                            "controller": controller,
                            "group": group,
                            "index": index,
                            "type": task.meta.type,
                            "num_done": task.meta.num,
                            "repetition": task.meta.repetition, 
                            "date": task.meta.date,
                            "model": task.info.model
                    })
        for controller in CONTROLLERS:
            sus_result = person.sus(controller)
            sus_datum = {
                "person_num": num,
                "score": sus_result.score,
                "total": sus_result.total,
                "controller": controller,
                "text": sus_result.text,
            }
            for i, mark in enumerate(sus_result.adj_marks):
                sus_datum["q_{0}".format(i)] = mark
            sus_data.append(sus_datum)

    people_df = pd.DataFrame(people_data, index=people_indices)
    task_df = pd.DataFrame(task_data)
    sus_df = pd.DataFrame(sus_data)
    return (people_df, task_df, sus_df)

def df_report_per_controllers(df, cols, filter_sig = False):
  
    col_series = [df[df.controller == controller].loc[:,cols]
                               for controller in CONTROLLERS]
    
    return report_col_series(col_series, cols, CONTROLLERS, filter_sig)

def report_col_series(col_series, cols, index, filter_sig = False):
    pvals = []
    for col in cols:
        col_data = [col_series[i][col] for i in range(len(index))]
        f_val, p_val = stats.f_oneway(*col_data)
        pvals.append(p_val)
    
    report_df = pd.DataFrame([data.mean() for data in col_series], index=index)
    report_df = report_df.append(
        pd.DataFrame(
            [{
              cols[i]: pvals[i] for i in range(len(cols))  
            }],
            index = ["anova_p_val"]
        )
    )
    
    report_df = report_df.T
    if filter_sig:
        return report_df[report_df.anova_p_val < 0.05]
    else:
        return report_df

    
def task_score_df(task_df):
    person_nums = task_df["person_num"].unique()
    task_score_data = []
    for controller in CONTROLLERS:
        controller_df = task_df[task_df.controller == controller]
        for num in person_nums:
            person_data = controller_df[controller_df.person_num == num]
            datum = {"controller": controller, "person_num": num}
            for row in person_data.iterrows():
                row_data = row[1]
                group = row_data["group"]
                index = row_data["index"]
                for col in ["accuracy", "time"]: 
                    datum["q_{0}_{1} {2}".format(group, index, col)] = row_data[col]
            task_score_data.append(datum)
    task_score_df = pd.DataFrame(task_score_data)
    return task_score_df

def task_score_df_cols():
    num_in_each_group = [4, 3, 5]
    cols = ["q_{0}_{1} {2}".format(group, index, col) 
           for group in range(len(num_in_each_group))
           for index in range(num_in_each_group[group])
           for col in ["accuracy", "time"]]
    return cols
    

def df_report(people_df, task_df, sus_df):
    # Per question sus mean
    sus_q_cols = tuple(["q_{0}".format(i) for i in range(10)] + ["score"])
    print(df_report_per_controllers(sus_df, sus_q_cols, True))
    # Per task score/time mean
    ts_df = task_score_df(task_df)
    ts_df_cols = task_score_df_cols()
    
    report = df_report_per_controllers(ts_df, ts_df_cols, True)
    print(report)
    
def tasks_by(factor, task_cols, people_df, task_df, filter_sig = False):
    groups = people_df.groupby(factor)
    indices = [group.index.values for name, group in groups]
    
    task_groups = [task_df[task_df["person_num"].isin(index)].loc[:,task_cols]
                           for index in indices]
    

    return report_col_series(task_groups, task_cols, people_df[factor].unique(), filter_sig)


people = process()
df = get_df(people)
people_df = df[0]
task_df = df[1]
  

In [123]:
for cat in CONFOUNDING_CAT_COLS:
    print(cat)
    print("="*len(cat))
    print(tasks_by(cat,
              task_score_df_cols(), 
              people_df,
              task_score_df(task_df),
              True))
    print("")

controller_choice
                    1          2          3          4          5          0  \
q_0_1 time  21.266667  31.833333  22.388889  19.777778  28.066667  17.055556   

            anova_p_val  
q_0_1 time     0.001647  

model_choice
                        0          1          2          3          4  \
q_0_2 time      27.933333  22.222222  30.111111  20.277778  22.777778   
q_2_3 accuracy   0.420077   0.945315   0.419981   0.326546   0.724968   
q_2_3 time       7.866667   9.833333   7.055556   6.666667  10.833333   
q_2_4 time       7.000000   8.111111   7.388889   6.611111  10.444444   

                        5  anova_p_val  
q_0_2 time      17.066667     0.019374  
q_2_3 accuracy   0.453366     0.001638  
q_2_3 time       6.200000     0.003887  
q_2_4 time      11.933333     0.000922  

Biological Sex
                    Male    Female  anova_p_val
q_1_1 accuracy  0.686755  0.224275     0.003008
q_1_2 accuracy  0.318250  0.131997     0.004397
q_2_2 accuracy  0.339978

In [124]:
df_report(*df)      

Empty DataFrame
Columns: [twoaxis, arcball, discrete, anova_p_val]
Index: []
                 twoaxis    arcball   discrete  anova_p_val
q_0_3 accuracy  0.472246   0.192065   0.080876     0.038904
q_2_0 time      8.588235  10.441176  12.264706     0.014587
q_2_2 time      7.735294   8.235294  10.852941     0.015851
