In [1]:
import numpy as np
import json
import os

In [2]:
def readData(dataFolder="./data/", minHz=40):
    """Load every recording whose sample rate exceeds ``minHz``.

    Each regular file in ``dataFolder`` is read as JSON Lines: one JSON object
    per non-blank line. Every object must carry a ``"data"`` list of readings
    whose first element is a timestamp in nanoseconds.

    Args:
        dataFolder: Directory containing the recording files.
        minHz: Recordings are kept only if their average sample rate is
            strictly greater than this value.

    Returns:
        A list of the decoded JSON objects (dicts) that passed the filter,
        in sorted-filename order and then in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"data"`` entry.
    """
    data = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file in sorted(os.listdir(dataFolder)):
        path = os.path.join(dataFolder, file)
        if not os.path.isfile(path):
            continue  # skip sub-directories, which open() cannot read
        with open(path, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines and a missing/extra trailing newline
                    # instead of blindly dropping the last line of the file.
                    continue
                datapoint = json.loads(line)
                readings = datapoint["data"]
                if len(readings) < 2:
                    continue  # a sample rate needs at least two timestamps
                duration = (readings[-1][0] - readings[0][0]) * 10**-9  # timestamp is in nanoseconds
                if duration <= 0:
                    continue  # avoids ZeroDivisionError on degenerate recordings
                hz = len(readings) / duration
                if hz > minHz:
                    data.append(datapoint)
    return data

# Load the recordings that pass readData's sample-rate filter (reads ./data/ from disk).
data = readData()

In [3]:
def normalise(dataset):
    """Standardise ``dataset`` along axis 0 to zero mean and unit variance.

    Args:
        dataset: Array-like whose first axis indexes the samples.

    Returns:
        ``(dataset - mean) / std`` computed per position along axis 0. Where
        the standard deviation is exactly 0 (a constant axis) the centred
        values are divided by 1 instead, so the result is 0 rather than
        NaN/inf.
    """
    dataset = np.asarray(dataset)
    mu = np.mean(dataset, axis=0)
    sigma = np.std(dataset, axis=0)
    # A constant axis has sigma == 0; dividing by it would poison every
    # downstream statistic with NaN.
    safeSigma = np.where(sigma == 0, 1.0, sigma)
    return (dataset - mu) / safeSigma

def convertData(data, windowNs=30 * 10**9, numSamples=1200):
    """Turn raw recordings into fixed-length, normalised arrays.

    For every recording the first ``numSamples`` readings that fall within
    ``windowNs`` nanoseconds of the first reading are kept. A recording is
    discarded when it has no readings, when it yields fewer than
    ``numSamples`` readings, or when any reading is timestamped before the
    first one.

    Args:
        data: Iterable of dicts with keys ``"id"``, ``"drunk"``, ``"walking"``
            and ``"data"`` (a list of ``[t, x, y, z]`` readings).
        windowNs: Length of the accepted window, in nanoseconds.
        numSamples: Exact number of readings required per recording.

    Returns:
        A tuple ``(X, T, truth, misc)`` of numpy arrays whose rows all refer
        to the same accepted recording:
        X     -- the ``[x, y, z]`` readings after ``normalise``.
        T     -- the original (unshifted) timestamps of those readings.
        truth -- the ``"drunk"`` value of each recording.
        misc  -- one ``[id, walking]`` row per recording (object dtype so the
                 two values keep their own types).
    """
    X, T, truth, misc = [], [], [], []
    for d in data:
        recordingId = d["id"]  # renamed from `id` to stop shadowing the builtin
        drunk = d["drunk"]
        walking = d["walking"]
        readings = d["data"]
        if not readings:
            continue  # readings[0] below would raise IndexError
        firstTimestamp = readings[0][0]
        convertedReadings = []
        timestamps = []
        for t, x, y, z in readings:
            elapsed = t - firstTimestamp  # slide so all timestamps start at 0
            if elapsed < 0:
                break  # out-of-order timestamp: skips the else-branch, dropping the recording
            if elapsed < windowNs and len(convertedReadings) < numSamples:
                convertedReadings.append([x, y, z])
                # NOTE(review): the raw timestamp is stored, not `elapsed`;
                # kept as in the original - confirm which one T should hold.
                timestamps.append(t)
        else:
            # Reached only when the loop finished without hitting `break`.
            if len(convertedReadings) == numSamples:
                X.append(normalise(np.array(convertedReadings)))
                T.append(timestamps)
                truth.append(drunk)
                # One row per recording; extending the list with the two bare
                # values would break the alignment with X, T and truth.
                misc.append([recordingId, walking])

    X = np.array(X)
    T = np.array(T)
    truth = np.array(truth)
    misc = np.array(misc, dtype=object)
    return X, T, truth, misc

# Build the arrays used by the analysis cells; the labels convertData returns as `truth` are bound to `y` here.
X, T, y, misc = convertData(data)

In [4]:
def analyse(X, y, function, label=''):
    """Print the mean and standard deviation of a statistic for each group.

    ``function`` is applied to every sample in ``X``; the results are split
    by the truthiness of the matching entry in ``y`` (truthy -> "drunk",
    falsy -> "sober"). If ``label`` is non-empty it is printed first.
    Nothing is returned.
    """
    def collect(wantDrunk):
        # Walk the (sample, flag) pairs afresh for each group.
        return [
            function(sample)
            for sample, isDrunk in zip(X, y)
            if bool(isDrunk) == wantDrunk
        ]

    drunkValues = collect(True)
    soberValues = collect(False)

    if label:
        print(label)

    drunkMean, drunkStd = np.mean(drunkValues), np.std(drunkValues)
    print(f" drunk: {drunkMean}, {drunkStd}")
    soberMean, soberStd = np.mean(soberValues), np.std(soberValues)
    print(f" sober: {soberMean}, {soberStd}")

# Spread of the absolute readings within each recording.
analyse(X,y, lambda x: np.std(np.abs(x)), "std")

# np.abs returns a whole array per recording, so analyse's np.mean/np.std pool every element of every recording in the group.
analyse(X, y, np.abs, 'abs')

# Mean over all readings of a recording; convertData normalises each recording per axis, so values near 0 are expected.
analyse(X, y, np.mean, "mean")

std
 drunk: 0.7368724786990721, 0.09892494222525465
 sober: 0.6304802762548293, 0.03509731985198614
abs
 drunk: 0.6573243439687027, 0.753607793766834
 sober: 0.774696403484295, 0.6323333633681668
mean
 drunk: 8.836926701056805e-18, 1.2086175488611933e-16
 sober: 4.389329712289346e-18, 5.638922308050554e-17
