This notebook takes the data from `../datasets/real/2019-10-18/raw` and presents it in a way the model can digest. There are two types of inputs/outputs we want to be able to generate:

- Motor Predictions (MP): given a starting position and a final position, how many motor ticks does it take?
- Position Predictions (PP): given a starting position and a number of motor ticks, where will the platform end?

Both problems have multiple solutions: if a cable is not tensed, there are multiple possibilities for motor control to end up in a given position. And when a starting position comprises a slack cable, it is impossible to guess the amount of ticks that are necessary to bring it back to a tensed position.



In [2]:
import os
import numpy as np

In [23]:
# Display floats with two fixed decimals and never in scientific notation,
# so the array dumps further down stay readable.
np.set_printoptions(
    formatter=dict(float_kind='{:.2f}'.format),
    suppress=True,
)


In [22]:
# State machine:
# 0: skip everything until a "f -1 ..." line happens, which means a new record session is happening and the timestamps are being reset
# 1: wait until the positions returned by "f ..." change. It takes me typically a few seconds to set artoolkit and to start the random sequence
# 2: use the data

def parse_file(f):
    """Extract tick and pose samples from one raw recording file.

    Each line is split on single spaces (trailing spaces are dropped first)
    and lines with fewer than three fields are ignored.  A small state
    machine decides which of the remaining lines are kept:

    0. skip everything until an ``f -1 ...`` line is seen; the fields that
       follow the ``-1`` are remembered as the reference tick values;
    1. skip until an ``f`` line whose tick fields differ from that reference;
    2. record every ``f`` and ``p`` line from that line on (inclusive).

    Args:
        f: path of the file to parse.

    Returns:
        A ``(ticks, poses)`` pair of lists.  Each ``ticks`` entry is
        ``[float(timestamp), int, int, ...]`` built from an ``f`` line; each
        ``poses`` entry is the list of floats that follow ``p`` on a ``p``
        line (the first of them being the timestamp).

    Raises:
        ValueError: if a recorded line contains a field that cannot be
            converted to the expected numeric type.
    """
    state = 0
    old_ticks = None  # tick fields of the "f -1" line, set when leaving state 0
    ticks = []
    poses = []
    # The context manager guarantees the file is closed, even when a
    # malformed line makes a numeric conversion raise.
    with open(f) as fh:
        for line in fh:
            arr = line.rstrip("\n").rstrip(" ").split(" ")
            if len(arr) <= 2:
                continue
            # The three checks are deliberately plain `if`s, not `elif`s: the
            # line that triggers a transition is also examined by the next
            # state, so the first line with moved ticks is itself recorded.
            if state == 0:
                if arr[0] == "f" and arr[1] == "-1":
                    old_ticks = " ".join(arr[2:])
                    state = 1
            if state == 1:
                if arr[0] == "f" and " ".join(arr[2:]) != old_ticks:
                    state = 2
            if state == 2:
                if arr[0] == "f":
                    ticks.append([float(arr[1])] + [int(x) for x in arr[2:]])
                if arr[0] == "p":
                    poses.append([float(x) for x in arr[1:]])
    return ticks, poses

In [17]:
# Now we have ticks and poses, with aligned timestamps, we can create data points which will comprise 
# two poses and a delta of ticks.
# A few things I know from how the data is generated:
# - it is guaranteed that each "f" line will have a "p" line with the exact same timestamp
# - movements were generated as 3 seconds straight movements
# - There were all kind of oscillations and imperfections in the movements so I am expecting +/- 15° error in the
#   orientation and maybe +/- 5 cm in the position
# - As a consequence, I think pairs that are a long time (and distance) apart bring more information than pairs 
#   that are close

def create_training_pairs(ticks, poses, min_delta=3.0):
    """Build (start pose, tick delta, end pose) rows from aligned samples.

    Tick and pose samples are joined on their timestamp (element 0 of each
    entry).  Pairs are then formed from the outside in: first sample with
    last sample, second with second-to-last, and so on, for as long as the
    end timestamp exceeds the start timestamp by more than ``min_delta``.
    Many more pairs could be generated from the same data.

    Args:
        ticks: list of ``[timestamp, tick, tick, tick, tick]`` lists.
        poses: list of ``[timestamp, pose values...]`` lists.
        min_delta: minimum timestamp difference (exclusive) a pair must
            span.  Defaults to 3.0.

    Returns:
        A list of rows, each being ``start_pose + delta_ticks + end_pose``
        where ``delta_ticks`` holds the four per-column tick differences
        (end minus start).  The list is empty when no tick/pose timestamps
        match or when the samples span too little time.
    """
    # Index both streams by timestamp; a repeated timestamp keeps the last
    # entry seen.
    ticks_by_ts = {t[0]: t[1:] for t in ticks}
    poses_by_ts = {p[0]: p[1:] for p in poses}

    # Row layout: [timestamp, 4 tick columns, pose columns...], in the order
    # the timestamps first appear in `ticks`.
    fullposes = [
        [ts] + ticks_by_ts[ts] + poses_by_ts[ts]
        for ts in ticks_by_ts
        if ts in poses_by_ts
    ]

    # Without this guard the loop condition below would index an empty list.
    if not fullposes:
        return []

    start_ind = 0
    end_ind = len(fullposes) - 1
    pairs = []
    # Once the indices meet or cross, the timestamp difference is zero or the
    # negation of the previous (positive) one, so the loop always terminates
    # before either index leaves the list.
    while fullposes[end_ind][0] - fullposes[start_ind][0] > min_delta:
        first = fullposes[start_ind]
        last = fullposes[end_ind]
        delta_ticks = [last[i] - first[i] for i in range(1, 5)]
        pairs.append(first[5:] + delta_ticks + last[5:])
        start_ind += 1
        end_ind -= 1
    return pairs
    
    

In [18]:
def normalize_columns(arr, cols=()):
    """Min-max scale the selected columns of a 2-D array in place.

    Each listed column is rewritten as ``(v - min) / (max - min)``.  A column
    whose values are all equal has no range to scale by; it is set to 0
    instead of producing NaN from a 0/0 division.

    Args:
        arr: 2-D numpy array, modified in place.
        cols: iterable of column indices to normalise.  Defaults to none.

    Returns:
        A list of ``(column, offset, scale)`` tuples, one per normalised
        column, where ``offset`` is the column minimum and ``scale`` its
        range before normalisation (0 for a constant column), so that
        ``normalised * scale + offset`` recovers the original values.
    """
    norm_factors = []
    for c in cols:
        v = arr[:, c]
        offset = v.min()
        scale = v.max() - offset
        if scale == 0:
            # Constant column: avoid dividing by zero.
            arr[:, c] = 0
        else:
            arr[:, c] = (v - offset) / scale
        norm_factors.append((c, offset, scale))
    return norm_factors

In [19]:
def load_dataset(dirname):
    """Load every recording in a directory and build both model datasets.

    Every regular file in ``dirname`` is parsed with ``parse_file`` and turned
    into training rows with ``create_training_pairs``.  Files are visited in
    sorted name order so the row order is reproducible between runs.

    Each row is split into a source pose (columns 0-6), a tick delta
    (columns 7-10) and a destination pose (columns 11 onwards), which are
    recombined into:

    - ``pp_*`` (Position Prediction): source pose + tick delta -> destination pose
    - ``mp_*`` (Motors Prediction): source pose + destination pose -> tick delta

    Selected columns of each array are then min-max normalised in place; the
    normalisation factors are not returned.

    Args:
        dirname: directory containing the raw recording files.  A trailing
            path separator is optional.

    Returns:
        The tuple ``(pp_input, pp_output, mp_input, mp_output)`` of numpy
        arrays.

    Raises:
        ValueError: if the directory yields no training pair at all.
    """
    pairs = []
    for name in sorted(os.listdir(dirname)):
        path = os.path.join(dirname, name)
        # Skip sub-directories and other non-file entries, which cannot be
        # opened as recordings.
        if not os.path.isfile(path):
            continue
        t, p = parse_file(path)
        pairs += create_training_pairs(t, p)

    if not pairs:
        raise ValueError("no training pairs found in " + repr(dirname))

    data = np.array(pairs)
    src_poses = data[..., 0:7]
    delta_ticks = data[..., 7:11]
    dst_poses = data[..., 11:]

    # mp_ = Motors Prediction model
    # pp_ = Position Prediction model
    # np.concatenate and np.array both copy, so the four arrays below are
    # independent and can be normalised in place without affecting each other.
    pp_input = np.concatenate((src_poses, delta_ticks), axis=1)
    pp_output = np.array(dst_poses)

    mp_input = np.concatenate((src_poses, dst_poses), axis=1)
    mp_output = np.array(delta_ticks)

    # Pose columns 4-6 and all tick-delta columns are normalised; pose
    # columns 0-3 are left as they are.
    normalize_columns(pp_input, [4, 5, 6, 7, 8, 9, 10])
    normalize_columns(pp_output, [4, 5, 6])
    normalize_columns(mp_input, [4, 5, 6, 11, 12, 13])
    normalize_columns(mp_output, [0, 1, 2, 3])

    return (pp_input, pp_output, mp_input, mp_output)


In [20]:
load_dataset("../datasets/real/2019-10-18/raw/")

(array([[0.66, -0.14, -0.15, ..., 0.61, 0.51, 0.42],
        [0.66, -0.14, -0.15, ..., 0.61, 0.52, 0.42],
        [0.66, -0.14, -0.16, ..., 0.61, 0.52, 0.41],
        ...,
        [0.63, -0.29, -0.35, ..., 0.50, 0.73, 0.41],
        [0.62, -0.28, -0.35, ..., 0.50, 0.72, 0.41],
        [0.61, -0.29, -0.36, ..., 0.51, 0.71, 0.41]]),
 array([[0.76, -0.24, -0.11, ..., 0.34, 0.61, 0.95],
        [0.76, -0.25, -0.11, ..., 0.33, 0.61, 0.96],
        [0.76, -0.25, -0.11, ..., 0.33, 0.61, 0.96],
        ...,
        [0.61, -0.31, -0.34, ..., 0.40, 0.26, 0.12],
        [0.60, -0.30, -0.34, ..., 0.40, 0.27, 0.12],
        [0.61, -0.31, -0.34, ..., 0.40, 0.28, 0.13]]),
 array([[0.66, -0.14, -0.15, ..., 0.34, 0.61, 0.95],
        [0.66, -0.14, -0.15, ..., 0.33, 0.61, 0.96],
        [0.66, -0.14, -0.16, ..., 0.33, 0.61, 0.96],
        ...,
        [0.63, -0.29, -0.35, ..., 0.40, 0.26, 0.12],
        [0.62, -0.28, -0.35, ..., 0.40, 0.27, 0.12],
        [0.61, -0.29, -0.36, ..., 0.40, 0.28, 0.13]]),
 

Ugh, I finally understand why so many people criticize jupyter. I'll make a proper python module with all the functions in that notebook and call it fae_loader.