# Notes

First iteration is to use existing csv_points to build input as it is.
I use standard LSTM with tf.keras

In [1]:
import os 
import csv
from datetime import datetime
import pandas as pd
import numpy as np
import tensorflow as tf

# Data prep

In [2]:
def filename2metadata(filename):
    meta = filename.split(' ')
    patientid = meta[0]
    evaluation = int(meta[3].split('=')[1])
    poi = meta[2]
    flagbeforeoperation = meta[4].split('=')[1]
    exerciseinfo = meta[5].split('_')
    exerciseid = int(exerciseinfo[0])
    exercisename = exerciseinfo[1]
    exercisedatestring = exerciseinfo[2].split('.')[0]
    return {
        'patientid': patientid,
        'evaluation': evaluation,
        'poi': poi,
        'exerciseid': exerciseid,
        'exercisename': exercisename,
        'exercisedate': datetime.strptime(exercisedatestring, '%Y-%m-%d-%H-%M-%S'),
    }

In [3]:
def csv2timeseriestensor(df):
    xs = list(df['x'],)
    

In [4]:
def read2object(path, filename):    
    df = pd.read_csv(path)
    df.drop('t', axis=1, inplace=True)
    
    xs = np.transpose(df.to_numpy())

    return {
        'meta': filename2metadata(filename),
        'timeseries': xs,
        'pd': df
    }

TODO:
- read all
- padding
- split: normally split should be before padding, and then in inference combination of padding and trimming shall apply
- classification


In [5]:
data = []
maxlength = 0
x_max = None
x_min = None
y_max = None
y_min = None
z_min = None
z_max = None
dirdata = os.path.join('data', 'csv_points')
for filename in os.listdir(dirdata):
    datum = read2object(os.path.join(dirdata, filename), filename)
    data.append(datum)
    maxlength = max(maxlength, datum['timeseries'].shape[1])
    if x_max is None:
        x_max = datum['pd']['x'].max()
    x_max = max(x_max, datum['pd']['x'].max())    
    if x_min is None:
        x_min = datum['pd']['x'].min()
    x_min = min(x_min, datum['pd']['x'].min()) 
    
    if y_max is None:
        y_max = datum['pd']['y'].max()
    y_max = max(y_max, datum['pd']['y'].max())    
    if y_min is None:
        y_min = datum['pd']['y'].min()
    y_min = min(y_min, datum['pd']['y'].min()) 
    
    if z_max is None:
        z_max = datum['pd']['z'].max()
    z_max = max(z_max, datum['pd']['z'].max())    
    if z_min is None:
        z_min = datum['pd']['z'].min()
    z_min = min(z_min, datum['pd']['z'].min()) 
    

maxlength

2047

In [6]:
print('x_max %s' % x_max)
print('x_min %s' % x_min)

print('y_max %s' % y_max)
print('y_min %s' % y_min)

print('z_max %s' % z_max)
print('z_min %s' % z_min)

x_max 362.850308418274
x_min -385.386139154434
y_max 407.209008932114
y_min -184.533506631851
z_max 1414.35980796814
z_min 465.504199266434


In [7]:
len(data)

23748

In [8]:
maxlength = 2050

#### Padding

In [9]:
for ind, datum in enumerate(data):
    paddedtimeseries = tf.keras.preprocessing.sequence.pad_sequences(
        datum['timeseries'],
        padding="pre",
        maxlen=maxlength)
    data[ind]['timeseries'] = paddedtimeseries
    


#### Build training set

In [10]:
xslist = list()
yslist = list()

for datum in data:
    xslist.append(datum['timeseries'])
    yslist.append(datum['meta']['evaluation'])
    
ys = np.array(yslist[:23000])
xs = np.array(xslist[:23000])
ys_test = np.array(yslist[23000:])
xs_test = np.array(xslist[23000:])

In [11]:
print(ys.shape)
print(xs.shape)
print(ys_test.shape)
print(xs_test.shape)

(23000,)
(23000, 3, 2050)
(748,)
(748, 3, 2050)


In [12]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=xs[0].shape))
model.add(tf.keras.layers.LSTM(32))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(6, activation='softmax'))

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'],
              run_eagerly=True)

model.summary()

Metal device set to: Apple M1


2023-01-09 20:38:30.334973: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-01-09 20:38:30.335073: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                266624    
                                                                 
 dense (Dense)               (None, 16)                528       
                                                                 
 dense_1 (Dense)             (None, 6)                 102       
                                                                 
Total params: 267,254
Trainable params: 267,254
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.fit(xs, ys, batch_size=8, epochs=5)

Epoch 1/5


2023-01-09 20:38:30.782253: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2d70ec5b0>

In [14]:
model.evaluate(xs_test, ys_test)



[1.221208930015564, 0.4732620418071747]

In [15]:
y_test_prob = model.predict(xs_test)



In [16]:
y_test_prob

array([[1.8241654e-09, 5.9883559e-01, 6.2840044e-02, 1.3010941e-01,
        5.0889421e-02, 1.5732552e-01],
       [3.1387202e-09, 7.2478378e-01, 2.9002074e-02, 1.2793194e-01,
        4.9988899e-02, 6.8293378e-02],
       [9.0041787e-09, 5.5349922e-01, 6.3931555e-02, 1.9306706e-01,
        7.9253912e-02, 1.1024828e-01],
       ...,
       [1.8238723e-09, 5.9888506e-01, 6.2845238e-02, 1.3008840e-01,
        5.0881192e-02, 1.5730011e-01],
       [8.2540719e-10, 5.0363845e-01, 5.4474767e-02, 1.4468285e-01,
        4.0660497e-02, 2.5654346e-01],
       [3.7273318e-09, 6.2388915e-01, 4.5593344e-02, 1.4303255e-01,
        4.0235981e-02, 1.4724898e-01]], dtype=float32)

__END__