# Notes

Second iteration is to build own input from the raw csv
- every patient is a dictionary
- every exercise in an entry
- every exercise has meta data
- every entry consist of timeserieses for all face points 
- a training/test split shall be based on patients so we could see a progression of exercises


use https://pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/ to mix data

In [1]:
import os 
import re
import csv
import json
from datetime import datetime

import math
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Constants

TODO:
- move to the environment file all global constants

In [2]:
SEQ_LEN = 2050
X_MAX = 362.850308418274
X_MIN = -385.386139154434
Y_MAX = 407.209008932114
Y_MIN = -184.533506631851
Z_MAX = 1414.35980796814
Z_MIN = 465.504199266434

In [3]:
region = {
    '0_LefteyeMidbottom': 'orbital', 
    '1_LefteyeMidtop': 'orbital',  
    '2_LefteyeInnercorner': 'orbital', 
    '3_LefteyeOutercorner': 'orbital',  
    '4_LefteyebrowInner': 'frontal', 
    '5_LefteyebrowCenter': 'frontal',  
    '6_RighteyeMidbottom': 'orbital',  
    '7_RighteyeMidtop': 'orbital', 
    '8_RighteyeInnercorner': 'orbital',  
    '9_RighteyeOutercorner': 'orbital', 
    '10_RighteyebrowInner': 'frontal', 
    '11_RighteyebrowCenter': 'frontal',  
    '12_NoseTip': 'nasal', 
    '13_MouthLowerlipMidbottom': 'oral',
    '14_MouthLeftcorner': 'oral',
    '15_MouthRightcorner': 'oral',
    '16_MouthUpperlipMidtop': 'oral',
    '17_ChinCenter': 'other', 
    '18_ForeheadCenter': 'frontal', 
    '19_LeftcheekCenter': 'other', 
    '20_RightcheekCenter': 'other',
}

In [4]:
dir_data_root = os.path.join('data')
dir_data_source = os.path.join(dir_data_root, 'csv')
dir_data_target = os.path.join(dir_data_root, 'json')

# Data prep

TODO:
- read data from folder
- for every file read it to form sequences for every  poi
- create a list of sequences per poi as exercise
- create a list of metadata for an exercise
- add an exercise to correct patient


In [5]:
def distance(xs, ys, zs):
    distance = [0]
    for i in range(1, len(xs)):
        distance.append(math.dist([xs[i-1], ys[i-1], zs[i-1]], [xs[i], ys[i], zs[i]]))
    
    return distance

In [6]:
def rescale(value, min, max):
    return (value - min) / (max - min)

In [7]:
def unit_vector(vector):
    return vector / np.linalg.norm(vector)

In [8]:
def angle_between_two_points(p1, p2):
    p1_u = unit_vector(p1)
    p2_u = unit_vector(p2)
    return np.arccos(np.clip(np.dot(p1_u, p2_u), -1.0, 1.0))

In [9]:
def direction_angles(xs, ys, zs):
    angles = [0]
    for i in range(1, len(xs)):
        angles.append(angle_between_two_points([xs[i-1], ys[i-1], zs[i-1]], [xs[i], ys[i], zs[i]]))
    
    return angles


In [10]:
def filename_to_metadata(file_name):
    meta = file_name.split(' ')
    patient_id = meta[0]
    try:
        exercise_dates = datetime.strptime(re.sub(r'\_[0-9]', '', meta[1]), '%Y-%m-%d') 
    except:
        print(file_name)
        exercise_dates = ''
        
    evaluation = int(meta[2].replace('eval', ''))
    flag_before_surgery = int(meta[3].replace('bf', '').replace('.csv', ''))
    return {
        'patient_id': patient_id,
        'exercise_dates': exercise_dates.strftime('%Y-%m-%d'),
        'evaluation': evaluation,
        'flag_before_surgery': flag_before_surgery,
    }

In [11]:
def file_to_excercise(directory, filename):   
    file_meta = filename_to_metadata(file_name)
    
    patient_id = file_meta['patient_id']
    
    df = pd.read_csv(os.path.join(directory, filename))
    df.drop(['patient', 'date', 'before surgery', 'evaluation'], axis = 1, inplace = True)
    
    #error handlin in original data, to catch which file has error uncomment this
    #errorrs_in_exercise_ = df[[not isinstance(value, str) for value in df['exercise']]]
    #if len(errorrs_in_exercise_):
    #    print(filename)
    #    print(errorrs_in_exercise_)
    
    exercises = sorted(df['exercise'].astype(str).unique())

    pois = sorted(df['point id'].unique())
    session = {
        'meta': file_meta,
        'exercises': []
    }
    
    for exercise in exercises:
        df_exercise = df[(df['exercise'] == exercise)]
        
        exercise_data = {
            'meta': {
                'tag': exercise,
                'id': int(exercise.split('_')[0]),
                'name': exercise.split('_')[1]
            },
            'pois': [],
        }

        for poi in pois: 
            df_poi = df_exercise[(df_exercise['point id']) == poi]
            df_poi = df_poi.sort_values(by=['t'])
            df_poi = df_poi.drop(columns = ['t', 'exercise', 'point id'], axis=1)
            
            xs = df_poi['x'].tolist()
            ys = df_poi['y'].tolist()
            zs = df_poi['z'].tolist()
            xs_normalized = [rescale(x, X_MIN, X_MAX) for x in xs]
            ys_normalized = [rescale(y, Y_MIN, Y_MAX) for y in ys]
            zs_normalized = [rescale(z, Z_MIN, Z_MAX) for z in zs]
            
            poi_data = {
                'meta': {
                    'tag': poi,
                    'id': int(poi.split('_')[0]),
                    'name': poi.split('_')[1],  
                    'region': region[poi]
                },
                'input': {
                    'x': xs,
                    'y': ys,
                    'z': zs,
                    'dist': distance(xs, ys, zs),
                    'x_normalized': xs_normalized,
                    'y_normalized': ys_normalized,
                    'z_normalized': zs_normalized,
                    'dist_normalized': distance(xs_normalized, ys_normalized, zs_normalized),
                    'direction_angles': direction_angles(xs, ys, zs)
                }
            }
            
            exercise_data['pois'].append(poi_data)
            
        session['exercises'].append(exercise_data)
        
    return patient_id, session

Read every file in the target directory and apply mapper function

In [12]:
data = {}

for file_name in os.listdir(dir_data_source):
    
    patient_id, session = file_to_excercise(dir_data_source, file_name) 
    
    if patient_id not in data:
        data[patient_id] = []
    data[patient_id].append(session)    


for patient_id in data:
    print(patient_id)
    with open(os.path.join(dir_data_target, '%s.json' % patient_id), "w") as f_w:
        json.dump(data[patient_id], f_w, indent = 2)
  


00000000078
00000000000
00000000055
00000000041
00000000015
00000000067
00000000070
00000000016
00000000065
00000000019
00000000003
00000000007
00000000038
00000000014
00000000047
00000000039
00000000002
00000000042
00000000036
00000000023
00000000035
00000000068
00000000010
00000000011
00000000040
00000000029
00000000046
00000000045
00000000028
00000000072
00000000076
00000000026
00000000052
00000000018
00000000057
00000000001
00000000017
00000000025
00000000059
00000000080
00000000081
00000000048
00000000064
00000000033
00000000006
00000000082
00000000083
00000000063
00000000009
00000000069
00000000060
00000000012
00000000073
00000000053
00000000066
00000000027
00000000020
00000000074
00000000051
00000000030
00000000031
00000000085
00000000061
00000000071
00000000056
00000000004
00000000005
00000000050
00000000079
00000000049
00000000034
00000000044
00000000008
00000000043
00000000013
00000000077
00000000022
00000000037
00000000058
00000000075
00000000032


# Classification

## Data set per session

### Build training set

In [13]:
xslist = list()
yslist = list()

for patient_id in data:
    print('Extracting sessions for patient: %s' % patient_id)
    
    for session in data[patient_id]:
        for excercise in session['exercises']: 
            xs_excercise = []
            for poi in excercise['pois']:
                #xs_excercise.append(poi['input']['x'])
                #xs_excercise.append(poi['input']['y'])
                #xs_excercise.append(poi['input']['z'])
                #xs_excercise.append(poi['input']['dist'])
                xs_excercise.append(poi['input']['x_normalized'])
                xs_excercise.append(poi['input']['y_normalized'])
                xs_excercise.append(poi['input']['z_normalized'])
                xs_excercise.append(poi['input']['dist_normalized'])
                xs_excercise.append(poi['input']['direction_angles'])
    
            xslist.append(pad_sequences(
                xs_excercise,
                padding="pre",
                maxlen=SEQ_LEN))
            yslist.append(session['meta']['evaluation'])


Extracting sessions for patient: 00000000078
Extracting sessions for patient: 00000000000
Extracting sessions for patient: 00000000055
Extracting sessions for patient: 00000000041
Extracting sessions for patient: 00000000015
Extracting sessions for patient: 00000000067
Extracting sessions for patient: 00000000070
Extracting sessions for patient: 00000000016
Extracting sessions for patient: 00000000065
Extracting sessions for patient: 00000000019
Extracting sessions for patient: 00000000003
Extracting sessions for patient: 00000000007
Extracting sessions for patient: 00000000038
Extracting sessions for patient: 00000000014
Extracting sessions for patient: 00000000047
Extracting sessions for patient: 00000000039
Extracting sessions for patient: 00000000002
Extracting sessions for patient: 00000000042
Extracting sessions for patient: 00000000036
Extracting sessions for patient: 00000000023
Extracting sessions for patient: 00000000035
Extracting sessions for patient: 00000000068
Extracting

In [14]:
len(xslist)
len(yslist)

1029

In [15]:
ys = np.array(yslist[:900])
xs = np.array(xslist[:900])
ys_test = np.array(yslist[900:])
xs_test = np.array(xslist[900:])

In [16]:
print(ys.shape)
print(xs.shape)
print(ys_test.shape)
print(xs_test.shape)

(900,)
(900, 105, 2050)
(129,)
(129, 105, 2050)


In [20]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from tensorflow.keras import backend as K

In [21]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

## Modeling

In [22]:
inputShape = xs[0].shape

print('Input shape: %s' % (str(inputShape)))

xs_input = Input(shape=inputShape)
x = LSTM(32, activation="relu")(xs_input)
x = Dense(16, activation="relu")(x)
x = Dense(6, activation="softmax")(x)

model = Model(inputs=xs_input, outputs=x)

model.compile(
    loss="sparse_categorical_crossentropy", 
    optimizer=Adam(lr=1e-3, decay=1e-3 / 200),
    metrics=['accuracy', f1_m, precision_m, recall_m]
)

model.summary()

Input shape: (105, 2050)
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 105, 2050)]       0         
                                                                 
 lstm_1 (LSTM)               (None, 32)                266624    
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 6)                 102       
                                                                 
Total params: 267,254
Trainable params: 267,254
Non-trainable params: 0
_________________________________________________________________


In [23]:
model.fit(
    x=xs, y=ys, 
    validation_data=(xs_test, ys_test),
    batch_size=8, epochs=5)

Epoch 1/5


2023-01-09 20:56:04.590151: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-01-09 21:10:27.118507: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x173304220>

__END__