# Notes

This data-processing iteration focuses on extracting the input from the raw CSV files:
- every patient is a JSON file
- every session is an entry in a patient object
- every exercise is an entry in a session entry
- every poi (point of interest) is an entry in an exercise object


## Object shape

In [1]:
import os 
import re
import csv
import json
from datetime import datetime

import math
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Data folders: a common root with the raw CSV input and the generated JSON
# output stored in sibling sub-folders.
dir_data_root = 'data'
dir_data_source, dir_data_target = (
    os.path.join(dir_data_root, sub_dir) for sub_dir in ('csv', 'json')
)

# Data prep

In [3]:
# Facial region of every tracked point, keyed by its '<id>_<name>' tag.
# The points are listed per region and then ordered by their numeric id.
region = dict(sorted(
    (
        (tag, region_name)
        for region_name, tags in (
            ('orbital', (
                '0_LefteyeMidbottom',
                '1_LefteyeMidtop',
                '2_LefteyeInnercorner',
                '3_LefteyeOutercorner',
                '6_RighteyeMidbottom',
                '7_RighteyeMidtop',
                '8_RighteyeInnercorner',
                '9_RighteyeOutercorner',
            )),
            ('frontal', (
                '4_LefteyebrowInner',
                '5_LefteyebrowCenter',
                '10_RighteyebrowInner',
                '11_RighteyebrowCenter',
                '12_NoseTip',
                '18_ForeheadCenter',
            )),
            ('oral', (
                '13_MouthLowerlipMidbottom',
                '14_MouthLeftcorner',
                '15_MouthRightcorner',
                '16_MouthUpperlipMidtop',
                '17_ChinCenter',
                '19_LeftcheekCenter',
                '20_RightcheekCenter',
            )),
        )
        for tag in tags
    ),
    key=lambda pair: int(pair[0].split('_')[0]),
))

# Base point of every tracked point: all of them map to the forehead centre,
# which itself is marked with the literal 'base'.
base = {
    tag: 'base' if tag == '18_ForeheadCenter' else '18_ForeheadCenter'
    for tag in region
}

In [4]:
def filename_to_metadata(file_name):
    """Parse the session metadata encoded in a CSV file name.

    The name is split on single spaces into four fields, in this order:
    ``<patient_id> <date> eval<N> bf<N>.csv``. Any ``_<digit>`` marker inside
    the date field is removed before the date is parsed as ``YYYY-MM-DD``.

    Args:
        file_name: Name of the CSV file (without any directory part).

    Returns:
        A dict with the keys ``patient_id`` (the first field, kept verbatim
        as a string), ``exercise_dates`` (a ``'YYYY-MM-DD'`` string, or ``''``
        when the date field cannot be parsed), ``evaluation`` (int) and
        ``flag_before_surgery`` (int).

    Raises:
        IndexError: If the name has fewer than four space-separated fields.
        ValueError: If the ``eval``/``bf`` fields do not contain an integer.
    """
    meta = file_name.split(' ')
    patient_id = meta[0]

    date_field = re.sub(r'\_[0-9]', '', meta[1])
    try:
        parsed_date = datetime.strptime(date_field, '%Y-%m-%d')
    except ValueError:
        # Best effort: report the offending file and fall back to an empty
        # date. The fallback must already be a string, because calling
        # strftime() on '' would raise AttributeError and defeat the fallback.
        print(file_name)
        exercise_dates = ''
    else:
        exercise_dates = parsed_date.strftime('%Y-%m-%d')

    evaluation = int(meta[2].replace('eval', ''))
    flag_before_surgery = int(meta[3].replace('bf', '').replace('.csv', ''))
    return {
        'patient_id': patient_id,
        'exercise_dates': exercise_dates,
        'evaluation': evaluation,
        'flag_before_surgery': flag_before_surgery,
    }

In [5]:
def file_to_excercise(directory, filename):
    """Convert one session CSV file into a nested session dictionary.

    The file is read with pandas and regrouped as
    session -> exercises -> points of interest (POIs). Every POI carries its
    samples as four parallel lists ordered by the ``t`` column.

    Args:
        directory: Folder that contains the CSV file.
        filename: Name of the CSV file inside ``directory``; it is also passed
            to ``filename_to_metadata`` to obtain the session metadata.

    Returns:
        A ``(patient_id, session)`` tuple. ``session`` has the form
        ``{'meta': <file metadata>, 'exercises': [...]}``; each exercise is
        ``{'meta': {'tag', 'id', 'name'}, 'pois': [...]}`` and each POI is
        ``{'meta': {'tag', 'id', 'name', 'region', 'base'},
        'input': {'ts', 'xs', 'ys', 'zs'}}``. A POI that occurs in the file
        but not in a given exercise is still listed there, with empty lists.

    Raises:
        KeyError: If a ``point id`` value has no entry in ``region``/``base``,
            or an expected column is missing from the CSV.
    """
    # Use the function's own argument here: relying on a global variable that
    # happens to hold the current file name breaks every other caller.
    file_meta = filename_to_metadata(filename)
    patient_id = file_meta['patient_id']

    df = pd.read_csv(os.path.join(directory, filename))
    # These per-row columns are not used when building the session structure.
    df = df.drop(columns=['patient', 'date', 'before surgery', 'evaluation'])

    # NOTE(review): non-string values in the 'exercise' column are not handled
    # below. To find out which file contains them, uncomment this:
    # errors_in_exercise = df[[not isinstance(value, str) for value in df['exercise']]]
    # if len(errors_in_exercise):
    #     print(filename)
    #     print(errors_in_exercise)

    exercises = sorted(df['exercise'].astype(str).unique())
    # The POI list is taken from the whole file, not from each exercise.
    pois = sorted(df['point id'].unique())

    session = {
        'meta': file_meta,
        'exercises': [],
    }

    for exercise in exercises:
        df_exercise = df[df['exercise'] == exercise]

        # Tags look like '<id>_<name>'; only the first two '_' parts are used.
        exercise_parts = exercise.split('_')
        exercise_data = {
            'meta': {
                'tag': exercise,
                'id': int(exercise_parts[0]),
                'name': exercise_parts[1],
            },
            'pois': [],
        }

        for poi in pois:
            # Samples of this POI within the current exercise, ordered by 't'.
            df_poi = df_exercise[df_exercise['point id'] == poi]
            df_poi = df_poi.sort_values(by=['t'])

            poi_parts = poi.split('_')
            exercise_data['pois'].append({
                'meta': {
                    'tag': poi,
                    'id': int(poi_parts[0]),
                    'name': poi_parts[1],
                    'region': region[poi],
                    'base': base[poi],
                },
                # tolist() yields plain Python values that json can serialize.
                'input': {
                    'ts': df_poi['t'].tolist(),
                    'xs': df_poi['x'].tolist(),
                    'ys': df_poi['y'].tolist(),
                    'zs': df_poi['z'].tolist(),
                },
            })

        session['exercises'].append(exercise_data)

    return patient_id, session

Read every file in the source directory, apply the mapper function, and write one JSON file per patient to the target directory.

In [6]:
# Sessions grouped by patient id: {patient_id: [session, ...]}.
data = {}

for file_name in os.listdir(dir_data_source):
    # The file-name parser expects names ending in '.csv'; skip anything else
    # in the folder (hidden files, checkpoint folders, ...) instead of
    # crashing halfway through the run.
    if not file_name.endswith('.csv'):
        print('skipping non-CSV entry: %s' % file_name)
        continue

    patient_id, session = file_to_excercise(dir_data_source, file_name)
    data.setdefault(patient_id, []).append(session)

# Create the output folder if needed so the writes below cannot fail with
# FileNotFoundError after all the parsing work has been done.
os.makedirs(dir_data_target, exist_ok=True)

# One JSON file per patient, holding the list of that patient's sessions.
for patient_id, sessions in data.items():
    print(patient_id)
    with open(os.path.join(dir_data_target, '%s.json' % patient_id), "w") as f_w:
        json.dump(sessions, f_w)
  


00000000078
00000000000
00000000055
00000000041
00000000015
00000000067
00000000070
00000000016
00000000065
00000000019
00000000003
00000000007
00000000038
00000000014
00000000047
00000000039
00000000002
00000000042
00000000036
00000000023
00000000035
00000000068
00000000010
00000000011
00000000040
00000000029
00000000046
00000000045
00000000028
00000000072
00000000076
00000000026
00000000052
00000000018
00000000057
00000000001
00000000017
00000000025
00000000059
00000000080
00000000081
00000000048
00000000064
00000000033
00000000006
00000000082
00000000083
00000000063
00000000009
00000000069
00000000060
00000000012
00000000073
00000000053
00000000066
00000000027
00000000020
00000000074
00000000051
00000000030
00000000031
00000000085
00000000061
00000000071
00000000056
00000000004
00000000005
00000000050
00000000079
00000000049
00000000034
00000000044
00000000008
00000000043
00000000013
00000000077
00000000022
00000000037
00000000058
00000000075
00000000032


__END__