# Notes

This data-processing iteration focuses on extracting the input from the raw CSV files:
- every output file is named `<patient>_<session>_<exercise>.json`
- every patient has a list of all sessions with metadata

## Object shape

Patient object

poi object

exercise object

# Init

In [1]:
import os 
import re
import csv
import json
from datetime import datetime

import math
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Root of the data tree, relative to the notebook's working directory.
dir_data_root = os.path.join('..', 'data')

# Raw CSV input lives in <root>/csv, every JSON output goes under <root>/json.
dir_data_source, dir_data_target = (
    os.path.join(dir_data_root, sub_dir) for sub_dir in ('csv', 'json')
)

# JSON output sub-folders: one file per exercise, one file per patient.
dir_exercises, dir_patients_sessions = (
    os.path.join(dir_data_target, sub_dir)
    for sub_dir in ('exercises_raw', 'patients_sessions')
)

# Data prep

## Prepare POIs metadata and store as an object

In [3]:
# Facial region of every point of interest (POI), keyed by the POI tag
# "<id>_<name>". Insertion order follows the numeric POI id.
regions = {
    # left eye and eyebrow
    '0_LefteyeMidbottom': 'orbital',
    '1_LefteyeMidtop': 'orbital',
    '2_LefteyeInnercorner': 'orbital',
    '3_LefteyeOutercorner': 'orbital',
    '4_LefteyebrowInner': 'frontal',
    '5_LefteyebrowCenter': 'frontal',
    # right eye and eyebrow
    '6_RighteyeMidbottom': 'orbital',
    '7_RighteyeMidtop': 'orbital',
    '8_RighteyeInnercorner': 'orbital',
    '9_RighteyeOutercorner': 'orbital',
    '10_RighteyebrowInner': 'frontal',
    '11_RighteyebrowCenter': 'frontal',
    # nose, mouth and chin
    '12_NoseTip': 'frontal',
    '13_MouthLowerlipMidbottom': 'oral',
    '14_MouthLeftcorner': 'oral',
    '15_MouthRightcorner': 'oral',
    '16_MouthUpperlipMidtop': 'oral',
    '17_ChinCenter': 'oral',
    # forehead and cheeks
    '18_ForeheadCenter': 'frontal',
    '19_LeftcheekCenter': 'oral',
    '20_RightcheekCenter': 'oral',
}

# Base POI of every POI: all of them point at the forehead centre, which is
# itself marked with the literal 'base'. Same keys and order as `regions`.
bases = {
    tag: 'base' if tag == '18_ForeheadCenter' else '18_ForeheadCenter'
    for tag in regions
}

In [4]:
# Metadata record for every POI, keyed by its tag ("<id>_<name>").
poi_object = {}

for tag, region in regions.items():
    tag_parts = tag.split('_')
    poi_object[tag] = dict(
        tag=tag,
        id=int(tag_parts[0]),      # numeric prefix of the tag
        name=tag_parts[1],         # text after the first underscore
        region=region,
        base_tag=bases[tag],
    )

In [5]:
#poi_object  

# Read data from source 

## Set up patient object

In [6]:
def filename_to_session_metadata(file_name):
    """Parse session metadata out of a source CSV file name.

    The name is split on single spaces and read positionally as
    ``<patient_id> <date> eval<N> bf<N>.csv``. The date is expected in
    ``%Y-%m-%d`` form, optionally followed by a ``_<digits>`` suffix that is
    stripped before parsing.

    Args:
        file_name: Base name of the CSV file (no directory part).

    Returns:
        dict with the keys ``patient_id`` (str), ``exercise_dates`` (str in
        ``%Y-%m-%d`` form, or ``''`` when the date field cannot be parsed),
        ``evaluation`` (int) and ``flag_before_surgery`` (int).

    Raises:
        IndexError: if the name has fewer than four space-separated fields.
        ValueError: if the ``eval``/``bf`` field is not an integer.
    """
    meta = file_name.split(' ')
    patient_id = meta[0]

    # Strip the whole "_<digits>" suffix (the original pattern removed only a
    # single digit, so a suffix such as "_10" left a stray digit behind).
    date_text = re.sub(r'_[0-9]+', '', meta[1])
    try:
        exercise_dates = datetime.strptime(date_text, '%Y-%m-%d').strftime('%Y-%m-%d')
    except ValueError:
        # Report the offending file and fall back to an empty date string.
        # The original fallback stored '' and then called .strftime() on it,
        # which raised AttributeError instead of recovering.
        print(file_name)
        exercise_dates = ''

    evaluation = int(meta[2].replace('eval', ''))
    flag_before_surgery = int(meta[3].replace('bf', '').replace('.csv', ''))
    return {
        'patient_id': patient_id,
        'exercise_dates': exercise_dates,
        'evaluation': evaluation,
        'flag_before_surgery': flag_before_surgery,
    }

In [7]:
def file_to_exercises(directory, filename):
    """Convert one session CSV into session metadata plus per-exercise series.

    The session metadata is parsed from ``filename``. The CSV is read with
    pandas; the columns ``patient``, ``date``, ``before surgery`` and
    ``evaluation`` are discarded, and the remaining rows are grouped by the
    ``exercise`` column and then by ``point id``. For every exercise and every
    tag in ``poi_object``, the ``t``/``x``/``y``/``z`` columns are collected
    as lists ordered by ``t`` (empty lists when the POI has no rows).

    Side effect: the new session dict is appended to the module-level
    ``patients`` registry under the patient id, and the session ``id`` is the
    number of sessions already registered for that patient.

    Args:
        directory: Folder that contains the CSV file.
        filename: Base name of the CSV file.

    Returns:
        Tuple ``(patient_id, session, exercises)`` where ``session`` is the
        dict appended to ``patients`` and ``exercises`` is a list of dicts
        with the keys ``meta`` and ``pois``.
    """
    # Parse the *argument*. The original read the notebook-global
    # ``file_name`` here, which only worked while a loop variable of that
    # name happened to hold the same value.
    session_meta = filename_to_session_metadata(filename)
    patient_id = session_meta['patient_id']

    # TODO: move the registry bookkeeping out of this function.
    patient_sessions = patients.setdefault(patient_id, [])

    df = pd.read_csv(os.path.join(directory, filename))
    # These columns duplicate what the file name already provides.
    df = df.drop(columns=['patient', 'date', 'before surgery', 'evaluation'])

    # To find files with malformed (non-string) exercise values, inspect:
    #   df[[not isinstance(value, str) for value in df['exercise']]]

    exercises = []
    for tag in sorted(df['exercise'].astype(str).unique()):
        df_exercise = df[df['exercise'] == tag]
        tag_parts = tag.split('_')

        pois = {}
        for poi in poi_object:
            df_poi = df_exercise[df_exercise['point id'] == poi].sort_values(by=['t'])
            pois[poi] = {
                axis + 's': df_poi[axis].tolist() for axis in ('t', 'x', 'y', 'z')
            }

        exercises.append({
            'meta': {
                'tag': tag,
                'id': int(tag_parts[0]),
                'name': tag_parts[1],
                'evaluation': session_meta['evaluation'],
            },
            'pois': pois,
        })

    session = {
        # Position of this session in the patient's list.
        'id': len(patient_sessions),
        'exercise_dates': session_meta['exercise_dates'],
        'evaluation': session_meta['evaluation'],
        'flag_before_surgery': session_meta['flag_before_surgery'],
        'number_of_exercises': len(exercises),
    }
    patient_sessions.append(session)

    return patient_id, session, exercises

Read every file in the source directory and apply the mapper function

In [8]:
# Registry filled by file_to_exercises: patient id -> list of session dicts.
patients = {}

# Create the output folders up front so a fresh checkout does not fail on
# the first open(..., 'w').
os.makedirs(dir_exercises, exist_ok=True)
os.makedirs(dir_patients_sessions, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the session
# ids (assigned in processing order) reproducible between runs.
for file_name in sorted(os.listdir(dir_data_source)):
    # Skip anything that is not a session CSV (e.g. notebook checkpoint
    # folders); such names cannot be parsed as session metadata.
    if not file_name.endswith('.csv'):
        continue

    patient_id, session, exercises = file_to_exercises(dir_data_source, file_name)

    # One JSON file per exercise: <patient>_<session id>_<exercise id>.json
    for exercise in exercises:
        exercise_path = os.path.join(
            dir_exercises,
            '%s_%s_%s.json' % (patient_id, session['id'], exercise['meta']['id']),
        )
        with open(exercise_path, 'w') as f_w:
            json.dump(exercise, f_w)

# One JSON file per patient holding the list of all session dicts.
for patient_id, sessions in patients.items():
    with open(os.path.join(dir_patients_sessions, '%s.json' % patient_id), 'w') as f_w:
        json.dump(sessions, f_w)


__END__