In [1]:
import os
import glob
import pathlib
from pathlib import Path

import numpy as np
import pandas as pd
import math

import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')

from itertools import chain

from PIL import Image

import deepdish as dd
import csv

import random

In [2]:
import importlib
import pickle


class PickleProtocol:
    def __init__(self, level):
        self.previous = pickle.HIGHEST_PROTOCOL
        self.level = level

    def __enter__(self):
        importlib.reload(pickle)
        pickle.HIGHEST_PROTOCOL = self.level

    def __exit__(self, *exc):
        importlib.reload(pickle)
        pickle.HIGHEST_PROTOCOL = self.previous


def pickle_protocol(level):
    return PickleProtocol(level)

In [3]:
rootdir = os.getcwd()
searchstring = '\\Es1\\Label'

clinical_assessment_files = []
supplementary_info_files = []

for rootdir, dirs, files in os.walk(rootdir):
    for subdir in dirs:
        subdir_name = os.path.join(rootdir, subdir)
        if searchstring in subdir_name:
            onlyfiles = [f for f in os.listdir(subdir_name) if os.path.isfile(os.path.join(subdir_name, f))]
            clinical_assessment_files.append(os.path.join(subdir_name, onlyfiles[0]))
            supplementary_info_files.append(os.path.join(subdir_name, onlyfiles[1]))


suppinfo = []
clinical = []

for file in supplementary_info_files:
    data = pd.read_excel(file)
    suppinfo.append(data)
    
for file in clinical_assessment_files:
    data = pd.read_excel(file)
    clinical.append(data)
    
suppinfo_df = pd.concat(suppinfo, ignore_index=True)
clinical_df = pd.concat(clinical, ignore_index=True)

kimore_df = suppinfo_df.merge(clinical_df, on='Subject ID')

display(kimore_df)

Unnamed: 0,Subject ID,Group,Age,Gender,clinical TS Ex#1,clinical TS Ex#2,clinical TS Ex#3,clinical TS Ex#4,clinical TS Ex#5,clinical PO Ex#1,clinical PO Ex#2,clinical PO Ex#3,clinical PO Ex#4,clinical PO Ex#5,clinical CF Ex#1,clinical CF Ex#2,clinical CF Ex#3,clinical CF Ex#4,clinical CF Ex#5
0,B_ID1,B,66,F,41.000000,16.000000,26.000000,23.000000,22.000000,15.000000,5.000000,9.000000,4.000000,5.000000,26.000000,11.000000,17.000000,19.000000,17.000000
1,B_ID2,B,52,M,38.000000,45.000000,35.000000,26.000000,36.000000,12.000000,15.000000,8.000000,3.000000,13.000000,26.000000,30.000000,27.000000,23.000000,23.000000
2,B_ID3,B,78,F,28.210081,31.605061,25.178138,29.462784,16.245854,9.394392,10.063701,0.425665,10.187079,0.005147,18.815689,21.541360,24.752473,19.275704,16.240707
3,B_ID4,B,52,F,33.339733,31.299278,38.358822,36.262890,32.118419,7.783826,10.755795,10.998189,11.997940,1.913925,25.555907,20.543484,27.360633,24.264950,30.204493
4,B_ID5,B,72,M,44.333333,35.000000,36.666667,40.000000,43.333333,13.000000,10.666667,12.333333,10.333333,12.333333,31.333333,24.333333,24.333333,29.666667,31.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,NE_ID5,NE,31,M,40.666667,28.666667,38.000000,31.333333,21.333333,13.000000,9.000000,9.666667,9.000000,9.666667,27.666667,19.666667,28.333333,22.333333,11.666667
74,NE_ID6,NE,28,M,43.666667,42.666667,42.333333,29.333333,33.000000,14.000000,12.666667,13.333333,7.000000,11.333333,29.666667,30.000000,29.000000,22.333333,21.666667
75,NE_ID7,NE,26,M,42.000000,30.000000,41.000000,33.000000,39.000000,15.000000,9.000000,15.000000,3.000000,12.000000,27.000000,21.000000,26.000000,30.000000,27.000000
76,NE_ID8,NE,58,F,40.000000,26.000000,33.000000,26.000000,42.000000,15.000000,5.000000,12.000000,3.000000,15.000000,25.000000,21.000000,21.000000,23.000000,27.000000


In [4]:
def rating(total_score):
    if (total_score > 44):
        return 3
    elif (total_score <= 44 and total_score > 34): 
        return 2
    else:
        return 1

In [5]:
def uneven_rating(total_score):
    if (total_score > 37):
        return 3
    elif (total_score <= 37 and total_score > 25): 
        return 2
    else:
        return 1

In [6]:
def info_from_filename(filename):
    file_info = filename.split('_')
    group = file_info[0]
    subject_id = file_info[1]
    exercise = file_info[2].split('.')[0]
    return group, subject_id, int(exercise[2])

In [7]:
def info_from_filename_seq(filename):
    file_info = filename.split('_')
    group = file_info[0]
    subject_id = file_info[1]
    exercise = file_info[2]
    sequence = file_info[3].split('.')[0][-1:]
    return group, subject_id, int(exercise[2]), int(sequence)

In [8]:
resnet_images_dir = os.path.join(os.getcwd(), 'kimore224norm_blazepose_xyz_noOverlap')
file_list = []
dict_list = []

for item in os.scandir(resnet_images_dir):
    if item.is_file():
        # filename
        filename = item.name
        file_list.append(filename)
        
        # group, ID, exercise
        group, subject_id, exercise, sequence = info_from_filename_seq(filename)
        
        # image as ndarrray
        filepath = os.path.join(resnet_images_dir, filename)
        image = np.asarray(Image.open(filepath))
        
        # TS, PO, CF
        subject_entry = kimore_df[(kimore_df['Subject ID'].str.contains(subject_id)) & (kimore_df['Group'].str.contains(group))]
        exercise_entry = [col for col in subject_entry.columns if str(exercise) in col]
        total_score = round(subject_entry[exercise_entry[0]].values[0], 2)
        primary_outcome = round(subject_entry[exercise_entry[1]].values[0], 2)
        control_factor = round(subject_entry[exercise_entry[2]].values[0], 2)
        
        # rating
        #rating_val = rating(total_score)
        rating_val = uneven_rating(total_score)
        
        # onehot encodings
        rating_onehot = np.zeros(3)
        rating_onehot[rating_val - 1] = 1        
        exercise_onehot = np.zeros(5)
        exercise_onehot[exercise - 1] = 1
        
        # make dict
        filename = {
            "image": image,
            "group": group,
            "subject_id": subject_id,
            "slice": sequence,
            "TS": total_score,
            "PO": primary_outcome,
            "CF": control_factor,
            "rating": rating_val,
            "rating_onehot": rating_onehot,
            "exercise": exercise,
            "exercise_onehot": exercise_onehot
        }
        dict_list.append(filename)

In [9]:
# resnet_images_dir = os.path.join(os.getcwd(), 'resnet_images')
# file_list = []
# dict_list = []

# for item in os.scandir(resnet_images_dir):
#     if item.is_file():
#         # filename
#         filename = item.name
#         file_list.append(filename)
        
#         # group, ID, exercise
#         group, subject_id, exercise = info_from_filename(filename)
        
#         # image as ndarrray
#         filepath = os.path.join(resnet_images_dir, filename)
#         image = np.asarray(Image.open(filepath))
        
#         # TS, PO, CF
#         subject_entry = kimore_df[(kimore_df['Subject ID'].str.contains(subject_id)) & (kimore_df['Group'].str.contains(group))]
#         exercise_entry = [col for col in subject_entry.columns if str(exercise) in col]
#         total_score = round(subject_entry[exercise_entry[0]].values[0], 2)
#         primary_outcome = round(subject_entry[exercise_entry[1]].values[0], 2)
#         control_factor = round(subject_entry[exercise_entry[2]].values[0], 2)
        
#         # rating
#         rating_val = rating(total_score)
        
#         # onehot encodings
#         rating_onehot = np.zeros(3)
#         rating_onehot[rating_val - 1] = 1        
#         exercise_onehot = np.zeros(5)
#         exercise_onehot[exercise - 1] = 1
        
#         # make dict
#         filename = {
#             "image": image,
#             "group": group,
#             "subject_id": subject_id,
#             "TS": total_score,
#             "PO": primary_outcome,
#             "CF": control_factor,
#             "rating": rating_val,
#             "rating_onehot": rating_onehot,
#             "exercise": exercise,
#             "exercise_onehot": exercise_onehot
#         }
#         dict_list.append(filename)

In [9]:
with open('kimore224norm_blazepose_xyz_noOverlap_filenames.txt', 'w') as f:
    for file in file_list:
        f.write(f"{file} ")

In [10]:
# with pickle_protocol(4):
    #pd.DataFrame(['hello', 'world']).to_hdf('foo_1.h5', 'x')
dd.io.save('kimore224norm_blazepose_xyz_noOverlap_dicts.h5', dict_list)

print("done!")

done!


In [11]:
# load pretrained resnet_classifier model

# remove last layer (Dense=20 MoVi classification layer)
    # https://stackoverflow.com/questions/41668813/how-to-add-and-remove-new-layers-in-keras-after-loading-weights
    # https://stackoverflow.com/questions/41378461/how-to-use-models-from-keras-applications-for-transfer-learnig/41386444#41386444

# make multi-output layer with dimensions (Dense=5) and (Dense=3) for exercise and rating respectively

In [11]:
# load kimore_filenames.txt and kimore_dicts.h5
    # both have length = 385
    # deepdish info: https://deepdish.readthedocs.io/en/latest/io.html#lists-and-tuples

with open("kimore224norm_blazepose_xyz_noOverlap_filenames.txt", "r") as txt_file:
    file_contents = txt_file.readlines()

kimore_filenames = file_contents[0].split(' ')[:-1]
kimore_dicts = dd.io.load('kimore224norm_blazepose_xyz_noOverlap_dicts.h5')
print(kimore_dicts[0])
print(kimore_filenames[0])
    
# # randomly select 80% training, 20% validation (or maybe reserve some for test??)
# split = 0.8

# train_split = round(split * len(kimore_filenames))
# train_files = random.sample(kimore_filenames, train_split)
# val_files = [f for f in kimore_filenames if f not in train_files]

# # assert that the train and val files are unique
# #print([file for file in train_files if file in val_files])

# # get indexes
# train_idx = [i for i in range(len(kimore_filenames)) if kimore_filenames[i] in train_files]
# val_idx = [i for i in range(len(kimore_filenames)) if kimore_filenames[i] in val_files]

# # NN input = kimore_dicts[idx]

{'image': array([[[116, 208,  34],
        [116, 207,  35],
        [117, 207,  35],
        ...,
        [ 86,   2,  90],
        [ 85,   2,  90],
        [ 84,   2,  90]],

       [[116, 208,  34],
        [116, 207,  35],
        [117, 207,  35],
        ...,
        [ 86,   2,  90],
        [ 85,   2,  90],
        [ 84,   2,  90]],

       [[116, 208,  34],
        [116, 207,  35],
        [117, 207,  35],
        ...,
        [ 86,   2,  90],
        [ 85,   2,  90],
        [ 84,   2,  90]],

       ...,

       [[121, 208,  39],
        [121, 208,  39],
        [122, 208,  40],
        ...,
        [ 87,  11,  89],
        [ 86,  11,  89],
        [ 85,  11,  89]],

       [[121, 208,  40],
        [121, 208,  40],
        [122, 208,  41],
        ...,
        [ 87,  11,  88],
        [ 86,  11,  88],
        [ 85,  11,  88]],

       [[121, 208,  40],
        [121, 208,  40],
        [122, 208,  41],
        ...,
        [ 87,  11,  88],
        [ 86,  11,  88],
        [ 85, 

In [214]:
subject_list = []
for dic in kimore_dicts:
    group = dic['group']
    sub_id = dic['subject_id']
    subject = [group, sub_id]
    subject_list.append(subject)
    
unique_subjects = [list(x) for x in set(tuple(x) for x in subject_list)]

In [224]:
test_split = 0.15
split = 1
while (split > test_split):
    test_subject_num = round(0.1 * len(unique_subjects))
    test_subject_list = np.array(random.sample(unique_subjects, test_subject_num))
    test_dicts = []
    test_idx = []
    for d in range(len(kimore_dicts)):
        group = kimore_dicts[d]['group']
        sub_id = kimore_dicts[d]['subject_id']
        if (group in test_subject_list[:, 0]):
            if (sub_id in test_subject_list[:, 1]):
                test_dicts.append(kimore_dicts[d])
                test_idx.append(d)          
                
    split = len(test_dicts) / len(kimore_dicts)

print(len(test_dicts))
print(len(kimore_dicts))
print(len(test_dicts) / len(kimore_dicts))

815
5809
0.1402995352039938


In [216]:
non_test_idx = [i for i in list(range(5809)) if i not in test_idx]

In [217]:
test_files = [kimore_filenames[i] for i in test_idx]
non_test_files = [kimore_filenames[i] for i in non_test_idx]

In [218]:
train_file_num = round(0.8 * len(non_test_files))
train_files = random.sample(non_test_files, train_file_num)
train_idx = [i for i in range(len(non_test_files)) if non_test_files[i] in train_files]
print(len(train_files))

val_files = [f for f in non_test_files if f not in train_files]
val_idx = [i for i in range(len(non_test_files)) if non_test_files[i] in val_files]
print(len(val_files))

total = len(test_files) + len(train_files) + len(val_files)
print(total)

3998
999
5809


In [219]:
# get numpy arrays of train inputs and outputs
train_input_list = []
train_ratings_list = []
train_exercises_list = []

for idx in train_idx:
    train_dict = kimore_dicts[idx]
    
    train_input_list.append(train_dict["image"])
    train_ratings_list.append(train_dict["rating_onehot"])
    train_exercises_list.append(train_dict["exercise_onehot"])


train_inputs = np.stack(train_input_list)
train_ratings = np.stack(train_ratings_list)
train_exercises = np.stack(train_exercises_list)

print(train_inputs.shape)
print(train_ratings.shape)
print(train_exercises.shape)

(3998, 224, 224, 3)
(3998, 3)
(3998, 5)


In [220]:
# get numpy arrays of val inputs and outputs
val_input_list = []
val_ratings_list = []
val_exercises_list = []

for idx in val_idx:
    val_dict = kimore_dicts[idx]
    
    val_input_list.append(val_dict["image"])
    val_ratings_list.append(val_dict["rating_onehot"])
    val_exercises_list.append(val_dict["exercise_onehot"])


val_inputs = np.stack(val_input_list)
val_ratings = np.stack(val_ratings_list)
val_exercises = np.stack(val_exercises_list)

print(val_inputs.shape)
print(val_ratings.shape)
print(val_exercises.shape)

(999, 224, 224, 3)
(999, 3)
(999, 5)


In [221]:
test_input_list = []
test_ratings_list = []
test_exercises_list = []

for idx in test_idx:
    test_dict = kimore_dicts[idx]
    test_input_list.append(test_dict["image"])
    test_ratings_list.append(test_dict["rating_onehot"])
    test_exercises_list.append(test_dict["exercise_onehot"])

test_inputs = np.stack(test_input_list)
test_ratings = np.stack(test_ratings_list)
test_exercises = np.stack(test_exercises_list)

print(test_inputs.shape)
print(test_ratings.shape)
print(test_exercises.shape)

(812, 224, 224, 3)
(812, 3)
(812, 5)


In [None]:
# run deep learning within pipeline of pose detector
# doesn't matter which action classifier for now
# just needs to take skeleton and feed it through
# 2 parallel deep learning models on camera - pose and classifier
# resnet - good baseline that we know will work
# in the future, can use MobileNet for speed and see the accuracy tradeoff


# SLURM documentation