1. Baza danych(#baza)

In [1]:
import btk
from ezc3d import c3d
import pandas as pd
import numpy as np
import glob

In [2]:
def cropp_c3dfile(eventsFrame, filename, destiny):
    """
    Funkcja oddzielajaca pojedyncze ruchy w odrebne pliki na podstawie danych o markerach.
    
    Input:
    -eventsFrame - poczatek i koniec wycinka w formacie [[a,b],[a,b],...]
    -filename - sciezka pliku do podzielenia
    -destiny - sciezka, do ktorej zostana zapisane wyodrebnione czesci
    
    Output:
    - Podzielone pliki c3d zawierajace dane o pojedynczym ruchu
    
    """
    reader = btk.btkAcquisitionFileReader()
    reader.SetFilename(filename)
    reader.Update()
    acq = reader.GetOutput()
 
    writer = btk.btkAcquisitionFileWriter()
    
    for i in range(0, len(eventsFrame)):
        clone = acq.Clone();
        clone.ResizeFrameNumberFromEnd(acq.GetLastFrame() - eventsFrame[i][0] + 1)
        clone.ResizeFrameNumber(eventsFrame[i][1] - eventsFrame[i][0] + 1)
        clone.SetFirstFrame(eventsFrame[i][0])
        clone.ClearEvents()
        for e in btk.Iterate(acq.GetEvents()):
            if ((e.GetFrame() > clone.GetFirstFrame()) and (e.GetFrame() < clone.GetLastFrame())):
                clone.AppendEvent(e)
        clone.SetFirstFrame(1)
        writer.SetInput(clone)
        writer.SetFilename(destiny + '\\' + (filename.split('\\')[-1]).split('.')[0] + '.c3d')
        writer.Update()

In [3]:
path = f'medical_dataset_AK'

In [4]:
filelist =[]
for file in glob.glob(f'{path}\\**\\*.c3d',recursive = True):
    filelist.append(file)

In [5]:
def read_labels(sample):
    e_label = pd.DataFrame(sample['parameters']['EVENT']['LABELS']['value'])
    e_contexts = pd.DataFrame(sample['parameters']['EVENT']['CONTEXTS']['value'])

    times = sample['parameters']['EVENT']['TIMES']['value']
    e_frames = pd.DataFrame([value * 100 for value in times if value != 0]).astype(int)

    event = pd.concat([e_label, e_contexts, e_frames], axis=1)
    event.columns = ['label', 'context', 'frames']
    event = event.set_index('frames')
    event = event.sort_index(axis=0)

    return event

In [6]:
def get_one_step_time(sample, label, context):
    df = read_labels(sample)
    temp = [frame for frame in df.index if df['label'][frame] == label and df['context'][frame] == context]

    return temp[0], temp[1]

In [7]:
for file in filelist:
    sample = c3d(file)
    start, stop = get_one_step_time(sample, 'Foot Strike', 'Right')
    eventsFrame = [[start, stop]]
    cropp_c3dfile(eventsFrame, file, 'cut')

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [8]:
marker_list = ['LANK', 'RANK', 'LKNE', 'RKNE', 'LFIN', 'RFIN', 'LSHO', 'RSHO', 'LASI', 'RFHD','RWRA','RHEE','LHEE','CentreOfMass']

In [9]:
path = f'cut'
filelist =[]
for file in glob.glob(f'{path}\\*.c3d',recursive = True):
    filelist.append(file)
filelist

['cut\\degeneration_01.c3d',
 'cut\\degeneration_02.c3d',
 'cut\\degeneration_03.c3d',
 'cut\\degeneration_04.c3d',
 'cut\\degeneration_05.c3d',
 'cut\\degeneration_06.c3d',
 'cut\\degeneration_07.c3d',
 'cut\\degeneration_08.c3d',
 'cut\\degeneration_09.c3d',
 'cut\\degeneration_10.c3d',
 'cut\\degeneration_11.c3d',
 'cut\\degeneration_12.c3d',
 'cut\\degeneration_13.c3d',
 'cut\\degeneration_14.c3d',
 'cut\\degeneration_15.c3d',
 'cut\\healthy_02.c3d',
 'cut\\healthy_03.c3d',
 'cut\\healthy_04.c3d',
 'cut\\healthy_05.c3d',
 'cut\\healthy_06.c3d',
 'cut\\healthy_07.c3d',
 'cut\\healthy_09.c3d',
 'cut\\healthy_11.c3d',
 'cut\\healthy_13.c3d',
 'cut\\healthy_14.c3d',
 'cut\\healthy_15.c3d']

In [10]:
avg = 127

In [11]:
from pyomeca import Markers

In [12]:
def data_markers(data_path, marker_list):
    data_markers = Markers.from_c3d(data_path, usecols=[marker_list[0]])
    data_markers = data_markers.meca.time_normalize(n_frames=avg)
    data_markers = data_markers.meca.to_wide_dataframe()
    for i in range(len(marker_list)-1):
        
        tmp_markers = Markers.from_c3d(data_path, usecols=[marker_list[i+1]])
        tmp_markers = tmp_markers.meca.time_normalize(n_frames=avg)  
        tmp_markers = tmp_markers.meca.to_wide_dataframe()
        data_markers = data_markers.join(tmp_markers)

        
    cols = [c for c in data_markers.columns if c.lower()[:4] != 'ones']
    markers_dataframe = data_markers[cols]
    
    return markers_dataframe

In [13]:
dm = data_markers(filelist[1], marker_list)
dm

Unnamed: 0_level_0,x_LANK,y_LANK,z_LANK,x_RANK,y_RANK,z_RANK,x_LKNE,y_LKNE,z_LKNE,x_RKNE,...,z_RWRA,x_RHEE,y_RHEE,z_RHEE,x_LHEE,y_LHEE,z_LHEE,x_CentreOfMass,y_CentreOfMass,z_CentreOfMass
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.000000,-126.471069,2141.369141,58.194778,-270.662628,1776.444092,53.285305,-27.806711,2031.335327,394.241730,-340.655243,...,747.949341,-231.031326,1833.059937,42.629116,-165.231812,2185.437988,57.537159,-179.202026,1907.384399,824.281921
0.012698,-126.489167,2141.088115,58.765229,-270.134450,1775.781638,51.544988,-28.166268,2028.324172,394.136228,-340.518799,...,748.516731,-230.547253,1832.613463,41.378088,-165.105176,2185.184163,58.497646,-179.810725,1903.232123,823.831078
0.025397,-126.448102,2140.783629,59.408002,-269.613726,1775.439996,49.950900,-28.556328,2025.145372,394.094732,-340.454444,...,749.183793,-229.982094,1832.396558,40.402490,-164.899429,2184.886560,59.558547,-180.495981,1899.012819,823.470285
0.038095,-126.357344,2140.442708,60.144150,-269.090113,1775.186884,48.571891,-28.980724,2021.769822,394.115868,-340.454120,...,749.954009,-229.344596,1832.172782,39.761674,-164.628811,2184.533447,60.740871,-181.248600,1894.673537,823.215983
0.050794,-126.225442,2140.042988,61.004797,-268.567202,1774.845213,47.463386,-29.444557,2018.158434,394.202945,-340.515028,...,750.834530,-228.664482,1831.772395,39.488528,-164.307632,2184.109212,62.074553,-182.061747,1890.163222,823.085533
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1.549206,-129.627692,1364.001459,67.018521,-261.739265,975.366928,59.910446,-22.481773,1270.308675,398.705980,-333.080660,...,737.355067,-219.282807,1029.103177,47.193854,-173.231503,1414.881158,64.024973,-175.889380,1138.775931,828.996710
1.561905,-129.516697,1363.745879,67.702005,-259.779160,968.752723,58.592503,-22.438390,1267.253209,398.725455,-331.792337,...,735.706226,-217.241658,1022.310823,45.834367,-173.206095,1414.580194,65.184016,-175.913645,1132.646467,828.258347
1.574603,-129.368118,1363.462437,68.457907,-258.064615,963.891271,57.013262,-22.403173,1263.994176,398.753717,-330.657312,...,734.046863,-215.512441,1017.394734,44.470547,-173.093961,1414.254036,66.424360,-176.021871,1126.570266,827.526018
1.587302,-129.182378,1363.142973,69.297982,-256.560857,960.372432,55.289942,-22.390791,1260.523096,398.801261,-329.697413,...,732.399238,-214.043815,1013.919374,43.224957,-172.894309,1413.899059,67.759601,-176.212545,1120.523848,826.825391


In [14]:
def load_group(filenames):
    
    loaded = []
    
    for file in filenames:
        try:
            result = data_markers(file, marker_list)
            loaded.append(result)
        except:
            print(file)
                


    return loaded

In [15]:
group = load_group(filelist)

### Brakuje 4 plików, dlaczego czas sie nie zgrywa?

Baza danych <a name="baza"></a>

In [16]:
x = np.stack(group)
x.shape

(26, 127, 42)

In [17]:
print('(samples, timesteps, features) -> ', x.shape)

(samples, timesteps, features) ->  (26, 127, 42)


Kolejność, 'LANK' dla x, y, z, 'RANK' dla x, y, z ...... itd.

In [18]:
import h5py

In [19]:
def create_h5_basic(path, x):
    f = h5py.File(path, mode='w')
    f.create_dataset("x", data=x)
    f.close()

In [20]:
def create_h5(path, x, y):
    f = h5py.File(path, mode='w')
    f.create_dataset("x", data=x)
    f.create_dataset("y", data=y)
    f.close()

In [21]:
create_h5_basic("ak_basic.h5", x)

In [22]:
f=h5py.File(r'ak_basic.h5', 'r')
print(list(f.keys()))
x = f['x']
print('(samples, timesteps, features) -> ', x.shape)

['x']
(samples, timesteps, features) ->  (26, 127, 42)


# JRD

In [23]:
import math

In [24]:
def jrd_method(m1,m2,data_path):
    channels = [m1, m2]
    markers = Markers.from_c3d(data_path, usecols=channels)

    arr_distance_jrd = []
    arr_left = []
    arr_right = []
    time = []
    i = 0
    x_2 = 0
    y_2 = 0
    z_2 = 0

    while i < avg:
    
        x_2 = (markers[0][0][i] - markers[0][1][i]) ** 2
        y_2 = (markers[1][0][i] - markers[1][1][i]) ** 2
        z_2 = (markers[2][0][i] - markers[2][1][i]) ** 2
        distance = math.sqrt(x_2+y_2+z_2)
        
        time.append(i)
        arr_left.append(markers[2][0][i])
        arr_right.append(markers[2][1][i])
        arr_distance_jrd.append(distance)
    
        i += 1
        
    return arr_distance_jrd

# JRA

In [25]:
def jra_method(p1, p2, p3, data_path):
    channels = [p1, p2, p3]
    markers = Markers.from_c3d(data_path, usecols=channels)
    
    #Implementation of variables
    first_dist = []
    second_dist = []
    distance = []
    angle = []
    time = []
    
    i = 0
    xl = yl = zl = 0
    xl_1 = yl_1 = zl_1 = 0

    while i < avg:
        
        xl = (markers[0][0][i] - markers[0][1][i])
        yl = (markers[1][0][i] - markers[1][1][i])
        zl = (markers[2][0][i] - markers[2][1][i])
        
        xl_1 = (markers[0][1][i] - markers[0][2][i])
        yl_1 = (markers[1][1][i] - markers[1][2][i])
        zl_1 = (markers[2][1][i] - markers[2][2][i])
        
        time.append(i)
    
        first_dist.append(math.sqrt((xl ** 2) + (yl ** 2) + (zl ** 2)))
        second_dist.append(math.sqrt((xl_1 ** 2) + (yl_1 ** 2) + (zl_1 ** 2)))                   
        distance.append(xl * xl_1 + yl * yl_1 + zl * zl_1)
    
        angle.append(math.acos(distance[i] / (first_dist[i] * second_dist[i])))
        
        i += 1
        
    return angle


# HDF

In [26]:
from scipy.stats import skew 
def hdf_method(data_path):
    channels = ['LANK', 'RANK', 'LKNE', 'RKNE', 'LFIN', 'RFIN', 'LSHO', 'RSHO']
    markers = Markers.from_c3d(data_path, usecols=channels)
    
    #hd1 ankles
    hd1 = []
    hd1_x = 0
    #hd2 knees
    hd2 = []
    hd2_x = 0
    #hd3 wrists
    hd3 = []
    hd3_x = 0
    #hd4 shoulders
    hd4 = []
    hd4_x = 0
    
    #variables
    meanH = []
    stdH = []
    skewH = []
    HDF = [] 
    hdf = []
    time = []
    i = 0
    
    while i < avg:
    
        hd1_x = (markers[0][0][i] - markers[0][1][i]) ** 2
        hd1.append(math.sqrt(hd1_x))
        
        hd2_x = (markers[0][2][i] - markers[0][3][i]) ** 2
        hd2.append(math.sqrt(hd2_x))
        
        hd3_x = (markers[0][4][i] - markers[0][5][i]) ** 2
        hd3.append(math.sqrt(hd3_x))
        
        hd4_x = (markers[0][6][i] - markers[0][7][i]) ** 2
        hd4.append(math.sqrt(hd4_x))
        
        time.append(i)
        hdf = [hd1[i], hd2[i], hd3[i], hd4[i]]
        
        meanH.append(np.mean(hdf))
        stdH.append(np.std(hdf))
        skewH.append(skew(hdf))        
        HDF.append([meanH, stdH, skewH])
        
        i += 1
        
    return HDF[0][0], HDF[0][1], HDF[0][2]

# VDF

In [27]:
def vdf_method(data_path):
    
    channels = ['RFHD','RWRA','RSHO','RANK','LANK','RHEE','LHEE']
    markers_body = Markers.from_c3d(data_path, usecols=channels)
    
    channels_sym = ['CentreOfMass']
    markers_sym = Markers.from_c3d(data_path, usecols=channels_sym)
    
    #vd1 HEIGHT
    vd1 = []
    vd1_y = 0
    #vd2 WRIST RIGHT
    vd2 = []
    vd2_y = 0
    #vd3 SHOULDER RIGHT
    vd3 = []
    vd3_y = 0
    #vd4 ANKLE RIGHT
    vd4 = []
    vd4_y = 0
    #vd5 ANKLE LEFT
    vd5 = []
    vd5_y = 0
    #vd6 DIST. FEET LEFT AND RIGHT
    vd6 = []
    vd6_y = 0
    
    #variables
    meanV = []
    stdV = []
    VDF = [] 
    vdf = []
    time = []
    average = []
    i = 0
    
    while i < avg:
    
        vd1_y = np.array(markers_body[1][0][i])
        vd1.append(vd1_y)
        
        vd2_y = np.array(markers_body[1][1][i])
        vd2.append(vd2_y)
        
        vd3_y = np.array(markers_body[1][2][i])
        vd3.append(vd3_y)
        
        vd4_y = np.array(markers_body[1][3][i])
        vd4.append(vd4_y)
        
        vd5_y = np.array(markers_body[1][4][i])
        vd5.append(vd5_y)
        
        vd6_y = np.array((0.5 * (markers_body[1][5][i] - markers_body[1][6][i]) * markers_sym[1][0][i]))
        vd6.append(vd6_y)
        
        time.append(i)
        
        average = (vd1_y + vd2_y + vd3_y + vd4_y + vd5_y + vd6_y)
        vdf = [vd1[i], vd2[i], vd3[i], vd4[i], vd5[i], vd6[i]]
        
        meanV.append(np.mean(average))
        stdV.append(np.std(vdf))
        VDF.append([meanV, stdV])
        
        i += 1
        
    return VDF[0][0], VDF[0][1]

## New approach

In [28]:
from pyomeca import Markers

In [29]:
def data_markers(data_path):
    jrd = jrd_method('LANK', 'RANK', data_path)
    jra = jra_method('LASI','LKNE','LANK', data_path)
    meanH, stdH, skewH = hdf_method(data_path)
    meanV, stdV = vdf_method(data_path)
    
    result = [jrd, jra, meanH, stdH, skewH, meanV, stdV]
    
    return pd.DataFrame(result).T
    
    
    
#     data_markers = Markers.from_c3d(data_path, usecols=[marker_list[0]])
#     data_markers = data_markers.meca.time_normalize(n_frames=avg)
#     data_markers = data_markers.meca.to_wide_dataframe()
#     for i in range(len(marker_list)-1):
        
#         tmp_markers = Markers.from_c3d(data_path, usecols=[marker_list[i+1]])
#         tmp_markers = tmp_markers.meca.time_normalize(n_frames=avg)  
#         tmp_markers = tmp_markers.meca.to_wide_dataframe()
#         data_markers = data_markers.join(tmp_markers)

        
#     cols = [c for c in data_markers.columns if c.lower()[:4] != 'ones']
#     markers_dataframe = data_markers[cols]
    
#     return markers_dataframe

In [30]:
data_path = filelist[0]
data_markers(data_path)

Unnamed: 0,0,1,2,3,4,5,6
0,465.300408,0.115936,325.351617,133.957345,0.711506,-423119.334219,162248.767781
1,466.206599,0.114514,325.362047,133.879092,0.713702,-422967.156239,162184.326919
2,466.741404,0.112996,325.370770,133.815976,0.715968,-422383.089996,161958.916880
3,466.997511,0.111408,325.379910,133.769147,0.718230,-421474.786289,161612.609050
4,467.065255,0.109791,325.392862,133.738505,0.720435,-420345.360011,161183.785746
...,...,...,...,...,...,...,...
122,234.911029,0.140487,336.080266,147.071201,0.737797,92346.680508,31350.990638
123,220.058000,0.141037,335.934648,147.581057,0.734303,78444.985025,26186.341283
124,206.322817,0.141771,335.792896,148.059926,0.731270,64267.975488,20919.338828
125,194.246519,0.142653,335.659820,148.501198,0.728789,49848.322246,15562.150855


In [31]:
import config as cfg

In [47]:
final_db = []
final_label = []

for file in filelist:
    try:
        db_features = data_markers(file)
        final_db.append(db_features)
#         final_label.append(file[4:-4].replace("_", ""))
        final_label.append((file).index(file))
    except:
        print(file)

cut\degeneration_09.c3d
cut\degeneration_12.c3d
cut\degeneration_13.c3d
cut\degeneration_15.c3d
cut\healthy_02.c3d
cut\healthy_03.c3d
cut\healthy_05.c3d
cut\healthy_07.c3d
cut\healthy_09.c3d
cut\healthy_11.c3d
cut\healthy_13.c3d
cut\healthy_14.c3d
cut\healthy_15.c3d


Pliki powyżej nie działają problem z indexem/krzyczy że jest poza granicą indexowania

In [48]:
dbl = pd.DataFrame({'File' : pd.Series(final_label)})
dbl.shape, dbl

((13, 1),
     File
 0      0
 1      0
 2      0
 3      0
 4      0
 5      0
 6      0
 7      0
 8      0
 9      0
 10     0
 11     0
 12     0)

In [49]:
create_h5('features_db_test10.h5', final_db, dbl)

In [36]:
# create_h5_basic('features_db.h5', final_db)