In [1]:
import matplotlib.pyplot as plt

from dataloader import load_patient_task_data_from_txt, clean_and_verify

## Determine valid columns

We want to eliminate features that are not present for all samples, as we won't be able to always feed them to our model.

In [2]:
# All columns
total_cols = ['RTA', 'LTA', 'IO', 'ECG', 'RGS', 'acc_x_left_shank', 'acc_y_left_shank', 'acc_z_left_shank', 'gyro_x_left_shank', 'gyro_y_left_shank', 'gyro_z_left_shank', 'NC_invalid_0', 'acc_x_right_shank', 'acc_y_right_shank', 'acc_z_right_shank', 'gyro_x_right_shank', 'gyro_y_right_shank', 'gyro_z_right_shank', 'NC_invalid_1', 'acc_x_waist', 'acc_y_waist', 'acc_z_waist', 'gyro_x_waist', 'gyro_y_waist', 'gyro_z_waist', 'NC_invalid_2', 'acc_x_arm', 'acc_y_arm', 'acc_z_arm', 'gyro_x_arm', 'gyro_y_arm', 'gyro_z_arm', 'SC']

# Merging left_shank and right_shank to shank
for i, col in enumerate(total_cols):
    if 'left' in col:
        total_cols[i] = ''.join(col.split('_left'))
    if 'right' in col:
        total_cols[i] = ''.join(col.split('_right'))

In [4]:
# Get list of unusable features
unusable = []
for patient_id in ['001', '002', '003', '004', '005', '006', '007', '008-1', '008-2', '009', '010', '011', '012']:
    for task_num in [i for i in range(1, 7)]:
        patient_x_task_y_data = load_patient_task_data_from_txt(patient_id, task_num)
        patient_x_task_y_data = clean_and_verify(patient_x_task_y_data)
        
        if not patient_x_task_y_data.empty:
            cols = patient_x_task_y_data.columns.values.tolist()
            unusable += [col for col in total_cols if col not in cols]
        else:
            print(f'No data found for patient_id={patient_id}, task={task_num}')
                  
unusable = list(set(unusable))
print(f'\nCannot use the following features: {unusable}')

No data found for patient_id=001, task=5
No data found for patient_id=001, task=6
No data found for patient_id=002, task=5
No data found for patient_id=002, task=6
No data found for patient_id=003, task=5
No data found for patient_id=003, task=6
No data found for patient_id=004, task=6
No data found for patient_id=005, task=5
No data found for patient_id=005, task=6
No data found for patient_id=006, task=5
No data found for patient_id=006, task=6
No data found for patient_id=007, task=5
No data found for patient_id=007, task=6
No data found for patient_id=008-1, task=6
No data found for patient_id=008-2, task=5
No data found for patient_id=008-2, task=6
No data found for patient_id=010, task=5
No data found for patient_id=010, task=6
No data found for patient_id=011, task=5
No data found for patient_id=011, task=6
No data found for patient_id=012, task=5
No data found for patient_id=012, task=6
Cannot use the following features: ['SC', 'acc_x_waist', 'NC_invalid_2', 'acc_z_arm', 'NC_in

In [5]:
# Get list of usable features
usable = [col for col in total_cols if col not in unusable]
print(f'We can use the following features: {usable}')

We can use the following features: ['RTA', 'LTA', 'IO', 'ECG', 'RGS', 'acc_x_shank', 'acc_y_shank', 'acc_z_shank', 'gyro_x_shank', 'gyro_y_shank', 'gyro_z_shank', 'acc_x_shank', 'acc_y_shank', 'acc_z_shank', 'gyro_x_shank', 'gyro_y_shank', 'gyro_z_shank']


## Amalgamate data from different patients, tasks

In [15]:
lens = []
for patient_id in ['001', '002', '003', '004', '005', '006', '007', '008-1', '008-2', '009', '010', '011', '012']:
    for task_num in [i for i in range(1, 7)]:
        patient_x_task_y_data = load_patient_task_data_from_txt(patient_id, task_num)
        lens.append([patient_id, task_num, len(patient_x_task_y_data)])
        #patient_x_task_y_data = clean_and_verify(patient_x_task_y_data)
        

In [10]:
patient_x_task_y_data

Unnamed: 0,time,RTA,LTA,IO,ECG,RGS,acc_x_waist,acc_y_waist,acc_z_waist,gyro_x_waist,...,gyro_y_arm,gyro_z_arm,SC,label,acc_x_shank,gyro_x_shank,acc_y_shank,gyro_y_shank,acc_z_shank,gyro_z_shank
0,12:04:38.002,-918.0,23.0,-10.5,-356.5,-200.0,7922.384340,625.387939,2370.594366,31.084582,...,-723.735803,533.373304,1830.000025,0,8012.323784,-225.946476,-2550.228411,-101.696846,-1293.335240,85.614056
1,12:04:38.004,-891.5,74.0,-10.5,-269.5,-7.5,7905.650713,649.395150,2367.379429,30.093351,...,-762.931999,563.748359,1830.000019,0,8013.199022,-223.981682,-2567.976963,-93.752486,-1310.214708,83.179649
2,12:04:38.006,-731.0,247.0,-10.5,-242.0,301.0,7888.924917,671.608392,2364.867308,28.259828,...,-801.660294,595.436762,1830.000000,0,8021.112370,-222.443651,-2589.211311,-88.059704,-1336.726557,80.038214
3,12:04:38.008,-607.5,378.0,-10.5,-245.5,562.0,7872.332747,689.614422,2365.570125,24.817537,...,-835.992392,623.750109,1829.999987,0,8035.550478,-220.670413,-2609.897105,-83.511281,-1366.958937,76.531186
4,12:04:38.010,-491.5,321.0,-9.5,-258.0,813.0,7856.000000,701.000000,2372.000000,19.000000,...,-862.000000,644.000000,1830.000000,0,8056.000000,-218.000000,-2626.000000,-79.000000,-1395.000000,73.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180495,12:10:38.992,-598.5,-37.5,-19.5,193.0,-194.0,7750.933776,30.606420,-3452.817377,-13.809834,...,76.215742,-7.082927,1640.000109,0,8100.139833,-109.426247,-1689.587782,-7.821348,-1911.703935,-8.044901
180496,12:10:38.994,-607.0,-43.5,-18.5,201.5,-211.0,7749.490421,29.880479,-3459.834617,-11.453963,...,77.931527,-7.179571,1640.000135,0,8094.448841,-108.721276,-1690.430406,-6.849944,-1910.674232,-7.856461
180497,12:10:38.996,-568.5,-13.5,-17.5,241.5,-170.5,7749.080177,31.151329,-3468.743170,-8.493174,...,79.239440,-7.234752,1640.000107,0,8089.087934,-107.903181,-1691.279140,-6.167867,-1908.992562,-7.545571
180498,12:10:38.998,-545.5,-24.5,-17.0,248.5,-143.5,7750.113289,34.748119,-3478.234482,-5.488257,...,80.231569,-7.193288,1640.000053,0,8085.218018,-106.990057,-1691.885249,-5.857193,-1906.740595,-7.223121


In [16]:
lens

[['001', 1, 180500],
 ['001', 2, 166000],
 ['001', 3, 14500],
 ['001', 4, 15500],
 ['001', 5, 0],
 ['001', 6, 0],
 ['002', 1, 121500],
 ['002', 2, 119500],
 ['002', 3, 13000],
 ['002', 4, 14500],
 ['002', 5, 0],
 ['002', 6, 0],
 ['003', 1, 643500],
 ['003', 2, 315500],
 ['003', 3, 38000],
 ['003', 4, 37500],
 ['003', 5, 0],
 ['003', 6, 0],
 ['004', 1, 94000],
 ['004', 2, 180000],
 ['004', 3, 13500],
 ['004', 4, 16000],
 ['004', 5, 18500],
 ['004', 6, 0],
 ['005', 1, 122000],
 ['005', 2, 10500],
 ['005', 3, 11000],
 ['005', 4, 9500],
 ['005', 5, 0],
 ['005', 6, 0],
 ['006', 1, 227000],
 ['006', 2, 237500],
 ['006', 3, 33000],
 ['006', 4, 33000],
 ['006', 5, 0],
 ['006', 6, 0],
 ['007', 1, 148500],
 ['007', 2, 143500],
 ['007', 3, 25000],
 ['007', 4, 26000],
 ['007', 5, 0],
 ['007', 6, 0],
 ['008-1', 1, 218500],
 ['008-1', 2, 183500],
 ['008-1', 3, 25000],
 ['008-1', 4, 14000],
 ['008-1', 5, 12500],
 ['008-1', 6, 0],
 ['008-2', 1, 186500],
 ['008-2', 2, 187500],
 ['008-2', 3, 36000],
 ['

In [13]:
min(lens)

0

In [14]:
min([l for l in lens if l != 0])

9500