In [1]:
import pandas as pd
import numpy as np
import scipy.io as sio
import os
import re
import source.load_mat_files as mat_fls

## Load data from .mat files and save them in csv files

### Get list of files to load: 

In [2]:
# Everything in the 'Data' directory, then only the entries whose name ends in '.mat'.
file_list = os.listdir('Data')
file_list_mat = [entry for entry in file_list if entry.endswith('.mat')]
# Displayed as the cell result: how many .mat files were found.
len(file_list_mat)

86

### Load all files into a dictionary: 

In [3]:
# Switch into the data directory so the bare file names in file_list_mat resolve.
# The CSV files written further down use relative paths, so they land here too.
# NOTE(review): chdir is process-wide and relative, so re-running this cell from
# inside 'Data' will fail - restart the kernel before running it again.
os.chdir('Data')

# Map each .mat file name to the dictionary returned by scipy.io.loadmat.
data_all = {mat_name: sio.loadmat(mat_name) for mat_name in file_list_mat}

### Sort file names in list alpha-numerically:

In [4]:
# Reorder the existing list in place (plain string ordering of the file names).
file_list_mat[:] = sorted(file_list_mat)

### Select the variables to load: 

In [5]:
# Use the first file (after sorting) as the template for the available fields.
results_descr = data_all[file_list_mat[0]]['results'].dtype.descr
num_elements = len(results_descr)

# The first item of every dtype description entry is the field name.
variables_all = [field_descr[0] for field_descr in results_descr]



In [6]:
# Work on an independent list so variables_all keeps the complete set of field names.
variables_to_add = list(variables_all)

#### Remove variables whose meaning is unknown:

In [7]:
# Fields that are left out of the stacked table.
variables_to_skip = {'nfoot', 'steps_tot'}

# Build a filtered list instead of calling list.remove(): remove() mutates in
# place and raises ValueError once the name is gone, so the original cell could
# not be re-run without first re-running the cell that creates variables_to_add.
variables_to_add = [name for name in variables_to_add if name not in variables_to_skip]

### Loop through dictionary and stack data in pandas dataframe:

In [8]:
list_of_frames = []

# Walk the files in sorted name order. data_all was filled in os.listdir order,
# which is arbitrary, so iterating the dict directly made the row order of the
# stacked table (and of the saved CSV files) depend on the file system.
for file_name in sorted(data_all):
    data_item = data_all[file_name]

    # Start the per-file frame from the time stamps
    # (presumably a DataFrame - confirm in source/load_mat_files.py).
    data_frame_to_stack = mat_fls.identify_get_timestamps(file_name, data_item)

    # Each call takes the frame built so far and returns the frame to carry forward.
    for variable_name in variables_to_add:
        data_frame_to_stack = mat_fls.identify_get_variable(
            file_name, variable_name, data_item, data_frame_to_stack
        )

    list_of_frames.append(data_frame_to_stack)



In [9]:
# Stack the per-file frames on top of each other; each frame keeps its own index labels.
data_wide = pd.concat(list_of_frames, axis=0, ignore_index=False)

In [10]:
# Show the current working directory; the relative CSV paths used below resolve against it.
os.getcwd()

'/app/Data'

### Save wide format dataframe as csv-file:

In [11]:
# Write the wide table (index included) as a comma-separated file in the working directory.
data_wide.to_csv(path_or_buf='data_table_wide.csv', sep=',')

In [12]:
# Independent (deep) copy so the reshaping below cannot touch data_wide.
data_wide_copy = data_wide.copy(deep=True)

### Unpivot dataframe (to long format):

In [13]:
# Column labels as a plain list, then the position of the 'morning_afternoon' column.
col_list = [*data_wide_copy.columns]
col_list.index('morning_afternoon')

3

#### Separate id-columns from variable columns:

In [15]:
# Identifier columns are everything up to and including 'morning_afternoon';
# the remaining columns hold the measured variables. The split point is looked
# up by name rather than hard-coded, so it stays correct if the leading columns change.
n_id_columns = list(data_wide_copy.columns).index('morning_afternoon') + 1
id_columns = data_wide_copy.columns[:n_id_columns]
value_columns = data_wide_copy.columns[n_id_columns:]
print(id_columns)
print(value_columns)

Index(['subject', 'time_stamps', 'time_stamps_hours', 'morning_afternoon'], dtype='object')
Index(['WB_time', 'Ngait_cycles', 'gait_timestamps', 'cadence_mean',
       'speed_mean', 'speed_std', 'slength_mean', 'slength_std', 'gtime_mean',
       'gtime_std', 'speed_CV', 'slength_CV', 'gtime_CV'],
      dtype='object')


#### Reshape data from wide to long format:

In [16]:
# Unpivot: keep the identifier columns and turn every value column into
# (variable, value) rows.
data_long = pd.melt(
    data_wide_copy,
    id_vars=id_columns,
    value_vars=value_columns,
    var_name='variable',
    value_name='value',
)

### Save long format dataframe as csv:

In [17]:
# Write the long table (index included) as a comma-separated file in the working directory.
data_long.to_csv(path_or_buf='data_table_long.csv', sep=',')