In [347]:
import numpy as np
import pandas as pd
from scipy import signal
from scipy.interpolate import interp1d

In [348]:
import scipy.io
# Load the raw-data MATLAB file into a dict keyed by variable name; the cells
# below read 'data_gait_speed', 'data_subject_id', 'data_analog_fp1' and
# 'data_analog_fp2' from it.
mat = scipy.io.loadmat('gait_grf_rawdata.mat')

In [349]:
# Shape of the gait-speed array; it is later stored as the 'Speed' column of X.
mat['data_gait_speed'].shape

(1278, 1)

In [350]:
# Shape of the per-recording subject-id array; these ids are looked up in
# subject_info when the feature matrix X is built.
mat['data_subject_id'].shape

(1278, 1)

In [351]:
# Shape of the 'data_analog_fp2' container (presumably force plate 2 — confirm).
# Each entry wraps a 2-D array; the stance-extraction cell filters its row 6.
mat['data_analog_fp2'].shape

(1278, 1)

In [352]:
# Same shape check for 'data_analog_fp1' (not used by the other cells visible here).
mat['data_analog_fp1'].shape

(1278, 1)

In [353]:
# Load the subject MATLAB file. In the cells visible here only its keys are
# inspected; the subject table used to build X is read from
# 'gait_grf_subject.txt' instead.
subject = scipy.io.loadmat('gait_grf_subject.mat')

In [354]:
# List the variables stored in the subject MATLAB file.
subject.keys()

dict_keys(['__header__', '__version__', '__globals__', 'subject_age', 'subject_bodymass', 'subject_bodysize', 'subject_gender', 'subject_id', 'subject_shoesize'])

In [355]:
# Subject table read from the semicolon-separated text file, indexed by the
# 'subject_id [number]' column so rows can be looked up by subject id.
subject_info = pd.read_csv(
    'gait_grf_subject.txt',
    sep=';',
    index_col='subject_id [number]',
)

In [356]:
# Placeholder for the feature matrix X, which is built from subject_info and
# the gait speed in the next cell.
X = pd.DataFrame()

In [357]:
# Build the feature matrix X: one row per recording, holding the attributes of
# the subject who produced the recording plus the recording's gait speed and
# two derived body-composition features.

# mat['data_subject_id'] is an (n_recordings, 1) array; flatten it to a 1-D
# vector of subject ids in recording order.
subject_ids = mat['data_subject_id'].ravel()

# One label-based selection repeats each subject's row once per recording,
# replacing a per-recording / per-feature Python loop of scalar lookups.
# A KeyError is raised if an id is missing from subject_info.
# reset_index gives X a plain 0..n-1 index aligned with the recording order.
X = subject_info.loc[subject_ids].reset_index(drop=True)

# Flatten the (n_recordings, 1) speed array so it is stored as a regular column.
X['Speed'] = mat['data_gait_speed'].ravel()

# Derived features: body-mass index (mass / height^2) and mass-to-height ratio.
body_mass = X['body mass [kg]']
body_size = X['body size [m]']
X['BMI'] = body_mass / (body_size * body_size)
X['frac'] = body_mass / body_size

In [358]:
# Replace every missing value in the feature matrix with 0 before scaling and
# model fitting.
# NOTE(review): 0 is not a neutral value for physical measurements such as body
# mass or size — consider a median/mean imputation instead; confirm which
# columns actually contain NaN.
X = X.fillna(0)

In [359]:
# Extract one time-normalised curve per recording from channel 6 of
# 'data_analog_fp2' and store it in Y (one row per recording, one column per
# stance percentage).

# A sample is kept when both it and the previous filtered sample exceed this value.
threshold = 5

# Resolution of the normalised grid: 0, samp, 2*samp, ..., 100 (% of stance).
samp = 2
new_x = range(0,100 + samp,samp)

# Zero-phase low-pass Butterworth filter (order 5) applied to the raw signal.
fc = 35  # Cut-off frequency of the filter
fs = 1000 # sampling frequency
w = fc / (fs / 2) # Normalize the frequency
b, a = signal.butter(5, w, 'low')

# Column names are derived from the same grid that is interpolated onto, so the
# two can never get out of sync.
col_list = ['Stance ' + str(pct) for pct in new_x]

# Create Y as float from the start: without an explicit dtype an empty frame is
# object-typed and the row assignments below would keep it that way.
Y = pd.DataFrame(index = X.index, columns = col_list, dtype = float)

fp2_trials = mat['data_analog_fp2']

for ind in range(fp2_trials.shape[0]):
    # Row 6 of the trial's analog array is the analysed channel.
    # NOTE(review): presumably the vertical force component — confirm against
    # the dataset description.
    output = signal.filtfilt(b, a, fp2_trials[ind][0][6,:])

    # Keep sample i when both sample i and sample i-1 are above the threshold.
    # The first sample has no predecessor and is therefore never kept.
    above = output > threshold
    list_abscisse = np.flatnonzero(above[1:] & above[:-1]) + 1

    # At least two retained samples are needed to define a 0-100 % axis;
    # otherwise the normalisation below would index an empty array or divide
    # by zero.
    if list_abscisse.size < 2:
        raise ValueError(
            'Recording {}: fewer than two consecutive samples above the '
            'threshold ({}); cannot normalise the stance phase.'.format(ind, threshold)
        )

    list_extract = output[list_abscisse]

    # Rescale the retained sample indices to 0-100 % of stance.
    # NOTE(review): every supra-threshold sample of the recording is used, so
    # several separate contacts in one recording would be merged into one curve.
    first, last = list_abscisse[0], list_abscisse[-1]
    stance_pct = (list_abscisse - first) * 100 / (last - first)

    # Linear interpolation onto the common percentage grid.
    f = interp1d(stance_pct, list_extract)
    Y.loc[ind,:] = f(new_x)


In [360]:
from sklearn.preprocessing import StandardScaler

# Standardise the feature columns of X; the fitted scaler is kept in `scaler`
# and the transformed array in `X_scale`.
# NOTE(review): the scaler is fitted on all rows of X, i.e. before the
# train/test split performed in the next cell, so test rows contribute to the
# scaling statistics.
scaler = StandardScaler().fit(X)
X_scale = scaler.transform(X)

In [361]:
from sklearn.model_selection import train_test_split

# Hold out 33 % of the recordings for evaluation. The regression target is the
# 'Stance 70' column of Y; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scale,
    Y['Stance 70'],
    test_size=0.33,
    random_state=42,
)

In [362]:
from sklearn.ensemble import RandomForestRegressor

# Random-forest regressor with 100 trees (depth capped at 100) and a fixed seed.
regr = RandomForestRegressor(
    n_estimators=100,
    max_depth=100,
    random_state=0,
)
# Fit on the training split; as the cell's last expression this also displays
# the fitted estimator.
regr.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=100,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

In [363]:
# Inspect the training target (the 'Stance 70' values selected by the split).
y_train

1112    165.734
299      219.26
838     231.879
909     265.997
1146    218.301
755     214.895
671     187.088
1212    351.324
892     424.221
597     251.761
771     81.1388
1193    279.541
723     262.814
885     252.032
1105    383.615
273     269.289
969     279.319
362     379.427
237     294.691
1170    270.729
519     202.171
2       158.083
6       160.617
370     226.077
311     252.951
1097    333.026
661     183.862
560     143.209
1264    430.593
978     301.605
         ...   
699     264.196
510      174.87
1082    212.777
474      112.12
856     228.688
747     171.666
252      341.06
21      243.479
459     165.521
1184    91.2587
276     273.992
955     383.292
1215    300.884
385     154.944
805     150.802
343     265.969
769     145.811
130     268.332
871     111.075
1123    251.358
87      368.905
330     333.693
1238    160.512
466     138.197
121     224.756
1044    318.222
1095    317.226
1130     173.46
860     153.683
1126    240.814
Name: Stance 70, Length:

In [364]:
# Predict the target for the held-out recordings with the fitted random forest.
pred = regr.predict(X_test)
# Alternative models, left disabled; `regressor_svm` and `gnb` are not defined
# in the cells visible here.
#pred = regressor_svm.predict(X_test)
#pred = gnb.predict(X_test)

In [365]:
import plotly.express as px

# Scatter of predicted against measured values on the test split.
fig = px.scatter(
    x=y_test,
    y=pred,
    labels={'x': 'Measured value', 'y': 'Predicted value'},
    title='GRF prediction at 70% of stance',
)
fig.show()