In [3]:
import sys
print(sys.version)

3.6.5 (v3.6.5:f59c0932b4, Mar 28 2018, 05:52:31) 
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)]


In [32]:
import os
import json
import pandas as pd
import pandas_profiling
import numpy as np
from scipy import stats
import statsmodels.api as sm
from patsy import dmatrices

# Statistical analysis of feature computations 


## 1. Data importation and manipulation

In [5]:
def get_path_to_all_files(folder_name):
    """Recursively get all filepaths from a directory tree."""

    filepaths = []
    for dirname, dirnames, filenames in os.walk(folder_name):
        for filename in filenames:
            if '.DS_Store' not in filename:
                filepaths.append(os.path.join(dirname, filename))

    return filepaths

### 1.1 Data importation


In [6]:
results_folder = "/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results"

In [7]:
files = get_path_to_all_files(results_folder)

#### 1.1.1 Force plate feature data

In [8]:
fp_files = [file for file in files if "Vicon" in file and "cop" not in file]
fp_files[1:5]

['/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/Vicon/7_features.json',
 '/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/Vicon/3_features.json',
 '/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/Vicon/11_features.json',
 '/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/Vicon/12_features.json']

#### 1.1.2 Wii Balance Board feature data


In [9]:
wbb_files = [file for file in files if "Vicon" not in file and "cop" not in file]
wbb_files[1:5]

['/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/BalanceBoard/7_features.json',
 '/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/BalanceBoard/3_features.json',
 '/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/BalanceBoard/11_features.json',
 '/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/data/results/feature_results/Sujet4/Session2/BalanceBoard/12_features.json']

### 1.2 Data manipulation

In [10]:
def create_and_merge_dataframes(files):
    time_frames = []
    frequency_frames = []
    for filepath in files:
        with open(filepath) as json_data:
            features = json.load(json_data)
            time_features = features["time_features"]
            frequency_features = features["frequency_features"]
        time_frames.append(pd.DataFrame(time_features, index=[0]))
        frequency_frames.append(pd.DataFrame(frequency_features,index=[0]))
    
    time_features_df = pd.concat(time_frames, axis=0)
    frequency_features_df = pd.concat(frequency_frames, axis=0)
    
    df1 = time_features_df.reset_index().drop('index', 1)
    df2 = frequency_features_df.reset_index().drop('index', 1)
    
    return (df1,df2)

#### 1.2.1 WBB  features results merging

In [11]:
wbb_time,wbb_freq = create_and_merge_dataframes(wbb_files)

In [12]:
wbb_time.head()

Unnamed: 0,Rd mean distance,ml mean distance,ap mean distance,Rd rms distance,ml rms distance,ap rms distance,Rd path length,ml path length,ap path length,Rd mean velocity,...,ml range,ap range,95% confidence circle area,95% confidence elipse area,Sway area,Mean frequency,Mean frequency-ml,Mean frequency-ap,Fractal dimension-CC,Fractal dimension-CE
0,5.853794,4.139257,6.215491,7.236378,5.116892,7.8126,1156.977038,834.744011,649.542621,14.236739,...,38.376332,37.7309,518.906165,365.121982,41.086883,0.387073,0.438674,0.227323,1.507686,1.54387
1,3.104017,2.194871,2.57986,3.802854,2.689024,3.178997,599.786864,390.51487,359.453877,7.389541,...,11.398411,31.964583,141.787871,,8.674694,0.37889,0.387502,0.303454,1.506103,
2,2.47834,1.752451,3.231798,2.964012,2.095873,4.473267,551.248566,383.585738,309.743555,6.794298,...,8.844581,25.678797,83.412492,,9.115337,0.436319,0.476913,0.208824,1.543325,
3,0.702834,0.496979,0.349887,0.902053,0.637848,0.436807,596.387538,451.87802,297.929853,7.381034,...,4.889105,8.482124,8.377497,,1.411116,1.671415,1.989284,1.862945,1.854726,
4,1.241501,0.877874,0.314234,1.422258,1.005688,0.397117,591.560884,472.556506,263.905482,7.288195,...,4.65821,4.318738,17.839525,,1.809374,0.934314,1.172377,1.829114,1.743681,


In [13]:
wbb_freq.head()

Unnamed: 0,Rd Total power,ML Total power,AP Total power,Rd peak frequency,ML peak frequency,AP peak frequency,50% Rd peak frequency,50% ML peak frequency,50% AP peak frequency,80% Rd peak frequency,80% ML peak frequency,80% AP peak frequency
0,0.238545,0.12553,0.088516,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
1,0.043231,0.022063,0.014657,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
2,0.040298,0.020813,0.012053,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
3,0.048451,0.031087,0.013026,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
4,0.05951,0.031745,0.010156,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0


#### 1.2.2 FP  features results merging

In [14]:
fp_time,fp_freq = create_and_merge_dataframes(fp_files)

In [15]:
fp_time.head()

Unnamed: 0,Rd mean distance,ml mean distance,ap mean distance,Rd rms distance,ml rms distance,ap rms distance,Rd path length,ml path length,ap path length,Rd mean velocity,...,ml range,ap range,95% confidence circle area,95% confidence elipse area,Sway area,Mean frequency,Mean frequency-ml,Mean frequency-ap,Fractal dimension-CC,Fractal dimension-CE
0,8.61502,6.091739,4.361874,10.870381,7.68652,5.484854,1045.610464,639.099157,705.946268,38.870277,...,38.525438,32.600497,1197.053665,406.322996,109.116541,0.718094,0.689445,1.063583,1.475652,1.600764
1,3.79891,2.686235,2.768912,4.644603,3.28423,3.297197,199.092965,162.98867,86.984544,7.252931,...,14.822461,14.165427,210.966074,59.355686,10.806937,0.303861,0.390747,0.202309,1.323339,1.441706
2,6.473231,4.577266,1.986784,8.296883,5.866782,2.376812,169.825584,140.786199,68.860825,6.137535,...,27.698238,10.659815,707.86656,,8.03555,0.150901,0.196503,0.221431,1.20398,
3,0.589418,0.416781,0.596368,0.717435,0.507303,0.788092,85.554779,39.582178,66.405673,3.091969,...,2.619592,9.064748,5.005497,,0.88878,0.834895,0.606746,0.711389,1.525445,
4,0.436363,0.308555,1.019478,0.539198,0.38127,1.147648,90.988292,42.467225,71.406114,3.253067,...,1.850596,4.463669,2.879589,,1.063007,1.186492,0.869868,0.44268,1.605404,


In [16]:
fp_freq.head()

Unnamed: 0,Rd Total power,ML Total power,AP Total power,Rd peak frequency,ML peak frequency,AP peak frequency,50% Rd peak frequency,50% ML peak frequency,50% AP peak frequency,80% Rd peak frequency,80% ML peak frequency,80% AP peak frequency
0,1.402389,0.768656,1.131305,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
1,0.104832,0.053719,0.014431,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
2,0.073013,0.03714,0.008329,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
3,0.005709,0.002946,0.0136,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0
4,0.00645,0.003687,0.010739,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0


## 2. Statistical analysis

### 2.1 General descriptive statistics HTML reports 

In [17]:
outputfile = "/Users/Antonin/Documents/VUB/semester 4/thesis/validation study/notebooks/results/wbb_data_time_domain_report.html"
df_profile = pandas_profiling.ProfileReport(wbb_time, bins=50)
df_profile.to_file(outputfile=outputfile)

### 2.2 Features mean value and standard deviation 

In [18]:
wbb_time_mean_df = pd.DataFrame(wbb_time.mean(), columns = ["WBB mean"])
wbb_time_std_df = pd.DataFrame(wbb_time.std(), columns = ["WBB std"])

In [19]:
wbb_freq_mean_df = pd.DataFrame(wbb_freq.mean(), columns = ["WBB mean"])
wbb_freq_std_df = pd.DataFrame(wbb_freq.std(), columns = ["WBB std"])

In [20]:
fp_time_mean_df = pd.DataFrame(fp_time.mean(), columns = ["FP mean"])
fp_time_std_df = pd.DataFrame(fp_time.std(), columns = ["FP std"])

In [21]:
fp_freq_mean_df = pd.DataFrame(fp_freq.mean(), columns = ["FP mean"])
fp_freq_std_df = pd.DataFrame(fp_freq.std(), columns = ["FP std"])

In [22]:
time_domain_results = pd.concat([wbb_time_mean_df, wbb_time_std_df, fp_time_mean_df, fp_time_std_df], axis = 1)

In [23]:
time_domain_results

Unnamed: 0,WBB mean,WBB std,FP mean,FP std
Rd mean distance,13.552456,32.432208,6.480793,4.714154
ml mean distance,9.583034,22.933034,4.582613,3.33341
ap mean distance,4.680756,6.040587,9.069151,21.435949
Rd rms distance,16.054781,36.388456,8.252971,5.98865
ml rms distance,11.352445,25.730524,5.835732,4.234615
ap rms distance,5.90149,7.349371,10.58383,23.380725
Rd path length,954.478377,1665.885733,657.231875,669.274025
ml path length,723.283413,1326.190177,372.935958,295.920975
ap path length,474.195543,890.987671,445.909521,573.572235
Rd mean velocity,12.067203,20.541134,23.245698,23.663873


### 2.3 Pearson's correlation coefficient and regression lines equations 


In [26]:
coeffs = fp_time.corrwith(wbb_time)
coeffs

Rd mean distance             -0.045254
ml mean distance             -0.045254
ap mean distance             -0.051826
Rd rms distance              -0.043851
ml rms distance              -0.043851
ap rms distance              -0.046994
Rd path length               -0.036585
ml path length               -0.041686
ap path length               -0.035293
Rd mean velocity             -0.039386
ml mean velocity             -0.047307
ap mean velocity             -0.035695
Rd range                     -0.009905
ml range                     -0.013620
ap range                     -0.043081
95% confidence circle area    0.054723
95% confidence elipse area   -0.030814
Sway area                    -0.018345
Mean frequency                0.046761
Mean frequency-ml             0.101756
Mean frequency-ap             0.049040
Fractal dimension-CC         -0.005997
Fractal dimension-CE         -0.013227
dtype: float64

In [36]:
stats.pearsonr(fp_time["Rd mean distance"],wbb_time["Rd mean distance"])

ValueError: operands could not be broadcast together with shapes (519,) (511,) 

### 2.4 Two-tailed paired-sample statistical test



### 2.5 Intraclass correlation coefficient (ICC) 



### 2.6 Bland and Altman plots + LOA (limit of agreement)