In [5]:
csv_file='data/all_features.csv'

In [6]:
from falldetection.feature_extractor_workflow import extract_features_and_save, FeatureExtractorWorkflow

extract_features_and_save(baseDir='data/FallDataSet', csv_file=csv_file)

In [47]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
from time import time
from IPython.display import display # Allows the use of display() for DataFrames

# Pretty display for notebooks
%matplotlib inline

# Load the Census dataset
data = pd.read_csv(csv_file)

# Success - Display the first record
display(data.head(n=10))
data.dtypes

Unnamed: 0.1,Unnamed: 0,fall,feature,sensorFile
0,0,False,[ 8.53025913e+00 1.10629320e+01 9.80561983e+...,data/FallDataSet/209/Testler Export/813/Test_5...
1,1,False,[ 8.49761963e+00 1.10244751e+01 9.72413096e+...,data/FallDataSet/209/Testler Export/813/Test_3...
2,2,False,[ 8.18564892e+00 1.04157209e+01 9.42503651e+...,data/FallDataSet/209/Testler Export/813/Test_1...
3,3,False,[ 8.73000622e+00 1.04937792e+01 9.66711705e+...,data/FallDataSet/209/Testler Export/813/Test_2...
4,4,False,[ 8.18207264e+00 1.18455410e+01 9.77535177e+...,data/FallDataSet/209/Testler Export/813/Test_6...
5,5,False,[ 8.22041035e+00 1.15673304e+01 9.71542939e+...,data/FallDataSet/209/Testler Export/813/Test_4...
6,6,True,[-6.93664551e+00 4.64452744e+01 3.18962890e+...,data/FallDataSet/209/Testler Export/906/Test_5...
7,7,True,[-2.07896948e+01 4.38634872e+01 2.87272057e+...,data/FallDataSet/209/Testler Export/906/Test_3...
8,8,True,[-1.70715332e+01 3.96757603e+01 2.65593364e+...,data/FallDataSet/209/Testler Export/906/Test_1...
9,9,True,[-1.00956917e+01 3.54853630e+01 4.06466475e+...,data/FallDataSet/209/Testler Export/906/Test_2...


Unnamed: 0     int64
fall            bool
feature       object
sensorFile    object
dtype: object

In [48]:
# TODO: Total number of records
n_records = len(data)

# TODO: Number of records where individual's income is more than $50,000
n_fall = len(data[data['fall'] == True])
n_adl = len(data[data['fall'] == False])

# Print the results
print("Total number of records: {}".format(n_records))
print("Number of falls: {}".format(n_fall))
print("Number of ADLs: {}".format(n_adl))

Total number of records: 3299
Number of falls: 1824
Number of ADLs: 1475


In [53]:
y = data['fall']
X = data.drop(columns=['fall', 'sensorFile', 'Unnamed: 0'])
X.head()

Unnamed: 0,feature
0,[ 8.53025913e+00 1.10629320e+01 9.80561983e+...
1,[ 8.49761963e+00 1.10244751e+01 9.72413096e+...
2,[ 8.18564892e+00 1.04157209e+01 9.42503651e+...
3,[ 8.73000622e+00 1.04937792e+01 9.66711705e+...
4,[ 8.18207264e+00 1.18455410e+01 9.77535177e+...


In [54]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    test_size = 0.2, 
                                                    random_state = 815)

# Show the results of the split
print("Training set has {} samples.".format(X_train.shape[0]))
print("Testing set has {} samples.".format(X_test.shape[0]))

Training set has 2639 samples.
Testing set has 660 samples.


In [8]:
# taken from https://stackoverflow.com/questions/26083293/calculating-autocorrelation-of-pandas-dataframe-along-each-column
def df_autocorr(df, lag=1, axis=0):
    """Compute full-sample column-wise autocorrelation for a DataFrame."""
    return df.apply(lambda col: col.autocorr(lag), axis=axis)

df_autocorr(data)

Acc_X    0.993683
Acc_Y    0.462005
Acc_Z    0.983735
Gyr_X    0.569293
Gyr_Y    0.954030
Gyr_Z    0.685405
Mag_X    0.999335
Mag_Y    0.983643
Mag_Z    0.999620
dtype: float64

In [9]:
np.fft.fft(data['Acc_X'])

array([ 1.63220837e+03+0.00000000e+00j,  2.41796075e+02-1.48426686e+03j,
       -2.21713373e+02-6.88929245e+01j,  2.11453757e+02-4.03372222e+02j,
       -1.80211405e+02-1.27710525e+02j,  1.64408081e+02-1.58443285e+02j,
       -1.30429008e+02-1.59796150e+02j,  1.16147538e+02-5.64317705e+01j,
       -7.87941479e+01-1.61502777e+02j,  6.98993831e+01-9.94053147e+00j,
       -4.23755797e+01-1.51913519e+02j,  3.95312031e+01+5.52211586e+00j,
       -1.78977660e+01-1.34493243e+02j,  2.00803825e+01+7.18920685e+00j,
       -1.27537417e+00-1.15214729e+02j,  7.38371378e+00+4.83999668e+00j,
        8.60014348e+00-9.55803441e+01j, -1.43228415e+00-2.89336374e+00j,
        1.62140350e+01-7.74876305e+01j, -7.81225516e+00-1.07208204e+01j,
        2.20307927e+01-6.10252477e+01j, -1.25140613e+01-1.84932165e+01j,
        2.57107886e+01-4.55066873e+01j, -1.54867397e+01-2.77061931e+01j,
        2.63778946e+01-3.08124269e+01j, -1.46956560e+01-3.53089985e+01j,
        2.37955738e+01-1.91212712e+01j, -1.01039074

${\mathit{ACC}}=\frac{\mathit{TP}+\mathit{TN}}{\mathit{TP}+\mathit{FP}+\mathit{FN}+\mathit{TN}}$

In [55]:
from sklearn.svm import SVC

clf = SVC(random_state = 815)
clf.fit(X_train, y_train)

ValueError: could not convert string to float: '[-2.31323242e+00  2.13268995e+01  4.91210777e+00  2.80650923e+01\n  5.21219481e-01 -4.48831145e-01 -1.65832520e+01  8.14485550e+00\n -4.61349110e+00  3.19616659e+01  5.29271590e-02 -1.64979224e+00\n -3.89556885e+00  7.79869556e+00  1.22508578e+00  2.55751896e+00\n  9.49979922e-01  4.87282675e+00 -3.60695842e+00  1.17031809e+00\n -8.65782666e-02  3.52293879e-01 -3.61177183e+00  1.83242711e+01\n -1.44160598e+00  1.56950444e+00 -5.18040606e-02  1.58804469e-01\n -2.34484565e-01  5.80924041e+00 -3.40083346e-01  3.70720204e+00\n  4.02279148e-01  9.77821006e-01  2.33332752e+00  4.00427865e+00\n -7.15332031e-01 -3.67919922e-01 -5.62770730e-01  1.33116209e-02\n  1.91028491e-01 -1.77598533e+00 -6.44042969e-01  5.96191406e-01\n -2.18276222e-02  3.24189766e-01 -6.08160205e-03 -1.97365927e+00\n -5.81542969e-01 -1.69189453e-01 -3.43496190e-01  2.09732916e-02\n  1.51531413e-01 -1.76079804e+00]'

In [5]:
from sklearn.metrics import fbeta_score

predictions_test = clf.predict(X_test)
print(list(predictions_test))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [6]:
score = fbeta_score(y_test, predictions_test, average='binary', beta=0.5)
score

  'precision', 'predicted', average, warn_for)


0.0