In [22]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder,MinMaxScaler, KBinsDiscretizer, LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Read the recordings; thousands=',' lets pandas parse comma-grouped numbers as numeric.
data = pd.read_csv("newfall.csv", low_memory=False, thousands=',')

# Swap the FALL column for the integer codes produced by LabelEncoder.
# NOTE(review): which original label ends up as 1 is not visible here - confirm that
# 1 really means "fall" before trusting the per-class scores further down.
encoded_fall = LabelEncoder().fit_transform(data["FALL"])
data["FALL"] = encoded_fall

# Class balance of the encoded target (displayed as the cell output).
data["FALL"].value_counts()

0    390
1     12
Name: FALL, dtype: int64

In [23]:
# Target vector: the encoded FALL column.
y = data["FALL"]

# Feature matrix: every remaining column.
X = pd.DataFrame(data.drop(columns="FALL"))

# Remember the column names so they can be restored after scaling.
feature_names = X.columns.values.tolist()
print(feature_names)

['Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'Pressure', 'Temperature']


In [24]:
# Rescale every feature column into the range [-1, 1].
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test split.
min_max_scaler = MinMaxScaler(feature_range=(-1,1))
data_scale = min_max_scaler.fit_transform(X)

# The frame rebuilt from the scaled values has positional column ids (0, 1, ...);
# map each position back to its original feature name.
feature_dict = dict(enumerate(feature_names))
X = pd.DataFrame(data_scale).rename(columns=feature_dict)
X.head()

Unnamed: 0,Ax,Ay,Az,Gx,Gy,Gz,Pressure,Temperature
0,-0.079951,0.252632,-0.302326,-0.008927,-0.034086,-0.197864,0.055829,-1.0
1,-0.068143,0.255921,-0.305102,0.032761,-0.037656,0.131682,-0.429195,-1.0
2,-0.060763,0.261184,-0.31135,0.031724,-0.055244,0.130883,-0.37323,-1.0
3,-0.058795,0.269737,-0.325928,0.029923,-0.059074,0.135304,-0.466503,-1.0
4,-0.046986,0.261184,-0.326623,0.031083,-0.060424,0.144994,-0.559777,-1.0


In [25]:
from imblearn.over_sampling import SMOTE
# Balance the two classes by synthesising minority-class samples with SMOTE.
sm = SMOTE(random_state=2)
# `fit_sample` was deprecated in imbalanced-learn 0.4 and later removed;
# `fit_resample` is the supported name for the same operation.
X_over, y_over = sm.fit_resample(X, y)
# Recent imbalanced-learn versions return pandas objects for pandas input, while the
# fold-splitting code further down indexes with integer position arrays
# (X[train_index]); normalise to NumPy arrays so that indexing stays positional.
X_over, y_over = np.asarray(X_over), np.asarray(y_over)
print("After OverSampling, counts of label '1': {}".format(sum(y_over==1)))
print("After OverSampling, counts of label '0': {}".format(sum(y_over==0)))
print("-------------------------------------------------------------")

After OverSampling, counts of label '1': 390
After OverSampling, counts of label '0': 390
-------------------------------------------------------------


In [28]:
from sklearn import linear_model
from sklearn import tree
from sklearn.metrics import accuracy_score
# make_scorer is public API in sklearn.metrics; the private sklearn.metrics.scorer
# module it used to be imported from was removed in scikit-learn 0.24.
from sklearn.metrics import make_scorer
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import cross_val_predict


def cv_train_test_split(X, y, n_splits=10):
    """Split ``X`` / ``y`` into stratified cross-validation folds.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample. NumPy arrays, DataFrames and
        nested lists are accepted.
    y : array-like
        Class labels aligned row-for-row with ``X``.
    n_splits : int, default 10
        Number of folds handed to ``StratifiedKFold``.

    Returns
    -------
    tuple of four lists
        ``(X_Train, Y_Train, X_Test, Y_Test)``; element ``i`` of each list is
        the NumPy array for fold ``i``.
    """
    from sklearn.model_selection import StratifiedKFold

    # The splitter yields integer *positions*. Indexing a DataFrame or Series with
    # them would be interpreted as a column/label lookup, so work on plain arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    X_Train, Y_Train, X_Test, Y_Test = [], [], [], []
    skf = StratifiedKFold(n_splits=n_splits)
    for train_index, test_index in skf.split(X, y):
        X_Train.append(X[train_index])
        X_Test.append(X[test_index])
        Y_Train.append(y[train_index])
        Y_Test.append(y[test_index])
    return X_Train, Y_Train, X_Test, Y_Test

# Build the cross-validation folds from the SMOTE-oversampled data.
# NOTE(review): oversampling is done before this split, so the test folds also contain
# synthetic samples; the scores below may therefore be optimistic - consider splitting
# first and oversampling only the training part of each fold.
feature_train, label_train, feature_test, label_test = cv_train_test_split(X_over, y_over)

def model(m, feature_train, label_train, feature_test, label_test, X_und, y_und):
    """Build the classifier selected by ``m`` and print its per-fold scores.

    ``m`` may be ``"linear"`` (SGD classifier) or ``"tree"`` (entropy-based
    decision tree); any other value does nothing. The four fold lists are
    forwarded to ``printScore`` unchanged. ``X_und`` and ``y_und`` are accepted
    but not used.
    """
    if m == "linear":
        estimator = linear_model.SGDClassifier(
            loss='modified_huber',
            penalty='elasticnet',
            alpha=1e-4,
            random_state=42,
            shuffle=True,
            n_jobs=None,
        )
        banner = "Linear-based Model:"
    elif m == "tree":
        estimator = tree.DecisionTreeClassifier(criterion="entropy")
        banner = "Tree-based Model:"
    else:
        # Unknown selector: nothing to evaluate.
        return

    print(banner)
    printScore(estimator, feature_train, label_train, feature_test, label_test)


        
def printScore(model, feature_train, label_train, feature_test, label_test):
    """Fit ``model`` on each fold and print a table of per-fold scores.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Refitted from scratch on every fold. (The parameter name shadows the
        module-level ``model`` function inside this body only.)
    feature_train, label_train, feature_test, label_test : sequences
        Parallel per-fold sequences of training features, training labels,
        test features and test labels; all four must have the same length.

    Prints one row per fold (accuracy, precision and recall for label ``1``)
    followed by a row with the averages over all folds.

    Raises
    ------
    ValueError
        If the four sequences differ in length or contain no folds.
    """
    from prettytable import PrettyTable

    n_folds = len(feature_train)
    if not (len(label_train) == len(feature_test) == len(label_test) == n_folds):
        raise ValueError("fold lists must all have the same length")
    if n_folds == 0:
        raise ValueError("at least one fold is required")

    t = PrettyTable(['id', 'accuracy', 'precision of fall', 'recall of fall'])
    fmt = '{0:.3f}'.format
    sum_acc, sum_pre, sum_rec = 0.0, 0.0, 0.0

    folds = zip(feature_train, label_train, feature_test, label_test)
    for fold_id, (X_tr, y_tr, X_te, y_te) in enumerate(folds, start=1):
        model.fit(X_tr, y_tr)
        y_pred = model.predict(X_te)

        acc = float(accuracy_score(y_te, y_pred))
        # average=None with labels=[1] yields a one-element array; take the element
        # explicitly, since float() on a non-0-d array is deprecated in NumPy >= 1.25.
        pre = float(precision_score(y_te, y_pred, average=None, labels=[1])[0])
        rec = float(recall_score(y_te, y_pred, average=None, labels=[1])[0])

        t.add_row([fold_id, fmt(acc), fmt(pre), fmt(rec)])
        sum_acc += acc
        sum_pre += pre
        sum_rec += rec

    # Average over the folds actually supplied rather than a fixed count of 10.
    t.add_row([" Average ", fmt(sum_acc / n_folds), fmt(sum_pre / n_folds), fmt(sum_rec / n_folds)])
    print(t)

# Evaluate the decision-tree classifier on the prepared folds. The last two arguments
# bind to X_und / y_und, which model() accepts but does not use.
model("tree", feature_train, label_train, feature_test, label_test, X_over, y_over)
# model("linear", feature_train, label_train, feature_test, label_test, X_over, y_over)

Tree-based Model:
+-----------+----------+-------------------+----------------+
|     id    | accuracy | precision of fall | recall of fall |
+-----------+----------+-------------------+----------------+
|     1     |  0.974   |       1.000       |     0.949      |
|     2     |  1.000   |       1.000       |     1.000      |
|     3     |  0.974   |       0.951       |     1.000      |
|     4     |  0.692   |       0.623       |     0.974      |
|     5     |  1.000   |       1.000       |     1.000      |
|     6     |  0.987   |       1.000       |     0.974      |
|     7     |  0.987   |       1.000       |     0.974      |
|     8     |  0.923   |       0.867       |     1.000      |
|     9     |  0.577   |       0.542       |     1.000      |
|     10    |  0.885   |       0.841       |     0.949      |
|  Average  |  0.900   |       0.882       |     0.982      |
+-----------+----------+-------------------+----------------+
Linear-based Model:
+-----------+----------+--------