In [1]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings('ignore', module='sklearn')

from sklearn.preprocessing import MinMaxScaler, LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier 

import matplotlib.pyplot as plt
%matplotlib inline

  from numpy.core.umath_tests import inner1d


In [2]:
from __future__ import print_function
import os
# Type Path
data_path = [r'C:\Users\wpghk\ai_data']

import pandas as pd
import numpy as np

filepath = os.sep.join(data_path + ['Human_Activity_Recognition_Using_Smartphones_Data.csv'])
data = pd.read_csv(filepath, sep=',')

In [3]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
data['Activity'] = le.fit_transform(data.Activity)

In [4]:
from sklearn.model_selection import StratifiedShuffleSplit
feature_cols = data.columns[:-1]
corr_values = data[feature_cols].corr()
# Get the split indexes
strat_shuf_split = StratifiedShuffleSplit(n_splits=1, 
                                          test_size=0.3, 
                                          random_state=42)

train_idx, test_idx = next(strat_shuf_split.split(data[feature_cols], data.Activity))

# Create the dataframes
X_train = data.loc[train_idx, feature_cols]
y_train = data.loc[train_idx, 'Activity']

X_test  = data.loc[test_idx, feature_cols]
y_test  = data.loc[test_idx, 'Activity']

In [9]:
def print_score(classifier, X_train, y_train, X_test, y_test, train = True):
    print("-"*100)
    if train == True:
        y_pred = classifier.predict(X_train)
        print("Training result:\n")
        print("Accuracy Score: {0:.4f}\n".format(accuracy_score(y_train, y_pred)))
        print("Classification Report:\n{}\n".format(classification_report(y_train, y_pred)))
        print("Confusion Matrix:\n{}\n".format(confusion_matrix(y_train, y_pred)))
        #res = cross_val_score(classifier, X_train, y_train, cv = 10, n_jobs= -1, scoring ="accuracy")
        #print("Average Accuracy:\t{0:.4f}\n".format(res.mean()))
        #print("Standard Deviation:\t{0:.4f}".format(res.std()))
    elif train == False:
        y_pred = classifier.predict(X_test)
        print("Test result:\n")
        print("Accuracy Score: {0:.4f}\n".format(accuracy_score(y_test, y_pred)))
        print("Classification Report:\n{}\n".format(classification_report(y_test, y_pred)))
        print("Confusion Matrix:\n{}\n".format(confusion_matrix(y_test, y_pred)))

In [7]:
random_forest = RandomForestClassifier(n_estimators = 50, criterion='entropy',random_state = 42, n_jobs=-1)
random_forest.fit(X_train, y_train)
print_score(random_forest ,X_train,y_train,X_test,y_test, train = True)
print_score(random_forest ,X_train,y_train,X_test,y_test, train = False)

----------------------------------------------------------------------------------------------------
Training result:

Accuracy Score: 1.0000

Classification Report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1361
          1       1.00      1.00      1.00      1244
          2       1.00      1.00      1.00      1334
          3       1.00      1.00      1.00      1205
          4       1.00      1.00      1.00       984
          5       1.00      1.00      1.00      1081

avg / total       1.00      1.00      1.00      7209


Confusion Matrix:
[[1361    0    0    0    0    0]
 [   0 1244    0    0    0    0]
 [   0    0 1334    0    0    0]
 [   0    0    0 1205    0    0]
 [   0    0    0    0  984    0]
 [   0    0    0    0    0 1081]]

Average Accuracy:	0.9773

Standard Deviation:	0.0055
----------------------------------------------------------------------------------------------------
Test result:

Accuracy Score: 0.97

In [8]:
svm = SVC(kernel='rbf', random_state=0, gamma=.10, C=1.0)
svm.fit(X_train, y_train)
print_score(svm ,X_train,y_train,X_test,y_test, train = True)
print_score(svm ,X_train,y_train,X_test,y_test, train = False)

----------------------------------------------------------------------------------------------------
Training result:

Accuracy Score: 0.9990

Classification Report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1361
          1       1.00      1.00      1.00      1244
          2       1.00      1.00      1.00      1334
          3       1.00      1.00      1.00      1205
          4       1.00      1.00      1.00       984
          5       1.00      1.00      1.00      1081

avg / total       1.00      1.00      1.00      7209


Confusion Matrix:
[[1361    0    0    0    0    0]
 [   0 1242    2    0    0    0]
 [   0    5 1329    0    0    0]
 [   0    0    0 1205    0    0]
 [   0    0    0    0  984    0]
 [   0    0    0    0    0 1081]]

Average Accuracy:	0.9821

Standard Deviation:	0.0039
----------------------------------------------------------------------------------------------------
Test result:

Accuracy Score: 0.98

In [10]:

knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X_train, y_train)

print_score(knn ,X_train,y_train,X_test,y_test, train = True)
print_score(knn ,X_train,y_train,X_test,y_test, train = False)

----------------------------------------------------------------------------------------------------
Training result:

Accuracy Score: 0.9877

Classification Report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1361
          1       0.96      0.97      0.97      1244
          2       0.97      0.97      0.97      1334
          3       1.00      1.00      1.00      1205
          4       1.00      1.00      1.00       984
          5       1.00      1.00      1.00      1081

avg / total       0.99      0.99      0.99      7209


Confusion Matrix:
[[1361    0    0    0    0    0]
 [   2 1206   36    0    0    0]
 [   0   45 1289    0    0    0]
 [   0    0    0 1203    1    1]
 [   0    0    0    3  981    0]
 [   0    0    0    0    1 1080]]

----------------------------------------------------------------------------------------------------
Test result:

Accuracy Score: 0.9673

Classification Report:
             precision    

In [10]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

print_score(logreg ,X_train,y_train,X_test,y_test, train = True)
print_score(logreg ,X_train,y_train,X_test,y_test, train = False)

----------------------------------------------------------------------------------------------------
Training result:

Accuracy Score: 0.9875

Classification Report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1361
          1       0.96      0.97      0.96      1244
          2       0.97      0.97      0.97      1334
          3       1.00      1.00      1.00      1205
          4       1.00      1.00      1.00       984
          5       1.00      1.00      1.00      1081

avg / total       0.99      0.99      0.99      7209


Confusion Matrix:
[[1361    0    0    0    0    0]
 [   0 1201   43    0    0    0]
 [   0   45 1289    0    0    0]
 [   0    0    0 1204    0    1]
 [   0    0    0    0  984    0]
 [   0    0    0    1    0 1080]]

----------------------------------------------------------------------------------------------------
Test result:

Accuracy Score: 0.9841

Classification Report:
             precision    

In [17]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)

print_score(dt ,X_train,y_train,X_test,y_test, train = True)
print_score(dt ,X_train,y_train,X_test,y_test, train = False)
dt.tree_.node_count, dt.tree_.max_depth

----------------------------------------------------------------------------------------------------
Training result:

Accuracy Score: 1.0000

Classification Report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1361
          1       1.00      1.00      1.00      1244
          2       1.00      1.00      1.00      1334
          3       1.00      1.00      1.00      1205
          4       1.00      1.00      1.00       984
          5       1.00      1.00      1.00      1081

avg / total       1.00      1.00      1.00      7209


Confusion Matrix:
[[1361    0    0    0    0    0]
 [   0 1244    0    0    0    0]
 [   0    0 1334    0    0    0]
 [   0    0    0 1205    0    0]
 [   0    0    0    0  984    0]
 [   0    0    0    0    0 1081]]

Average Accuracy:	0.9284

Standard Deviation:	0.0093
----------------------------------------------------------------------------------------------------
Test result:

Accuracy Score: 0.92

(405, 21)

In [11]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
bnb = BernoulliNB()
bnb.fit(X_train, y_train)
mnb = MultinomialNB()
mnb.fit(X_train, y_train)

print_score(gnb ,X_train,y_train,X_test,y_test, train = True)
print_score(gnb ,X_train,y_train,X_test,y_test, train = False)
print_score(bnb ,X_train,y_train,X_test,y_test, train = True)
print_score(bnb ,X_train,y_train,X_test,y_test, train = False)
print_score(mnb ,X_train,y_train,X_test,y_test, train = True)
print_score(mnb ,X_train,y_train,X_test,y_test, train = False)

ValueError: Input X must be non-negative