In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold

In [55]:
def print_score(clf, X, y, cv=0):
    
    y_pred = clf.predict(X)
    acc_score = accuracy_score(y, y_pred)
    clf_report = classification_report(y, y_pred)
    conf_matrix = confusion_matrix(y, y_pred)
                 
    print(f"Results:\n")
    print(f"accuracy score: {acc_score:.4f}\n")
    print(f"Classification Report: \n {clf_report}\n")
    print(f"Confusion Matrix: \n {conf_matrix}\n")

    if cv > 1:
        res = cross_val_score(clf, X, y, cv=cv, scoring='accuracy')
        print(f"Average Accuracy: \t {np.mean(res):.4f}")
        print(f"Accuracy SD: \t\t {np.std(res):.4f}")
        
 

In [96]:
df = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

In [3]:
df.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


In [97]:
X = df.drop(columns=['Activity'])
y = df['Activity']
X_test = df_test.drop(columns=['Activity'])
y_test = df_test['Activity']

In [32]:
X.describe()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
count,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,...,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0
mean,0.274488,-0.017695,-0.109141,-0.605438,-0.510938,-0.604754,-0.630512,-0.526907,-0.60615,-0.468604,...,-0.307009,-0.625294,0.008684,0.002186,0.008726,-0.005981,-0.489547,0.058593,-0.056515,17.413085
std,0.070261,0.040811,0.056635,0.448734,0.502645,0.418687,0.424073,0.485942,0.414122,0.544547,...,0.321011,0.307584,0.336787,0.448306,0.608303,0.477975,0.511807,0.29748,0.279122,8.975143
min,-1.0,-1.0,-1.0,-1.0,-0.999873,-1.0,-1.0,-1.0,-1.0,-1.0,...,-0.995357,-0.999765,-0.97658,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0
25%,0.262975,-0.024863,-0.120993,-0.992754,-0.978129,-0.980233,-0.993591,-0.978162,-0.980251,-0.936219,...,-0.542602,-0.845573,-0.121527,-0.289549,-0.482273,-0.376341,-0.812065,-0.017885,-0.143414,8.0
50%,0.277193,-0.017219,-0.108676,-0.946196,-0.851897,-0.859365,-0.950709,-0.857328,-0.857143,-0.881637,...,-0.343685,-0.711692,0.009509,0.008943,0.008735,-0.000368,-0.709417,0.182071,0.003181,19.0
75%,0.288461,-0.010783,-0.097794,-0.242813,-0.034231,-0.262415,-0.29268,-0.066701,-0.265671,-0.017129,...,-0.126979,-0.503878,0.150865,0.292861,0.506187,0.359368,-0.509079,0.248353,0.107659,26.0
max,1.0,1.0,1.0,1.0,0.916238,1.0,1.0,0.967664,1.0,1.0,...,0.989538,0.956845,1.0,1.0,0.998702,0.996078,1.0,0.478157,1.0,30.0


In [68]:
X_test.describe()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
count,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,...,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0,2947.0
mean,0.273996,-0.017863,-0.108386,-0.613635,-0.50833,-0.633797,-0.641278,-0.522676,-0.637038,-0.462063,...,-0.277593,-0.598756,0.005264,0.003799,0.040029,-0.017298,-0.513923,0.074886,-0.04872,12.986427
std,0.06057,0.025745,0.042747,0.412597,0.494269,0.362699,0.385199,0.479899,0.357753,0.523916,...,0.317245,0.311042,0.336147,0.445077,0.634989,0.501311,0.509205,0.3243,0.241467,6.950984
min,-0.592004,-0.362884,-0.576184,-0.999606,-1.0,-0.998955,-0.999417,-0.999914,-0.998899,-0.952357,...,-1.0,-1.0,-1.0,-0.993402,-0.998898,-0.991096,-0.984195,-0.913704,-0.949228,2.0
25%,0.262075,-0.024961,-0.121162,-0.990914,-0.973664,-0.976122,-0.992333,-0.974131,-0.975352,-0.934447,...,-0.517494,-0.829593,-0.130541,-0.2826,-0.518924,-0.428375,-0.829722,0.02214,-0.098485,9.0
50%,0.277113,-0.016967,-0.108458,-0.931214,-0.790972,-0.827534,-0.937664,-0.799907,-0.817005,-0.852659,...,-0.311023,-0.683672,0.005188,0.006767,0.047113,-0.026726,-0.729648,0.181563,-0.010671,12.0
75%,0.288097,-0.010143,-0.097123,-0.267395,-0.105919,-0.311432,-0.321719,-0.133488,-0.322771,-0.009965,...,-0.083559,-0.458332,0.1462,0.288113,0.622151,0.394387,-0.545939,0.260252,0.092373,18.0
max,0.671887,0.246106,0.494114,0.465299,1.0,0.489703,0.439657,1.0,0.427958,0.786436,...,1.0,1.0,0.998898,0.986347,1.0,1.0,0.83318,1.0,0.973113,24.0


In [33]:
y.value_counts()

LAYING                1407
STANDING              1374
SITTING               1286
WALKING               1226
WALKING_UPSTAIRS      1073
WALKING_DOWNSTAIRS     986
Name: Activity, dtype: int64

In [34]:
y_test.value_counts()

LAYING                537
STANDING              532
WALKING               496
SITTING               491
WALKING_UPSTAIRS      471
WALKING_DOWNSTAIRS    420
Name: Activity, dtype: int64

In [35]:
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X = sc.fit_transform(X)
# X_test = sc.fit_transform(X_test)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


### Logistic Regressions

In [98]:
LRmodel = LogisticRegression(solver='liblinear') 
LRmodel.fit(X, y)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False)

In [99]:
predictions = LRmodel.predict(X_test)

In [71]:
from sklearn.metrics import classification_report

In [56]:
print_score(clf, X, y, cv=10)

Results:

accuracy score: 0.9195

Classification Report: 
                     precision    recall  f1-score   support

            LAYING       0.99      0.99      0.99      1407
           SITTING       0.96      0.70      0.81      1286
          STANDING       0.78      0.97      0.87      1374
           WALKING       0.97      0.99      0.98      1226
WALKING_DOWNSTAIRS       1.00      0.86      0.92       986
  WALKING_UPSTAIRS       0.89      0.99      0.94      1073

         micro avg       0.92      0.92      0.92      7352
         macro avg       0.93      0.92      0.92      7352
      weighted avg       0.93      0.92      0.92      7352


Confusion Matrix: 
 [[1393    0   10    2    0    2]
 [  16  900  369    0    0    1]
 [   0   34 1338    2    0    0]
 [   0    0    0 1217    1    8]
 [   0    0    0   24  845  117]
 [   0    0    0    4    2 1067]]



NameError: name 'cross_val_score' is not defined

### Support Vector Machine

In [72]:
from sklearn import svm

In [73]:
C = 0.1
SVMmodel = svm.SVC(kernel='poly', degree=3, C=C)
SVMmodel.fit(X, y)



SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [74]:
predictions = SVMmodel.predict(X_test)

In [None]:
print_score(clf, X, y, cv=10)

### Random Forest

In [75]:
from sklearn.ensemble import RandomForestClassifier

In [76]:
RFModel = RandomForestClassifier(random_state=10)
RFModel.fit(X, y)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=10, verbose=0, warm_start=False)

In [50]:
predictions = RFModel.predict(X_test)

In [51]:
print(classification_report(y_test, predictions))

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       537
           SITTING       0.66      0.96      0.78       491
          STANDING       0.93      0.55      0.69       532
           WALKING       0.88      0.94      0.91       496
WALKING_DOWNSTAIRS       0.90      0.87      0.89       420
  WALKING_UPSTAIRS       0.91      0.87      0.89       471

         micro avg       0.86      0.86      0.86      2947
         macro avg       0.88      0.87      0.86      2947
      weighted avg       0.88      0.86      0.86      2947



#### Stacking

In [77]:
def Stacking(model,train,y,test,n_fold):
    folds=StratifiedKFold(n_splits=n_fold,random_state=1)
    test_pred=np.empty((test.shape[0],1),float)
    train_pred=np.empty((0,1),float)
    for train_indices,val_indices in folds.split(train,y.values):
        x_train,x_val=train.iloc[train_indices],train.iloc[val_indices]
        y_train,y_val=y.iloc[train_indices],y.iloc[val_indices]

        model.fit(X=x_train,y=y_train)
        train_pred=np.append(train_pred,model.predict(x_val))
        test_pred=np.append(test_pred,model.predict(test))
    return test_pred.reshape(-1,1),train_pred

In [79]:
test_pred1 ,train_pred1=Stacking(model=RFModel,n_fold=10, train=X, test=X_test,y=y)

In [80]:
test_pred2 ,train_pred2=Stacking(model=SVMmodel,n_fold=10, train=X, test=X_test,y=y)



In [82]:
train_pred1=pd.DataFrame(train_pred1)
test_pred1=pd.DataFrame(test_pred1)

train_pred2=pd.DataFrame(train_pred2)
test_pred2=pd.DataFrame(test_pred2)

In [88]:
train_pred1.head()

Unnamed: 0,0
0,STANDING
1,STANDING
2,STANDING
3,STANDING
4,STANDING


In [90]:
df = pd.concat([train_pred1, train_pred2], axis=1)
df_test = pd.concat([test_pred1, test_pred2], axis=1)

In [91]:
df.head()

Unnamed: 0,0,0.1
0,STANDING,STANDING
1,STANDING,STANDING
2,STANDING,STANDING
3,STANDING,STANDING
4,STANDING,STANDING


In [None]:
model = LogisticRegression(random_state=10)
model.fit(df,y)
# model.score(df_test, y_test)

In [None]:
# #LR
# X.sizeLRmodel = LogisticRegression(solver='liblinear') 
# LRmodel.fit(X, y)

In [107]:
print(X.shape)

(7352, 562)


In [172]:
for i in y.unique():
    print(i)

STANDING
SITTING
LAYING
WALKING
WALKING_DOWNSTAIRS
WALKING_UPSTAIRS


In [169]:
y_new = y.replace(to_replace = ['STANDING', 'SITTING','LAYING','WALKING','WALKING_DOWNSTAIRS','WALKING_UPSTAIRS'], value = [1,2,3,4,5,6]) 

In [193]:
X1 = X[0:3500]
X2 = X[3500:]

y1 = y[0:3500]
y2 = y[3500:]

In [194]:
print(X1.shape)
print(X2.shape)

(3500, 562)
(3852, 562)


In [195]:
#SVM
C = 0.1
SVMmodel = svm.SVC(kernel='poly', degree=3, C=C)
SVMmodel.fit(X1, y1)


#RF
RFmodel = RandomForestClassifier(random_state=10)
RFmodel.fit(X1, y1)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=10, verbose=0, warm_start=False)

In [196]:
y2_RF = RFmodel.predict(X2)
y2_SVM = SVMmodel.predict(X2)

In [197]:
print(type(y2_SVM))
print(y2_SVM)
print(y2_SVM.shape)
print(len(y2_SVM))

<class 'numpy.ndarray'>
['LAYING' 'LAYING' 'LAYING' ... 'WALKING_UPSTAIRS' 'WALKING_UPSTAIRS'
 'WALKING_UPSTAIRS']
(3852,)
3852


In [198]:
y2_RF2 = np.reshape(y2_RF,  (len(y2_RF),1)  )

y2_SVM2 = np.reshape(y2_SVM,(len(y2_SVM),1))

In [210]:
from sklearn.preprocessing import OneHotEncoder

one_hot_encoder = OneHotEncoder()

y2_RF3 = one_hot_encoder.fit_transform(y2_RF2).toarray()
y2_SVM3 = one_hot_encoder.fit_transform(y2_SVM2).toarray()

In [222]:
X2_new = np.concatenate((y2_RF3,y2_SVM3),axis=1)

In [224]:
xx = pd.DataFrame(X2_new)
xx.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [226]:
#LR
LRmodel = LogisticRegression(solver='liblinear') 
LRmodel.fit(xx, y2)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False)

In [228]:
predictions = LRmodel.predict(y2)

ValueError: could not convert string to float: 'LAYING'