In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import entropy, skew, kurtosis


def make_data_set(data, classification):
    dataset = pd.DataFrame()
    for axis_label in  data:
        axis_features = pd.DataFrame(index=[0]) 
        axis_features[f'mean_acc_{axis_label}'] = np.mean(data[axis_label])
        axis_features[f'std_dev_acc_{axis_label}'] = np.std(data[axis_label])
        axis_features[f'median_acc_{axis_label}'] = np.median(data[axis_label])
        axis_features[f'range_acc_{axis_label}'] = np.max(data[axis_label]) - np.min(data[axis_label])
        axis_features[f'rms_acc_{axis_label}'] = np.sqrt(np.mean(np.square(data[axis_label])))
        axis_features[f'sma_acc_{axis_label}'] = np.sum(np.abs(data[axis_label]))
        axis_features[f'power_acc_{axis_label}'] = np.sum(np.square(data[axis_label])) / len(data[axis_label])
        axis_features[f'energy_acc_{axis_label}'] = np.sum(np.square(data[axis_label])) / len(data[axis_label])
        axis_features[f'skew_acc_{axis_label}'] = skew(data[axis_label])
        axis_features[f'class'] = classification
        dataset = pd.concat([dataset, axis_features], axis=1)
    return dataset


def read_data():
    directorys = ["data/idle/","data/running/","data/stairs/","data/walking/"]
    files_and_folders = []
    for directory in directorys:
        files = [os.path.join(directory, file) for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
        files_and_folders.extend(files)

    
    locations = files_and_folders
    dataset = pd.DataFrame()
    for path in  locations:
        if 'idle' in path:
            classification = 0
        elif 'running' in path:
            classification = 1
        elif 'stairs' in path:
            classification = 2
        elif 'walking' in path:
            classification = 3
        else:
            raise ValueError("something wrong woth path")
        data = pd.read_csv(path)
        data = make_data_set(data,classification)
        dataset = pd.concat([dataset, data])
    return  dataset.reset_index()

data = read_data()

In [2]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(data ,shuffle = True, test_size = 0.20)

X = train_data.iloc[:, :-1].to_numpy()
y = train_data.iloc[:, -1].to_numpy()
print()

model = svm.SVC(kernel="rbf",C=100)
ovr = OneVsRestClassifier(model)
ovr.fit(X,y)
yhat = ovr.predict(X)
print(accuracy_score(yhat,y))
print(precision_score(yhat,y,average='macro'))

X_test = test_data.iloc[:, :-1].to_numpy()
y_test = test_data.iloc[:, -1].to_numpy()
yhat_test = ovr.predict(X_test)



0.9872315728380732
0.8845583079597737


In [3]:
from sklearn.model_selection import cross_val_score
cross_val_score(ovr,X,y,cv=5)
cross_val_score(ovr,X_test,y_test,cv=5)

array([0.98069498, 0.98069498, 0.97683398, 0.98062016, 0.98062016])

In [4]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=0,n_estimators=60)
model.fit(X,y)

y_pred = model.predict(X)
accuracy = accuracy_score(y_pred,y)
print(accuracy_score(y_pred,y))
print(precision_score(y_pred,y,average='macro'))


X_test = test_data.iloc[:, :-1].to_numpy()
y_test = test_data.iloc[:, -1].to_numpy()
yhat_test = model.predict(X_test)


1.0
1.0


In [5]:
cross_val_score(model,X,y,cv=5)
cross_val_score(model,X_test,y_test,cv=5)

array([1., 1., 1., 1., 1.])

In [6]:

from sklearn.model_selection import GridSearchCV

train_data, test_data = train_test_split(data ,shuffle = True, test_size = 0.20)

X = train_data.iloc[:, :-1].to_numpy()
y = train_data.iloc[:, -1].to_numpy()


model_params = {
    'svm': {
        'model': svm.SVC(),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [1, 5, 10],
            'max_depth': [None, 5, 10],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
    }
}
    


In [7]:
scores = []

for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(X, y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,1.0,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,1.0,"{'max_depth': None, 'min_samples_leaf': 1, 'mi..."


In [8]:
clf.best_estimator_

In [12]:
from sklearn.metrics import classification_report
X = train_data.iloc[:, :-1].to_numpy()
y = train_data.iloc[:, -1].to_numpy()

model = clf.best_estimator_

y_pred = model.predict(X)

print(classification_report(y,y_pred))

X_test = test_data.iloc[:, :-1].to_numpy()
y_test = test_data.iloc[:, -1].to_numpy()
yhat_test = model.predict(X_test)
print(cross_val_score(model,X,y,cv=5))
print(cross_val_score(model,X_test,y_test,cv=5))
print(classification_report(y_test,yhat_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       836
           1       1.00      1.00      1.00      2717
           2       1.00      1.00      1.00       133
           3       1.00      1.00      1.00      1483

    accuracy                           1.00      5169
   macro avg       1.00      1.00      1.00      5169
weighted avg       1.00      1.00      1.00      5169

[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       203
           1       1.00      1.00      1.00       691
           2       1.00      1.00      1.00        32
           3       1.00      1.00      1.00       367

    accuracy                           1.00      1293
   macro avg       1.00      1.00      1.00      1293
weighted avg       1.00      1.00      1.00      1293



In [13]:
X = train_data.iloc[:, :-1].to_numpy()
y = train_data.iloc[:, -1].to_numpy()


model = svm.SVC(kernel="linear",C=1)
model.fit(X,y)

y_pred = model.predict(X)

print(classification_report(y,y_pred))

X_test = test_data.iloc[:, :-1].to_numpy()
y_test = test_data.iloc[:, -1].to_numpy()
yhat_test = model.predict(X_test)



print(classification_report(y_test,yhat_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       836
           1       1.00      1.00      1.00      2717
           2       1.00      1.00      1.00       133
           3       1.00      1.00      1.00      1483

    accuracy                           1.00      5169
   macro avg       1.00      1.00      1.00      5169
weighted avg       1.00      1.00      1.00      5169

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       203
           1       1.00      1.00      1.00       691
           2       1.00      1.00      1.00        32
           3       1.00      1.00      1.00       367

    accuracy                           1.00      1293
   macro avg       1.00      1.00      1.00      1293
weighted avg       1.00      1.00      1.00      1293



In [14]:
print(cross_val_score(model,X,y,cv=5))
print(cross_val_score(model,X_test,y_test,cv=5))

[1. 1. 1. 1. 1.]
[1.         1.         1.         1.         0.99612403]
