## Human Activity Recognition - Group 7

#### Importing necessary libraries

In [10]:
from pandas import read_csv
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier

#### Function to load a single file as a numpy array

In [11]:
def load_file(filepath):
    """Read a whitespace-delimited, header-less text file into a NumPy array.

    Parameters
    ----------
    filepath : str or path-like
        Location of the file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    numpy.ndarray
        The parsed table as returned by ``DataFrame.values``.
    """
    # `delim_whitespace=True` is deprecated since pandas 2.2 and emits a
    # FutureWarning; `sep=r'\s+'` is the documented equivalent and parses
    # runs of spaces/tabs (including leading whitespace) the same way.
    df = read_csv(filepath, header=None, sep=r'\s+')
    return df.values

#### Function to load a dataset group

In [12]:
def load_dataset_group(group, prefix=''):
    """Load the feature matrix and label column for one dataset group.

    The files are expected at ``<prefix><group>/X_<group>.txt`` and
    ``<prefix><group>/y_<group>.txt``.

    Parameters
    ----------
    group : str
        Name of the group directory and file suffix (e.g. ``'train'``).
    prefix : str, optional
        Path fragment prepended verbatim to the group directory.

    Returns
    -------
    tuple
        ``(X, y)`` as returned by ``load_file`` for the two files.
    """
    group_dir = prefix + group
    features = load_file(group_dir + '/X_' + group + '.txt')
    labels = load_file(group_dir + '/y_' + group + '.txt')
    return features, labels

#### Function to load the dataset and return train and test elements

In [13]:
def load_dataset(prefix=''):
    """Load the train and test splits from ``<prefix>HARDataset/``.

    Prints the array shapes as loaded, then again after the label arrays
    have been reduced to their first column.

    Parameters
    ----------
    prefix : str, optional
        Path fragment placed in front of ``HARDataset/``.

    Returns
    -------
    tuple
        ``(train_X, train_y, test_X, test_y)`` where the two label arrays
        are one-dimensional.
    """
    print('Dimensions of the dataset:')

    dataset_root = f'{prefix}HARDataset/'

    X_tr, y_tr = load_dataset_group('train', dataset_root)
    print(X_tr.shape, y_tr.shape)

    X_te, y_te = load_dataset_group('test', dataset_root)
    print(X_te.shape, y_te.shape)

    # Labels are loaded as a single-column 2-D array; keep only that column
    # so the label arrays become 1-D.
    y_tr = y_tr[:, 0]
    y_te = y_te[:, 0]
    print(X_tr.shape, y_tr.shape, X_te.shape, y_te.shape)

    return X_tr, y_tr, X_te, y_te

#### Function to create a dictionary of models to evaluate

In [14]:
def define_models(models=None):
    """Build the dictionary of classifiers to evaluate.

    Parameters
    ----------
    models : dict, optional
        Existing mapping to add the classifiers to. When omitted, a fresh
        dictionary is created on every call.

    Returns
    -------
    dict
        Mapping of display name to an unfitted scikit-learn estimator.
    """
    # A `dict()` default is created once at definition time and shared by
    # every call, so estimators from one call would leak into the next.
    # Use a None sentinel and allocate per call instead.
    if models is None:
        models = {}

    models['KNN'] = KNeighborsClassifier(n_neighbors=7)
    models['CART'] = DecisionTreeClassifier()
    models['SVM'] = SVC()
    models['Naive Bayes'] = GaussianNB()
    # Ensemble models are currently disabled; uncomment to include them.
    # models['Bagging'] = BaggingClassifier(n_estimators=100)
    # models['Random Forest'] = RandomForestClassifier(n_estimators=100)
    # models['Extra Trees'] = ExtraTreesClassifier(n_estimators=100)
    # models['Gradient Boosting'] = GradientBoostingClassifier(n_estimators=100)
    print()
    print('Training %d models...' % len(models))
    return models

#### Function to train a single model and return its test accuracy (%)

In [15]:
def train_model(train_X, train_y, test_X, test_y, model):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    train_X, train_y
        Training features and labels passed to ``model.fit``.
    test_X, test_y
        Test features passed to ``model.predict`` and the labels the
        predictions are compared against.
    model
        Object exposing ``fit`` and ``predict``.

    Returns
    -------
    float
        Accuracy on the test split expressed as a percentage.
    """
    model.fit(train_X, train_y)
    predictions = model.predict(test_X)
    accuracy = accuracy_score(test_y, predictions)
    # Scale the accuracy fraction to a percentage.
    return accuracy * 100.0


#### Function to evaluate a dictionary of models

In [16]:
def evaluate_models(train_X, train_y, test_X, test_y, models):
    """Train and score every model, printing each score as it is obtained.

    Parameters
    ----------
    train_X, train_y, test_X, test_y
        Data splits forwarded unchanged to ``train_model``.
    models : dict
        Mapping of display name to estimator.

    Returns
    -------
    dict
        Mapping of display name to the value returned by ``train_model``.
    """
    scores = {}
    for label, estimator in models.items():
        score = train_model(train_X, train_y, test_X, test_y, estimator)
        scores[label] = score
        print('%s: %.3f' % (label, score))
    return scores


#### Function to print the results of each model

In [17]:
def summarize_results(results):
    """Print the model scores from highest to lowest.

    Parameters
    ----------
    results : dict
        Mapping of model name to numeric score.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    # Sort ascending (stable) and then flip the list; equal scores therefore
    # appear in the reverse of their insertion order.
    ranking = sorted(results.items(), key=lambda pair: pair[1])[::-1]

    print()
    print('In order of decreasing accuracy:')
    for label, value in ranking:
        print('%s - %.3f' % (label, value))

#### Final execution

In [18]:
# Load the train/test splits; with the default (empty) prefix the files are
# read from the 'HARDataset/' directory relative to the working directory.
train_X, train_y, test_X, test_y = load_dataset()

# Build the dictionary of classifiers to compare.
models = define_models()

# Fit each classifier on the training split and score it on the test split.
results = evaluate_models(train_X, train_y, test_X, test_y, models)

# Print the accuracies from highest to lowest.
summarize_results(results)

Dimensions of the dataset:
(7352, 561) (7352, 1)
(2947, 561) (2947, 1)
(7352, 561) (7352,) (2947, 561) (2947,)

Training 4 models...
KNN: 90.329
CART: 85.952
SVM: 95.046
Naive Bayes: 77.027

In order of decreasing accuracy:
SVM - 95.046
KNN - 90.329
CART - 85.952
Naive Bayes - 77.027
