In [1]:
import pandas as pd
import numpy as np

In [2]:
# Directory holding the raw dataset files. The trailing slash is required
# because file paths are built from this value by plain string concatenation.
DATASET_PATH = "../data/raw/"

In [3]:
# Human-readable names for the six target classes.
# NOTE(review): presumably ordered so that LABELS[i] names the zero-based
# class id i produced by load_data (which subtracts 1 from the labels in the
# file) — confirm against the dataset's own label description.
LABELS = [
    "Walking",
    "Upstairs",
    "Downstairs",
    "Sitting",
    "Standing",
    "Laying"
]

## Load Dataset

In [4]:
def load_data(X_path, y_path):
    """Load a whitespace-delimited feature file and its label file.

    Parameters
    ----------
    X_path : str
        Path to a text file with one sample per line; each line holds the
        sample's feature values separated by whitespace.
    y_path : str
        Path to a text file with one label per line. Only the first
        whitespace-separated token of each line is used.

    Returns
    -------
    X_ : numpy.ndarray
        ``float32`` array with one row per non-blank line of ``X_path``.
    y_ : numpy.ndarray
        ``int32`` array of the labels read from ``y_path``, each shifted
        down by one (so a label of 1 in the file becomes 0).

    Raises
    ------
    ValueError
        If a token cannot be parsed as a number.
    """
    with open(X_path, 'r') as file:
        # str.split() with no separator collapses any run of whitespace
        # (single, double or wider gaps, tabs) and never yields empty tokens,
        # so the column padding used in the file cannot break the float parse.
        # Blank lines are skipped instead of producing an unparsable row.
        X_ = np.array(
            [row.split() for row in file if row.strip()],
            dtype=np.float32,
        )
    with open(y_path, 'r') as file:
        # Keep only the first token of every non-blank line.
        y_ = np.array(
            [row.split()[0] for row in file if row.strip()],
            dtype=np.int32,
        )
    # Shift labels so that the smallest label in a 1-based file maps to 0.
    return X_, y_ - 1

In [5]:
# Read the train and test splits from their sub-directories of DATASET_PATH.
X_train, y_train = load_data(
    f"{DATASET_PATH}train/X_train.txt",
    f"{DATASET_PATH}train/y_train.txt",
)
X_test, y_test = load_data(
    f"{DATASET_PATH}test/X_test.txt",
    f"{DATASET_PATH}test/y_test.txt",
)

## Evaluate Models

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [7]:
# Seed handed to the estimators that use randomness internally, so that a
# fresh "Restart & Run All" reproduces the same saved predictions.
RANDOM_STATE = 42

# Classifiers to fit, keyed by display name. Everything is left at the
# scikit-learn defaults except for the fixed seed on the tree-based models.
# Insertion order matters: the prediction loop enumerates this dict, so the
# i-th entry here fills the i-th column of y_pred.
models = {
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Support Vector Machines": SVC(),
    "Random Forests": RandomForestClassifier(random_state=RANDOM_STATE),
}

In [8]:
# Dimensions of the prediction matrix: one column per model, one row per
# test sample.
n_models = len(models)

# Fail fast if the test matrix is not (samples, features). The row count is
# read by index rather than by unpacking into `_`, because assigning to `_`
# in an IPython kernel shadows the built-in "last output" shortcut.
assert X_test.ndim == 2, "X_test must be a 2-D (samples, features) array"
n_test_samples = X_test.shape[0]

In [9]:
# Prediction matrix: row = test sample, column = model (in the iteration
# order of `models`). Each model is fitted on the training split and its
# test-set predictions are written into its own column.
y_pred = np.zeros(shape=(n_test_samples, n_models), dtype=np.int32)
for column, model in enumerate(models.values()):
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    y_pred[:, column] = predictions

In [10]:
# Persist the prediction matrix in NumPy's binary .npy format in the current
# working directory (an existing file of the same name is overwritten).
np.save('machine-learning.npy', y_pred)