In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import scipy.stats as sp

In [None]:
# Location of the raw measurements, relative to the notebook's working directory.
path = "./data/data.csv"
# Long-format table; later cells read the columns subject, condition,
# replication, time, leg, joint and angle from it.
df = pd.read_csv(path)

In [None]:
# Experimental design (matches the notes cell further down in the notebook).
N_SUBJECTS = 10    # participants
N_CONDITIONS = 3   # conditions recorded per participant
N_REPS = 10        # replications per (subject, condition)
N_SENSORS = 6      # two legs x three joints

# One trial is one (subject, condition, replication) combination.
N_TRIALS = N_REPS * N_CONDITIONS * N_SUBJECTS

# Preprocessing

In [None]:
# Columns that identify one recording (a "trial") and one sample inside it.
trials = ["subject", "condition", "replication"]
index_columns = trials + ["time"]

# Index the long-format frame by (subject, condition, replication, time).
# Guarded so that re-running this cell is a no-op instead of a KeyError once
# the columns have already been moved into the index.
if list(df.index.names) != index_columns:
    df = df.set_index(index_columns)

# Integer codes found in the `joint` / `leg` columns -> readable names.
joint_map = {1: "ankle", 2: "knee", 3: "hip"}
leg_map = {1: "left", 2: "right"}

# Pivot to wide format: one angle column per (leg, joint) sensor.
# The first assigned column fixes the row index of `matrix`; every later
# column is aligned to that index.
matrix = pd.DataFrame()
for leg_key, leg_val in leg_map.items():
    for joint_key, joint_val in joint_map.items():
        sensor_rows = (df.leg == leg_key) & (df.joint == joint_key)
        matrix[f"{leg_val}_{joint_val}"] = df.loc[sensor_rows, "angle"]

# Flatten (subject, condition, replication) into a single trial id, ordered
# subject -> condition -> replication.
# NOTE(review): the formula assumes subject and condition are 1-based codes
# and replication runs 1..N_REPS -- confirm against the CSV.
rs = matrix.reset_index()
series = pd.DataFrame(
    (rs.subject - 1) * N_REPS * N_CONDITIONS
    + (rs.condition - 1) * N_REPS
    + rs.replication
)
# `rs` has the same row order as `matrix`, so assign positionally instead of
# relying on how pandas handles a one-column DataFrame assigned to a column.
matrix["trial"] = series.to_numpy().ravel()




In [None]:
# Peek at the first two rows of the wide table to sanity-check the reshaping.
matrix.head(2)

In [None]:
# Condition label for every trial, indexed by the 1-based trial id.
# Trial ids are ordered subject -> condition -> replication, so the trial at
# 0-based position p belongs to condition 1 + (p // N_REPS) % N_CONDITIONS.
# The design constants replace the previously hard-coded 10 and 3.
target = pd.Series(range(N_TRIALS), index=range(1, N_TRIALS + 1))
target = 1 + ((target // N_REPS) % N_CONDITIONS)
target.name = "condition"

# Re-key the wide table by (trial, time) and discard the now-redundant
# subject/condition/replication columns. errors="ignore" keeps the cell
# re-runnable: on a second run those columns are already gone.
matrix = (
    matrix.reset_index()
    .drop(columns=trials, errors="ignore")
    .set_index(["trial", "time"])
)

In [None]:
# Condition label for every (trial, sensor) row, indexed 1..N_TRIALS*N_SENSORS.
# Each condition block spans N_REPS trials x N_SENSORS rows; the constants
# replace the previously hard-coded 60 and 3.
# NOTE(review): the labels are purely positional -- they assume the feature
# table they are paired with is sorted by trial with exactly N_SENSORS
# consecutive rows per trial. Verify against the per-(trial, variable) features.
n_sensor_rows = N_TRIALS * N_SENSORS
target2 = pd.Series(range(n_sensor_rows), index=range(1, n_sensor_rows + 1))
target2 = 1 + ((target2 // (N_REPS * N_SENSORS)) % N_CONDITIONS)
target2.name = "condition"

# Long format: one row per (trial, time, sensor) with the sensor name in
# `variable` and the angle in `value` (the default melt column names).
matrix2 = matrix.reset_index().melt(id_vars=["trial", "time"])

In [None]:
# Per-trial summary statistics of every sensor column (wide layout).
per_trial = matrix.groupby("trial")

means = per_trial.mean()
means.columns = [f"{col}_mean" for col in matrix.columns]
stdevs = per_trial.std()
stdevs.columns = [f"{col}_stdev" for col in matrix.columns]
# Placeholder for additional features; not filled in by this cell.
other_features = pd.DataFrame()

# Keep only the first N_SENSORS mean columns. This selects the same columns
# as the former `means.T.iloc[0:6].T`, without transposing twice and without
# the hard-coded 6.
means = means.iloc[:, :N_SENSORS]

In [None]:
# Summary statistics of the angle for every (trial, sensor) pair.
grouped = matrix2.groupby(["trial", "variable"])

# Select the `value` column up front (as a one-column frame) rather than
# aggregating `time` as well and dropping it afterwards. The results are the
# same single-column DataFrames as before.
# The former `means2.name = "mean"` was removed: it only set a stray attribute
# on a DataFrame; the columns are named when the features are assembled.
angle = grouped[["value"]]
means2 = angle.mean()
stdevs2 = angle.std()
low_q = angle.quantile(0.25)
high_q = angle.quantile(0.75)

In [None]:
# Feature set 1: one row per trial, built from the per-trial means only.
features = pd.concat([means], axis=1)

# Feature set 2: one row per (trial, sensor) with four summary statistics.
# The concatenated frames all carry the same column name, so the columns are
# relabelled positionally.
stat_frames = [means2, stdevs2, low_q, high_q]
stat_names = ["mean", "std", "low", "high"]
features2 = pd.concat(stat_frames, axis=1).set_axis(stat_names, axis=1)

# Testing Model

In [None]:
from sklearn import linear_model, ensemble
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, balanced_accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Subject-wise hold-out: the first TRAIN_SUBJECTS subjects train the model,
# the remaining ones are the test set.
TRAIN_SUBJECTS = 7
# Integer division: the result is a trial id used as a .loc slice bound, so it
# must be an int (true division produced the float 210.0).
TRAIN_TRIALS = N_TRIALS * TRAIN_SUBJECTS // N_SUBJECTS

features_used = features2
target_used = target2

X = features_used.copy()
y = target_used.copy()

# X is sliced by trial id (first index level), y by its 1-based row number.
# y_split assumes N_SENSORS rows per trial, i.e. the features2/target2 pair;
# adjust it if features_used/target_used are switched.
X_split = TRAIN_TRIALS
y_split = X_split * N_SENSORS

# Label-based .loc slices include both end points.
X_train = X.loc[:X_split].values
y_train = y.loc[:y_split].values
X_test = X.loc[X_split + 1:].values
y_test = y.loc[y_split + 1:].values

# X and y are paired purely by position, so make sure they line up.
assert len(X_train) == len(y_train), "train features/labels are misaligned"
assert len(X_test) == len(y_test), "test features/labels are misaligned"

In [None]:
# Preview the feature table instead of displaying the whole frame.
X.head()

In [None]:
# TODO: run cross-validation on X_train here (e.g. leave-one-subject-out, as planned in the notes cell).

In [None]:
# Shape of the held-out feature array: (rows, feature columns).
X_test.shape

In [None]:
# Shape of the held-out label array; its length should equal the number of rows in X_test.
y_test.shape

In [None]:
# Baseline classifier with default hyper-parameters.
# TODO: the choice of model still needs a justification; it can be swapped out.
model = linear_model.LogisticRegression()
# Alternative that was considered:
# model = ensemble.AdaBoostClassifier()
# NOTE(review): the features are passed in unscaled -- confirm the solver converges.
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out subjects and report each metric under its name.
y_pred = model.predict(X_test)

for metric in (confusion_matrix, accuracy_score):
    # __name__ yields the same label as parsing "<function NAME at 0x...>"
    # but does not depend on the exact repr format of the callable.
    print(metric.__name__)
    print(metric(y_test, y_pred))

# Good scores -- next step: look at the misclassified samples.

In [None]:
# ten subjects
# ten replications
# three conditions

# 6 measurements for each one (two legs, three joints)
# time series are 101 points long
# so the data is actually (300 x 101 x 6), so we should represent it as such.

# train-test split (by subject, so that no subject appears in both sets):
# 7 subjects for train: use leave-one-subject-out cross-validation (hold out 1 subject per fold)
# 3 subjects for test: keep these three subjects aside for the final evaluation