In [254]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import scipy.stats as sp

In [120]:
# Read the measurement table from the project-relative CSV file.
path = "./data/data.csv"
df = pd.read_csv(filepath_or_buffer=path)

In [193]:
# Experimental design: subjects x conditions x replications, with one angle
# trace per sensor (2 legs x 3 joints) in every trial.
N_SUBJECTS, N_CONDITIONS, N_REPS = 10, 3, 10
N_SENSORS = 6
# Every (subject, condition, replication) combination is one trial.
N_TRIALS = N_REPS * N_CONDITIONS * N_SUBJECTS

# Preprocessing

In [122]:
# Reshape the long-format table (one row per leg/joint/time sample) into a wide
# matrix with one angle column per sensor, then attach a running trial number.
trials = ["subject", "condition", "replication"]
index_columns = trials + ["time"]

# Only index `df` while it is still un-indexed, so that re-running this cell
# does not raise a KeyError on columns that already moved into the index.
if list(df.index.names) != index_columns:
    df = df.set_index(index_columns)

joint_map = {1: "ankle", 2: "knee", 3: "hip"}
leg_map = {1: "left", 2: "right"}

# One column per (leg, joint) sensor; the first assignment fixes the row index
# and later columns are aligned to it on (subject, condition, replication, time).
matrix = pd.DataFrame()
for leg_key, leg_val in leg_map.items():
    for joint_key, joint_val in joint_map.items():
        sensor_rows = (df.leg == leg_key) & (df.joint == joint_key)
        matrix[f"{leg_val}_{joint_val}"] = df.loc[sensor_rows, "angle"]

# Trial number ordered by subject, then condition, then replication
# (assumes all three are coded 1..N — TODO confirm against the data file).
rs = matrix.reset_index()
series = (
    (rs.subject - 1) * N_REPS * N_CONDITIONS
    + (rs.condition - 1) * N_REPS
    + rs.replication
)
# `rs` has a fresh 0..n-1 index, so assign by position rather than by label.
matrix["trial"] = series.to_numpy()




In [123]:
# Preview the first two rows of the wide per-sensor matrix.
matrix.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,left_ankle,left_knee,left_hip,right_ankle,right_knee,right_hip,trial
subject,condition,replication,time,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1,1,0,4.682881,3.693683,16.607868,5.015829,2.244425,18.94829,1
1,1,1,1,5.073127,4.258876,16.851029,5.291201,3.381678,19.2653,1


In [124]:
# Condition label (1..N_CONDITIONS) for every trial number 1..N_TRIALS.
# Trials are numbered by subject, then condition, then replication, so the
# label changes every N_REPS trials and cycles through N_CONDITIONS values.
target = pd.Series(range(N_TRIALS), index=range(1, N_TRIALS + 1))
target = 1 + ((target // N_REPS) % N_CONDITIONS)
target.name = "condition"

# Re-key the matrix by (trial, time). errors="ignore" keeps the cell re-runnable:
# on a second run the subject/condition/replication columns are already gone.
matrix = (
    matrix.reset_index()
    .drop(columns=trials, errors="ignore")
    .set_index(["trial", "time"])
)

In [196]:
# Condition label for every (trial, sensor) row, numbered 1..N_TRIALS * N_SENSORS.
# Assumes the feature rows are sorted by trial with N_SENSORS consecutive rows
# per trial, so the label changes every N_REPS * N_SENSORS rows.
target2 = pd.Series(range(N_TRIALS * N_SENSORS), index=range(1, N_TRIALS * N_SENSORS + 1))
target2 = 1 + ((target2 // (N_REPS * N_SENSORS)) % N_CONDITIONS)
target2.name = "condition"

# Long format: one row per (trial, time, sensor); melt puts the sensor name in
# `variable` and the angle in `value`.
matrix2 = matrix.reset_index().melt(id_vars=["trial", "time"])

In [298]:
# Per-trial mean and standard deviation of every matrix column over time.
per_trial = matrix.groupby("trial")
means = per_trial.mean().add_suffix("_mean")
stdevs = per_trial.std().add_suffix("_stdev")
other_features = pd.DataFrame()  # placeholder; nothing is added to it in this cell

# Keep the first N_SENSORS columns, selected directly instead of transposing twice.
means = means.iloc[:, :N_SENSORS]

In [297]:
# Summary statistics of the angle trace for every (trial, sensor) pair.
grouped = matrix2.groupby(["trial", "variable"])

# Select the angle column up front instead of aggregating `time` as well and
# dropping it afterwards. Each result is a one-column ("value") DataFrame.
angle_groups = grouped[["value"]]
means2 = angle_groups.mean()
stdevs2 = angle_groups.std()
low_q = angle_groups.quantile(0.25)   # lower quartile
high_q = angle_groups.quantile(0.75)  # upper quartile

In [313]:
# Wide feature table: one row per trial, built from the per-trial means only.
features = pd.concat([means], axis=1)

# Long feature table: one row per (trial, sensor), one column per statistic.
stat_frames = {"mean": means2, "std": stdevs2, "low": low_q, "high": high_q}
features2 = pd.concat(list(stat_frames.values()), axis=1)
features2.columns = list(stat_frames)

# Testing Model

In [301]:
from sklearn import linear_model, ensemble
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, balanced_accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [302]:
# Subject-wise split: the first TRAIN_SUBJECTS subjects train the model and the
# remaining subjects are held out for testing. This relies on trial numbers
# being ordered by subject.
TRAIN_SUBJECTS = 7
# Integer division keeps this an int, so it is a valid label for the integer
# trial index (true division would yield 210.0).
TRAIN_TRIALS = N_TRIALS * TRAIN_SUBJECTS // N_SUBJECTS

features_used = features2
target_used = target2

X = features_used.copy()
y = target_used.copy()

# Last training trial (label on the first index level of X) and the matching
# last training row label in y, which has N_SENSORS rows per trial.
X_split = TRAIN_TRIALS
y_split = X_split * N_SENSORS

# .loc slices are label-based and inclusive on both ends.
X_train = X.loc[:X_split].values
y_train = y.loc[:y_split].values
X_test = X.loc[X_split + 1:].values
y_test = y.loc[y_split + 1:].values

# X and y are sliced independently, so check that they stayed aligned.
assert len(X_train) == len(y_train), "train features/labels are misaligned"
assert len(X_test) == len(y_test), "test features/labels are misaligned"

In [303]:
# Display the feature table: one row per (trial, sensor), one column per statistic.
X

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,low,high
trial,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,left_ankle,7.850984,5.137240,5.131447,11.907845
1,left_hip,4.597241,12.563371,-6.709405,17.070042
1,left_knee,21.860145,18.214232,8.704838,29.831388
1,right_ankle,7.061943,4.313286,4.264137,10.275894
1,right_hip,5.737394,12.731324,-6.991930,18.948290
...,...,...,...,...,...
300,left_hip,14.035819,13.332365,1.676120,26.407979
300,left_knee,18.891068,18.893064,4.039879,29.254627
300,right_ankle,-4.074704,1.481373,-4.937225,-3.285091
300,right_hip,12.226518,15.256644,-1.682344,26.837917


In [304]:
# TODO: run cross-validation on X_train here.

In [305]:
# (rows, feature columns) of the held-out feature array.
X_test.shape

(540, 4)

In [306]:
# Number of held-out labels; should equal the row count of X_test.
y_test.shape

(540,)

In [311]:
# Baseline classifier with default hyper-parameters.
# TODO: the model choice is open and still needs a written justification.
model = linear_model.LogisticRegression() 
# Alternative model, kept for comparison:
# model = ensemble.AdaBoostClassifier()
# Fit on the training rows; as the last expression this also displays the fitted estimator.
model.fit(X_train, y_train)

In [312]:
# Predict the held-out rows and print each metric under its function name.
y_pred = model.predict(X_test)

for metric in [confusion_matrix, accuracy_score]:
    # __name__ gives the function name directly, without parsing its repr.
    print(metric.__name__)
    print(metric(y_test, y_pred))

# NOTE(review): the recorded run reached ~0.51 accuracy on three equally sized
# classes (chance is ~0.33), so the scores are modest — use the confusion
# matrix to see which conditions get confused.

confusion_matrix
[[ 54  45  81]
 [ 17 124  39]
 [ 34  50  96]]
accuracy_score
0.5074074074074074


In [None]:
# ten subjects
# ten replications
# three conditions

# 6 measurements for each one (two legs, three joints)
# time series are 101 points long
# so the data is really a (300 trials x 101 time points x 6 sensors) array, and we should represent it as such.

# train-test split: 
# 7 subjects for train: use leave-one-subject-out validation (hold out 1 subject each time)
# 3 subjects for test: keep three subjects held out for testing