In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from sklearn.metrics import accuracy_score, confusion_matrix
from src.features.LearningAlgorithms import ClassificationAlgorithms
from sklearn.metrics import accuracy_score
# Plot settings
plt.style.use("fivethirtyeight")
# Apply the figure-size, resolution and line-width defaults in a single call.
plt.rcParams.update(
    {
        "figure.figsize": (20, 5),
        "figure.dpi": 100,
        "lines.linewidth": 2,
    }
)


# Load the pickled feature dataframe.
df = pd.read_pickle("../data-science-template-main/data/interim/03_data_features.pkl")
# --------------------------------------------------------------
# Create a training and test set
# --------------------------------------------------------------
# Remove the metadata columns; what remains is "label" plus the features.
df_train = df.drop(columns=["Set", "participant", "category"])

# Target first, then every remaining column as a predictor.
y = df_train["label"]
x = df_train.drop(columns="label")

# Hold out 20% of the rows, stratified on y, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
    stratify=y,
)

# --------------------------------------------------------------
# Split feature subsets
# --------------------------------------------------------------
basic_features = ["acc_x", "acc_y", "acc_z", "gyr_x", "gyr_y", "gyr_z"]
squared_features = ["acc_r", "gyr_r"]
pca_features = ["pca_1", "pca_2", "pca_3"]
# Column names are matched by substring to pick up the derived features.
time_features = [f for f in df_train.columns if "_temp_" in f]
freq_features = [f for f in df_train.columns if ("_freq" in f) or ("_pse" in f)]
cluster_features = ["cluster"]

# De-duplicate with dict.fromkeys instead of set(): a set of strings has no
# stable iteration order between interpreter runs, which would shuffle the
# column order handed to the models and make results hard to reproduce.
# dict.fromkeys keeps the first occurrence of each name in its original order.
feature_set_1 = list(dict.fromkeys(basic_features))
feature_set_2 = list(dict.fromkeys(basic_features + squared_features + pca_features))
feature_set_3 = list(dict.fromkeys(feature_set_2 + time_features))
feature_set_4 = list(dict.fromkeys(feature_set_3 + freq_features))

# Hard-coded feature list.
# NOTE(review): this value is overwritten by the forward-selection result a few
# lines below, so it only takes effect if that step is skipped.
selected_features = [
    "pca_1",
    "acc_z_freq_0.0_Hz_ws_14",
    "acc_x_freq_0.0_Hz_ws_14",
    "gyr_z_temp_std_ws_5",
    "acc_x_pse",
    "gyr_r_freq_0.0_Hz_ws_14",
    "gyr_z_freq_0.714_Hz_ws_14",
    "acc_z_temp_mean_ws_5",
    "acc_y_temp_mean_ws_5",
    "gyr_x_freq_0.0_Hz_ws_14",
]

# --------------------------------------------------------------
# Perform forward feature selection using simple decision tree
# --------------------------------------------------------------
learner = ClassificationAlgorithms()
max_features = 10
# forward_selection hands back three values, unpacked positionally below.
forward_selection_result = learner.forward_selection(max_features, X_train, y_train)
selected_features, ordered_features, ordered_scores = forward_selection_result
# --------------------------------------------------------------
# Grid search for best hyperparameters and model selection
# --------------------------------------------------------------
possible_feature_sets = [
    feature_set_1,
    feature_set_2,
    feature_set_3,
    feature_set_4,
    selected_features,
]

# Display names, index-aligned with possible_feature_sets.
feature_names = [
    "feature_set_1",
    "feature_set_2",
    "feature_set_3",
    "feature_set_4",
    "Selected Features",
]

# Number of times the NN and RF are retrained per feature set; their test
# accuracy is averaged over these runs.
iterations = 1

# Row order of the accuracy list built for every feature set.
models = ["NN", "RF", "KNN", "DT", "NB"]

# One small frame per feature set; concatenated once after the loop instead of
# growing a dataframe inside it.
score_frames = []

for i, (feature_set, f) in enumerate(zip(possible_feature_sets, feature_names)):
    print("Feature set:", i)
    selected_train_X = X_train[feature_set]
    selected_test_X = X_test[feature_set]

    # First run non deterministic classifiers to average their score.
    performance_test_nn = 0
    performance_test_rf = 0

    for it in range(iterations):
        print("\tTraining neural network,", it)
        (
            class_train_y,
            class_test_y,
            class_train_prob_y,
            class_test_prob_y,
        ) = learner.feedforward_neural_network(
            selected_train_X,
            y_train,
            selected_test_X,
            gridsearch=False,
        )
        performance_test_nn += accuracy_score(y_test, class_test_y)

        print("\tTraining random forest,", it)
        (
            class_train_y,
            class_test_y,
            class_train_prob_y,
            class_test_prob_y,
        ) = learner.random_forest(
            selected_train_X, y_train, selected_test_X, gridsearch=True
        )
        performance_test_rf += accuracy_score(y_test, class_test_y)

    performance_test_nn = performance_test_nn / iterations
    performance_test_rf = performance_test_rf / iterations

    # And we run our deterministic classifiers:
    print("\tTraining KNN")
    (
        class_train_y,
        class_test_y,
        class_train_prob_y,
        class_test_prob_y,
    ) = learner.k_nearest_neighbor(
        selected_train_X, y_train, selected_test_X, gridsearch=True
    )
    performance_test_knn = accuracy_score(y_test, class_test_y)

    print("\tTraining decision tree")
    (
        class_train_y,
        class_test_y,
        class_train_prob_y,
        class_test_prob_y,
    ) = learner.decision_tree(
        selected_train_X, y_train, selected_test_X, gridsearch=True
    )
    performance_test_dt = accuracy_score(y_test, class_test_y)

    print("\tTraining naive bayes")
    (
        class_train_y,
        class_test_y,
        class_train_prob_y,
        class_test_prob_y,
    ) = learner.naive_bayes(selected_train_X, y_train, selected_test_X)
    performance_test_nb = accuracy_score(y_test, class_test_y)

    # Save results for this feature set; "feature_set" is broadcast to all rows.
    score_frames.append(
        pd.DataFrame(
            {
                "model": models,
                "feature_set": f,
                "accuracy": [
                    performance_test_nn,
                    performance_test_rf,
                    performance_test_knn,
                    performance_test_dt,
                    performance_test_nb,
                ],
            }
        )
    )

# ignore_index gives every (model, feature_set) row its own index label rather
# than repeating 0..4 once per feature set.
score_df = pd.concat(score_frames, ignore_index=True)

print("Final Score DataFrame:")
print(score_df)

# sort_values returns a new frame and leaves score_df untouched, so the ranking
# has to be printed to be visible when this runs as a script.
print(score_df.sort_values(by="accuracy", ascending=False))

# Grouped bars: one group per model, one bar per feature set.
plt.rcParams["figure.figsize"] = [10, 10]
sns.barplot(x="model", y="accuracy", hue="feature_set", data=score_df)
# Restrict the y-axis to the 0.7-1 range.
plt.ylim(0.7, 1)
plt.show()

# Train a random forest on feature_set_4 using the current train/test split.
(
    class_train_y,
    class_test_y,
    class_train_prob_y,
    class_test_prob_y,
) = learner.random_forest(
    X_train[feature_set_4], y_train, X_test[feature_set_4], gridsearch=True
)

# Report the test accuracy (previously a commented-out line that referenced an
# undefined name, `ytest`).
print(accuracy_score(y_test, class_test_y))

# The columns of the probability frame are used as the class labels, which
# fixes the row/column order of the confusion matrix.
classes = class_train_prob_y.columns
cm = confusion_matrix(y_test, class_test_y, labels=classes)

# Draw the confusion matrix as a heat map with the counts written in each cell.
plt.figure(figsize=(10, 10))
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.title("Confusion matrix")
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)

# Cells above half of the largest count get white text, the rest black.
thresh = cm.max() / 2.0
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(
        j,
        i,
        format(cm[i, j]),
        horizontalalignment="center",
        color="white" if cm[i, j] > thresh else "black",
    )
plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.grid(False)
plt.show()

# --------------------------------------------------------------
# Select train and test data based on participant
# --------------------------------------------------------------
# Participant "A" is held out as the test set; everyone else is training data.
participant_df = df.drop(["Set", "category"], axis=1)
X_train = participant_df[participant_df["participant"] != "A"].drop("label", axis=1)
y_train = participant_df[participant_df["participant"] != "A"]["label"]
X_test = participant_df[participant_df["participant"] == "A"].drop("label", axis=1)
y_test = participant_df[participant_df["participant"] == "A"]["label"]

# Retrain on the new split before evaluating. Without this, class_test_y still
# holds predictions for the previous test rows and no longer lines up with the
# participant-"A" labels in y_test.
(
    class_train_y,
    class_test_y,
    class_train_prob_y,
    class_test_prob_y,
) = learner.random_forest(
    X_train[feature_set_4], y_train, X_test[feature_set_4], gridsearch=True
)
print(accuracy_score(y_test, class_test_y))

classes = class_train_prob_y.columns
cm = confusion_matrix(y_test, class_test_y, labels=classes)

# Draw the confusion matrix as a heat map with the counts written in each cell.
plt.figure(figsize=(10, 10))
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.title("Confusion matrix")
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)

# Cells above half of the largest count get white text, the rest black.
thresh = cm.max() / 2.0
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(
        j,
        i,
        format(cm[i, j]),
        horizontalalignment="center",
        color="white" if cm[i, j] > thresh else "black",
    )
plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.grid(False)
plt.show()

# sort_values returns a new frame and leaves score_df untouched, so the ranking
# has to be printed to be visible when this runs as a script.
print(score_df.sort_values(by="accuracy", ascending=False))

# Grouped bars: one group per model, one bar per feature set.
plt.rcParams["figure.figsize"] = [10, 10]
sns.barplot(x="model", y="accuracy", hue="feature_set", data=score_df)
# Restrict the y-axis to the 0.7-1 range.
plt.ylim(0.7, 1)
plt.show()

# Train the feed-forward neural network on the forward-selected columns only,
# using whatever train/test split is currently held in X_train / X_test.
nn_train_X = X_train[selected_features]
nn_test_X = X_test[selected_features]
(
    class_train_y,
    class_test_y,
    class_train_prob_y,
    class_test_prob_y,
) = learner.feedforward_neural_network(
    nn_train_X,
    y_train,
    nn_test_X,
    gridsearch=False,
)

# Test-set accuracy of the network.
nn_accuracy = accuracy_score(y_test, class_test_y)
print(nn_accuracy)

# Class labels, taken from the columns of the training probability frame.
classes = class_train_prob_y.columns


# Split by participant: "A" is the test set, all other participants train.
# NOTE(review): the same split is already built earlier in this script.
participant_df = df.drop(columns=["Set", "category"])
train_rows = participant_df[participant_df["participant"] != "A"]
test_rows = participant_df[participant_df["participant"] == "A"]
X_train = train_rows.drop(columns="label")
y_train = train_rows["label"]
X_test = test_rows.drop(columns="label")
y_test = test_rows["label"]

# Confusion matrix of the most recent predictions against the held-out labels.
classes = class_train_prob_y.columns
cm = confusion_matrix(y_test, class_test_y, labels=classes)

# Heat map of the matrix with class names on both axes.
plt.figure(figsize=(10, 10))
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.title("Confusion matrix")
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)

# Write each count into its cell; white text on cells above half the maximum.
thresh = cm.max() / 2.0
n_rows, n_cols = cm.shape
for i in range(n_rows):
    for j in range(n_cols):
        count = cm[i, j]
        text_color = "white" if count > thresh else "black"
        plt.text(
            j,
            i,
            format(count),
            horizontalalignment="center",
            color=text_color,
        )
plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.grid(False)
plt.show()



## counting reps

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from src.features.DataTransformation import LowPassFilter
from scipy.signal import argrelextrema
from sklearn.metrics import mean_absolute_error

# Turn off pandas' chained-assignment check for this script.
pd.options.mode.chained_assignment = None


# Plot settings
plt.style.use("fivethirtyeight")
# Apply the figure-size, resolution and line-width defaults in a single call.
plt.rcParams.update(
    {
        "figure.figsize": (20, 5),
        "figure.dpi": 100,
        "lines.linewidth": 2,
    }
)


# --------------------------------------------------------------
# Load data
# --------------------------------------------------------------
df = pd.read_pickle("data/processed/data_processed.pkl")
# Keep every row whose label is not "rest".
df = df[df["label"] != "rest"]

# Sum of squared x/y/z components per sensor.
acc_r = sum(df[f"acceleration_{axis}"] ** 2 for axis in ("x", "y", "z"))
gyr_r = sum(df[f"gyroscope_{axis}"] ** 2 for axis in ("x", "y", "z"))

# Magnitude of each sensor vector (square root of the sums above).
df["acc_r"] = np.sqrt(acc_r)
df["gyr_r"] = np.sqrt(gyr_r)
# --------------------------------------------------------------
# Split data
# --------------------------------------------------------------
# One frame per exercise label, unpacked in the same order as the label tuple.
bench_df, row_df, squat_df, dead_df, ohp_df = (
    df[df["label"] == exercise]
    for exercise in ("bench", "row", "squat", "dead", "ohp")
)
# --------------------------------------------------------------
# Visualize data to identify patterns
# --------------------------------------------------------------

lowpass = LowPassFilter()


def _first_set(exercise_df):
    """Return the rows of the first "Set" value that appears in exercise_df."""
    first_set_id = exercise_df["Set"].unique()[0]
    return exercise_df[exercise_df["Set"] == first_set_id]


bench_set = _first_set(bench_df)
row_set = _first_set(row_df)
squat_set = _first_set(squat_df)
dead_set = _first_set(dead_df)
ohp_set = _first_set(ohp_df)

# Plot the low-pass-filtered magnitude of the first bench set.
column = "acc_r"
filtered_bench = lowpass.low_pass_filter(
    bench_set,
    col=column,
    sampling_frequency=5,
    cutoff_frequency=0.4,
    order=5,
)
filtered_bench[column + "_lowpass"].plot()
# --------------------------------------------------------------
# Configure LowPassFilter
# --------------------------------------------------------------

def countrep(
    dataset,
    cutoff_freq=0.4,
    order=10,
    column="acc_r",
    sampling_frequency=5,
    plot=True,
):
    """Count repetitions as the number of peaks in a low-pass-filtered signal.

    Args:
        dataset: DataFrame containing ``column``. When ``plot`` is true it must
            also contain "label" and "category", which are used for the title.
        cutoff_freq: Cutoff frequency passed to the low-pass filter.
        order: Filter order passed to the low-pass filter.
        column: Name of the column to filter and search for peaks.
        sampling_frequency: Sampling frequency passed to the low-pass filter.
            Defaults to 5, the value that was previously hard-coded.
        plot: When true (the default), draw the filtered signal with the
            detected peaks marked. Pass False to only get the count, e.g. when
            looping over many sets.

    Returns:
        The number of local maxima found in the filtered signal.
    """
    # Apply the module-level low-pass filter; it is expected to return a frame
    # that contains a "<column>_lowpass" column.
    # NOTE(review): whether low_pass_filter modifies `dataset` in place cannot
    # be seen from here - confirm before relying on the input being untouched.
    data = lowpass.low_pass_filter(
        dataset,
        col=column,
        sampling_frequency=sampling_frequency,
        cutoff_frequency=cutoff_freq,
        order=order,
    )
    filtered_col = f"{column}_lowpass"

    # argrelextrema returns a tuple with one index array per axis; the signal
    # is 1-D, so the first entry holds the peak positions.
    peak_positions = argrelextrema(
        data[filtered_col].values, comparator=np.greater
    )[0]
    peaks = data.iloc[peak_positions]

    if plot:
        # Filtered signal as a line, detected peaks as red markers.
        fig, ax = plt.subplots()
        ax.plot(data[filtered_col], label=filtered_col)
        ax.plot(peaks[filtered_col], "o", color="red", label="Peaks")

        ax.set_ylabel(filtered_col)
        exercise = dataset["label"].iloc[0].title()
        category = dataset["category"].iloc[0].title()
        ax.set_title(f"{category}, {exercise}: {len(peaks)} Reps")
        ax.legend()
        plt.show()

    return len(peaks)

countrep(dead_set)

# --------------------------------------------------------------
# Create benchmark dataframe
# --------------------------------------------------------------
# Reference rep count: 5 for "heavy" sets, 10 for every other category.
df["reps"] = df["category"].apply(lambda x: 5 if x == "heavy" else 10)
reps_df = df.groupby(["Set", "category", "label"])["reps"].max().reset_index()
reps_df["rep_pred"] = 0

# Per-exercise overrides of the default countrep settings. Keys are lowercase
# to match the label values used elsewhere in this script ("squat", "row", ...);
# the previous "Squat" comparison could never match.
# Rows are counted on the gyroscope x-axis. This frame names that column
# "gyroscope_x"; the old code assigned "gyr_x" to an unused variable `col`,
# so the override never took effect.
rep_count_overrides = {
    "squat": {"cutoff_freq": 0.35},
    "row": {"cutoff_freq": 0.65, "column": "gyroscope_x"},
    "ohp": {"cutoff_freq": 0.35},
}

for s in df["Set"].unique():
    subset = df[df["Set"] == s]

    # Defaults first, then whatever the exercise of this set overrides.
    settings = {"column": "acc_r", "cutoff_freq": 0.4}
    settings.update(rep_count_overrides.get(subset["label"].iloc[0], {}))

    reps = countrep(subset, **settings)

    # Each "Set" value maps to one row of reps_df.
    reps_df.loc[reps_df["Set"] == s, "rep_pred"] = reps

print(reps_df)
# --------------------------------------------------------------
# Evaluate the results
# --------------------------------------------------------------

# Built-in round() works whether mean_absolute_error returns a NumPy scalar or
# a plain Python float; the latter has no .round() method.
error = round(mean_absolute_error(reps_df["reps"], reps_df["rep_pred"]), 2)
print("Mean absolute error (reps):", error)

# Average reference vs. predicted reps per exercise and category.
reps_df.groupby(["label", "category"])[["reps", "rep_pred"]].mean().plot.bar()
plt.show()
