In [None]:
# Import libraries
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense

In [None]:
def combine_pickles_with_class(path, class_name):
    """
    Combine the accelerometer and gyroscope pickles of one class into a DataFrame.

    Files in ``path`` whose names start with ``class_name`` and end with
    ``_acc.pkl`` / ``_gyro.pkl`` are paired by the part of the name preceding
    that suffix (``jump_1_acc.pkl`` is paired with ``jump_1_gyro.pkl``). Pairs
    are processed in sorted name order so the result is deterministic.

    Each file must hold either three columns (the x/y/z axes) or four columns,
    in which case the first one is dropped. The check is done per file.

    Parameters:
    - path (str): Directory containing the pickle files. A trailing path
      separator is optional.
    - class_name (str): File-name prefix selecting the class (e.g., 'jump');
      it is also the value written to the ``class`` column.

    Returns:
    - pd.DataFrame: Columns ``ax, ay, az, gx, gy, gz, class`` with the rows of
      all recordings stacked under a fresh integer index.

    Raises:
    - ValueError: If no matching ``_acc.pkl`` file is found, if an
      accelerometer file has no gyroscope counterpart, or if a file does not
      end up with exactly three axis columns.
    """
    acc_suffix, gyro_suffix = "_acc.pkl", "_gyro.pkl"

    # os.listdir makes no ordering promise, so pairing the i-th acc file with
    # the i-th gyro file could mix up recordings. Sort and pair by name instead.
    candidates = sorted(
        name for name in os.listdir(path) if name.startswith(class_name)
    )
    acc_stems = [
        name[: -len(acc_suffix)] for name in candidates if name.endswith(acc_suffix)
    ]
    gyro_stems = {
        name[: -len(gyro_suffix)] for name in candidates if name.endswith(gyro_suffix)
    }

    if not acc_stems:
        raise ValueError(
            f"No files matching '{class_name}*{acc_suffix}' found in {path!r}"
        )

    unpaired = [stem for stem in acc_stems if stem not in gyro_stems]
    if unpaired:
        raise ValueError(
            f"Missing '{gyro_suffix}' counterpart for recording(s): {unpaired}"
        )

    def _load_sensor(file_name, axis_names):
        """Read one sensor pickle and return it with exactly ``axis_names`` columns."""
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        frame = pd.read_pickle(os.path.join(path, file_name))
        # A leading extra column is assumed to be an index/timestamp and dropped.
        if len(frame.columns) == len(axis_names) + 1:
            frame = frame.iloc[:, 1:].copy()
        frame.columns = axis_names
        return frame

    recordings = []
    for stem in acc_stems:
        acc_df = _load_sensor(stem + acc_suffix, ["ax", "ay", "az"])
        gyro_df = _load_sensor(stem + gyro_suffix, ["gx", "gy", "gz"])

        # Side-by-side join on the row index, then label every row.
        combined_df = pd.concat([acc_df, gyro_df], axis=1)
        combined_df["class"] = class_name
        recordings.append(combined_df)

    # Stack all recordings vertically under a fresh 0..n-1 index.
    return pd.concat(recordings, ignore_index=True)


# test the function
# combine_pickles_with_class("clean_data/train/", "jump")

In [None]:
# Which split to read; the directory layout is clean_data/<split>/ with
# <split> being one of "train", "valid" or "test".
data_type = "train"

# Build one combined accelerometer + gyroscope DataFrame per activity class.
# NOTE(review): "spain" is used as a file-name prefix here — presumably it
# matches the data files on disk (possibly meant "spin"); confirm.
jump_data_cleaned, walk_data_cleaned, spain_data_cleaned, run_data_cleaned = (
    combine_pickles_with_class(f"clean_data/{data_type}/", activity)
    for activity in ("jump", "walk", "spain", "run")
)

In [None]:
# Impute NaN and infinite feature values using training-set statistics
def clean_data(X_train, X_valid, X_test):
    """
    Replace NaN and +/-infinity in the three feature sets with column means.

    Infinite values are first turned into NaN so they neither distort the
    means nor survive into later steps. The column means are computed on the
    training set only and reused for the validation and test sets, so no
    statistics from held-out data leak into preprocessing.

    Parameters:
    - X_train (pd.DataFrame): Training features; source of the column means.
    - X_valid (pd.DataFrame): Validation features.
    - X_test (pd.DataFrame): Test features.

    Returns:
    - tuple: ``(X_train, X_valid, X_test)`` as new DataFrames. The inputs are
      not modified. A column that is entirely NaN in the training set has no
      mean and is therefore left unfilled.
    """
    infinities = [np.inf, -np.inf]
    train = X_train.replace(infinities, np.nan)
    valid = X_valid.replace(infinities, np.nan)
    test = X_test.replace(infinities, np.nan)

    # Means are taken after masking infinities; NaNs are skipped by default.
    train_means = train.mean()

    return (
        train.fillna(train_means),
        valid.fillna(train_means),
        test.fillna(train_means),
    )

In [None]:
# Read the pre-built train / validation / test tables from disk.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
data_train, data_valid, data_test = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "clean_data/train/all_data_train.pkl",
        "clean_data/valid/all_data_valid.pkl",
        "clean_data/test/all_data_test.pkl",
    )
)

In [None]:
# For each split, separate the feature columns (X) from the "class" label (y).
X_train, y_train = data_train.drop(columns="class"), data_train["class"]
X_valid, y_valid = data_valid.drop(columns="class"), data_valid["class"]
X_test, y_test = data_test.drop(columns="class"), data_test["class"]

In [None]:
# Map class names to integer ids. The encoder is fitted on the training
# labels only and then applied unchanged to every split.
encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_valid, encoded_y_test = (
    encoder.transform(labels) for labels in (y_train, y_valid, y_test)
)

In [None]:
# Fill missing feature values in all three splits via clean_data before
# scaling; the returned frames are rebound to the same names.
X_train, X_valid, X_test = clean_data(X_train, X_valid, X_test)

In [None]:
# Standardize the features: the scaler is fitted on the training set only and
# the same transform is then applied to every split.
scaler = StandardScaler().fit(X_train)
X_train, X_valid, X_test = (
    scaler.transform(features) for features in (X_train, X_valid, X_test)
)

In [None]:
# Sanity check: report how many NaNs remain in each scaled feature matrix.
print("After cleaning:")
for split_label, features in (
    ("X_train", X_train),
    ("X_valid", X_valid),
    ("X_test", X_test),
):
    print(f"{split_label} NaNs: {np.isnan(features).sum()}")

In [None]:
# Define the first model (10 neurons per hidden layer)
def create_model_10(input_dim=6, num_classes=4):
    """
    Build and compile a feed-forward classifier with two 10-unit hidden layers.

    Parameters:
    - input_dim (int): Number of input features. Defaults to 6, the value the
      notebook originally hard-coded.
    - num_classes (int): Number of output classes (width of the softmax
      layer). Defaults to 4, the value the notebook originally hard-coded.

    Returns:
    - Sequential: Model compiled with the Adam optimizer, the
      ``sparse_categorical_crossentropy`` loss and an accuracy metric.
    """
    model = Sequential(
        [
            Dense(10, input_dim=input_dim, activation="relu"),
            Dense(10, activation="relu"),
            Dense(num_classes, activation="softmax"),
        ]
    )

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )

    return model

In [None]:
# Define the second model (50 neurons per hidden layer)
def create_model_50(input_dim=6, num_classes=4):
    """
    Build and compile a feed-forward classifier with two 50-unit hidden layers.

    Parameters:
    - input_dim (int): Number of input features. Defaults to 6, the value the
      notebook originally hard-coded.
    - num_classes (int): Number of output classes (width of the softmax
      layer). Defaults to 4, the value the notebook originally hard-coded.

    Returns:
    - Sequential: Model compiled with the Adam optimizer, the
      ``sparse_categorical_crossentropy`` loss and an accuracy metric.
    """
    model = Sequential(
        [
            Dense(50, input_dim=input_dim, activation="relu"),
            Dense(50, activation="relu"),
            Dense(num_classes, activation="softmax"),
        ]
    )

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )

    return model

In [None]:
# Fit the 10-neuron network for 100 epochs in mini-batches of 10 samples,
# tracking loss and accuracy on the validation split after every epoch.
model_10 = create_model_10()
history_10 = model_10.fit(
    x=X_train,
    y=encoded_y_train,
    validation_data=(X_valid, encoded_y_valid),
    batch_size=10,
    epochs=100,
)

In [None]:
# Fit the 50-neuron network for 100 epochs in mini-batches of 10 samples,
# tracking loss and accuracy on the validation split after every epoch.
model_50 = create_model_50()
history_50 = model_50.fit(
    x=X_train,
    y=encoded_y_train,
    validation_data=(X_valid, encoded_y_valid),
    batch_size=10,
    epochs=100,
)

In [None]:
# Score each trained network on the held-out test split and report accuracy
# as a percentage.
print("\nEvaluating Model with 10 Neurons:")
test_loss_10, test_accuracy_10 = model_10.evaluate(x=X_test, y=encoded_y_test)
print(f"Model 10 Neurons - Test Accuracy: {test_accuracy_10 * 100:.2f}%")

print("\nEvaluating Model with 50 Neurons:")
test_loss_50, test_accuracy_50 = model_50.evaluate(x=X_test, y=encoded_y_test)
print(f"Model 50 Neurons - Test Accuracy: {test_accuracy_50 * 100:.2f}%")

In [None]:
# Plot accuracy and loss for both models
import matplotlib.pyplot as plt

In [None]:
def plot_history(history, model_name):
    """
    Show training vs. validation curves for one training run.

    Draws a single 14x5 figure with two side-by-side panels: accuracy on the
    left and loss on the right, each plotted per epoch.

    Parameters:
    - history: Object whose ``history`` attribute is a mapping with the keys
      "accuracy", "val_accuracy", "loss" and "val_loss".
    - model_name (str): Prefix used in both panel titles.
    """
    # (training key, validation key, display label) for each panel, left to right.
    panels = (
        ("accuracy", "val_accuracy", "Accuracy"),
        ("loss", "val_loss", "Loss"),
    )

    plt.figure(figsize=(14, 5))

    for position, (train_key, valid_key, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=f"Train {label}")
        plt.plot(history.history[valid_key], label=f"Validation {label}")
        plt.title(f"{model_name} - {label}")
        plt.xlabel("Epochs")
        plt.ylabel(label)
        plt.legend()

    plt.show()

In [None]:
# Draw the accuracy/loss curves of each training run, smaller model first.
for run_history, run_title in (
    (history_10, "Model with 10 Neurons"),
    (history_50, "Model with 50 Neurons"),
):
    plot_history(run_history, run_title)

In [None]:
# Score each trained network on the held-out test split and report accuracy
# as a percentage.
# NOTE(review): this cell repeats an identical evaluation cell earlier in
# the notebook.
print("\nEvaluating Model with 10 Neurons:")
test_loss_10, test_accuracy_10 = model_10.evaluate(x=X_test, y=encoded_y_test)
print(f"Model 10 Neurons - Test Accuracy: {test_accuracy_10 * 100:.2f}%")

print("\nEvaluating Model with 50 Neurons:")
test_loss_50, test_accuracy_50 = model_50.evaluate(x=X_test, y=encoded_y_test)
print(f"Model 50 Neurons - Test Accuracy: {test_accuracy_50 * 100:.2f}%")

In [None]:
# Plot accuracy and loss for both models
import matplotlib.pyplot as plt


def plot_history(history, model_name):
    """
    Show training vs. validation curves for one training run.

    Draws a single 14x5 figure with an accuracy panel on the left and a loss
    panel on the right, each plotted per epoch.

    NOTE(review): a function with this name is also defined in an earlier
    cell; whichever definition runs last is the one in effect.

    Parameters:
    - history: Object whose ``history`` attribute is a mapping with the keys
      "accuracy", "val_accuracy", "loss" and "val_loss".
    - model_name (str): Prefix used in both panel titles.
    """
    curves = history.history

    def _draw_panel(position, metric, label):
        # One panel: training curve, validation curve, then labels and legend.
        plt.subplot(1, 2, position)
        plt.plot(curves[metric], label=f"Train {label}")
        plt.plot(curves[f"val_{metric}"], label=f"Validation {label}")
        plt.title(f"{model_name} - {label}")
        plt.xlabel("Epochs")
        plt.ylabel(label)
        plt.legend()

    plt.figure(figsize=(14, 5))
    _draw_panel(1, "accuracy", "Accuracy")
    _draw_panel(2, "loss", "Loss")
    plt.show()


# Draw the accuracy/loss curves of each training run, smaller model first.
for run_history, run_title in (
    (history_10, "Model with 10 Neurons"),
    (history_50, "Model with 50 Neurons"),
):
    plot_history(run_history, run_title)