In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import os
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Function to clean NaN and infinite values
def clean_data(X_train, X_valid, X_test):
    """Replace infinite and missing values in each feature frame.

    Each frame is modified in place: +/-inf entries are first converted to
    NaN, then every NaN is filled with the mean of its own column, computed
    from that same frame.

    Args:
        X_train: Training feature DataFrame.
        X_valid: Validation feature DataFrame.
        X_test: Test feature DataFrame.

    Returns:
        The same three DataFrame objects, in the order they were passed.
    """
    for frame in (X_train, X_valid, X_test):
        # Infinities have to be removed before computing the mean: fillna()
        # leaves them untouched and they would turn the column mean into
        # inf/NaN, poisoning the imputed values as well.
        frame.replace([np.inf, -np.inf], np.nan, inplace=True)
        # numeric_only=True keeps non-numeric columns from breaking mean().
        frame.fillna(frame.mean(numeric_only=True), inplace=True)
    return X_train, X_valid, X_test

# Read the three pre-split tables from disk
data_train = pd.read_pickle("clean_data/train/all_data_train.pkl")
data_valid = pd.read_pickle("clean_data/valid/all_data_valid.pkl")
data_test = pd.read_pickle("clean_data/test/all_data_test.pkl")

# Pull the "class" label column out of each table; every other column is a feature
y_train, y_valid, y_test = data_train["class"], data_valid["class"], data_test["class"]
X_train, X_valid, X_test = (
    table.drop(columns="class") for table in (data_train, data_valid, data_test)
)

# Map class labels to integer ids; the mapping is learned from the training labels only
encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_valid, encoded_y_test = map(
    encoder.transform, (y_train, y_valid, y_test)
)

# Impute missing feature values (see clean_data)
X_train, X_valid, X_test = clean_data(X_train, X_valid, X_test)

# Standardize the features with statistics fitted on the training split,
# then apply that same transformation to the validation and test splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Function to adjust the window length based on the feature size
def adjust_window_length(X, window_length):
    """Return a window length that evenly divides the number of features in X.

    Args:
        X: Object with a ``shape`` attribute; ``X.shape[1]`` is taken as the
            number of features.
        window_length: Requested window length (positive integer).

    Returns:
        ``window_length`` itself when it already divides the feature count,
        otherwise an adjusted length that does. The result is always a
        divisor of ``X.shape[1]``.

    Raises:
        ValueError: If ``X`` has no feature columns or ``window_length`` is
            smaller than 1.
    """
    num_features = X.shape[1]
    if num_features < 1:
        raise ValueError("X must have at least one feature column.")
    if window_length < 1:
        raise ValueError(f"Window length must be a positive integer, got {window_length}.")

    # Ensure the window length does not exceed the number of features
    if window_length > num_features:
        print(f"Window length {window_length} is too large for {num_features} features. Adjusting window length.")
        window_length = num_features

    if num_features % window_length == 0:
        return window_length

    # First candidate: keep the same number of windows and stretch each one.
    new_window_length = num_features // (num_features // window_length)
    if num_features % new_window_length != 0:
        # The stretched length does not always divide the feature count
        # (e.g. 10 features, window 3 -> 3). Fall back to the largest divisor
        # not exceeding the request; 1 always qualifies, so next() succeeds.
        new_window_length = next(
            candidate
            for candidate in range(window_length, 0, -1)
            if num_features % candidate == 0
        )
    print(f"Adjusted window length from {window_length} to {new_window_length}")
    return new_window_length

# Function to build a GRU model with variable layers and neurons
def build_gru_model(num_neurons=32, num_layers=1, dropout_rate=0.2, input_shape=(None,), num_classes=4):
    """Build and compile a stacked-GRU softmax classifier.

    Args:
        num_neurons: Number of units in every GRU layer.
        num_layers: Number of GRU layers to stack; each one is followed by a
            Dropout layer.
        dropout_rate: Rate passed to each Dropout layer.
        input_shape: Shape handed to the first GRU layer as ``input_shape``.
            NOTE(review): Keras recurrent layers expect
            ``(timesteps, features)`` here, so the ``(None,)`` default looks
            like a placeholder that callers must override - confirm.
        num_classes: Size of the softmax output layer. Defaults to 4, the
            value that was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model using sparse categorical
        cross-entropy loss (integer class labels), the Adam optimizer and
        the accuracy metric.
    """
    model = Sequential()
    for layer_index in range(num_layers):
        # Every GRU except the last must emit the full sequence so the next
        # GRU layer receives a time axis.
        layer_kwargs = {"return_sequences": layer_index < num_layers - 1}
        if layer_index == 0:
            # Only the first layer needs to be told the input shape.
            layer_kwargs["input_shape"] = input_shape
        model.add(GRU(num_neurons, **layer_kwargs))
        model.add(Dropout(dropout_rate))

    # Output layer for classification: one softmax unit per class
    model.add(Dense(num_classes, activation='softmax'))

    # Compile the model
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# GRU layers consume 3-D input shaped (samples, timesteps, features). Treat
# every row as a one-step sequence and reshape all three splits the same way,
# so training, validation and test data agree with the model's input shape.
num_features = X_train.shape[1]
X_train_reshaped = X_train.reshape(X_train.shape[0], 1, num_features)
X_valid_reshaped = X_valid.reshape(X_valid.shape[0], 1, num_features)
X_test_reshaped = X_test.reshape(X_test.shape[0], 1, num_features)

# Wrap the model for use with GridSearchCV; input_shape is (timesteps, features)
model = KerasClassifier(build_fn=build_gru_model, input_shape=(1, num_features))

# Define hyperparameter grid
param_grid = {
    'num_neurons': [32, 64, 128],
    'num_layers': [1, 2],
    'dropout_rate': [0.1, 0.2, 0.3],
    'epochs': [30],
    'batch_size': [32, 64]
}

# Set up GridSearchCV
# NOTE(review): n_jobs=-1 runs the candidate fits in parallel workers; this is
# sometimes unreliable with Keras/TensorFlow models - consider n_jobs=1 if the
# search hangs or fails. Confirm in the target environment.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, verbose=1)

# Fit the grid search on the 3-D training data
grid_result = grid.fit(X_train_reshaped, encoded_y_train)

# Display the best model and its parameters
print(f"Best Accuracy: {grid_result.best_score_} using {grid_result.best_params_}")

# Evaluate the best model on validation data (same 3-D layout as training)
best_model = grid_result.best_estimator_.model
_, accuracy = best_model.evaluate(X_valid_reshaped, encoded_y_valid)
print(f"Validation Accuracy of Best Model: {accuracy}")

# Predict on the test data: class id = index of the largest softmax output
y_pred = np.argmax(best_model.predict(X_test_reshaped), axis=1)

# Generate confusion matrix
cm = confusion_matrix(encoded_y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="g")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# Function to combine pickles with class
def combine_pickles_with_class(path, class_name):
    """Merge matching accelerometer/gyroscope pickles into one DataFrame.

    Every file in ``path`` whose name starts with ``class_name`` and ends in
    ``_acc.pkl`` is paired with the file of the same name ending in
    ``_gyro.pkl``. Pairing is done by name (not by directory listing order,
    which is arbitrary), and files are processed in sorted name order so the
    result is deterministic.

    Args:
        path: Directory containing the pickle files.
        class_name: File-name prefix selecting which recordings to load.

    Returns:
        A DataFrame with columns ``ax, ay, az, gx, gy, gz`` holding all
        recordings stacked vertically with a fresh 0..n-1 index.

    Raises:
        ValueError: If no matching ``_acc.pkl`` file exists in ``path``.
        FileNotFoundError: If an accelerometer file has no gyroscope partner.
    """
    acc_suffix = "_acc.pkl"
    gyro_suffix = "_gyro.pkl"

    entries = set(os.listdir(path))
    acc_files = sorted(
        name
        for name in entries
        if name.startswith(class_name) and name.endswith(acc_suffix)
    )
    if not acc_files:
        raise ValueError(f"No '{class_name}*{acc_suffix}' files found in {path!r}")

    combined_data_list = []
    for acc_file in acc_files:
        # Derive the partner's name instead of trusting list positions.
        gyro_file = acc_file[: -len(acc_suffix)] + gyro_suffix
        if gyro_file not in entries:
            raise FileNotFoundError(
                f"Missing gyroscope file {gyro_file!r} for {acc_file!r} in {path!r}"
            )

        acc_df = _load_three_axis_pickle(os.path.join(path, acc_file), ["ax", "ay", "az"])
        gyro_df = _load_three_axis_pickle(os.path.join(path, gyro_file), ["gx", "gy", "gz"])

        # Combine the acc and gyro dataframes side by side
        combined_data_list.append(pd.concat([acc_df, gyro_df], axis=1))

    # Concatenate all recordings
    return pd.concat(combined_data_list, ignore_index=True)


def _load_three_axis_pickle(file_path, axis_names):
    """Load one sensor pickle and return it with the given three column names."""
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    frame = pd.read_pickle(file_path)
    # Drop the first column if there are 4 columns; checked per file so a
    # 4-column gyro file is handled even when its acc partner has 3 columns.
    if frame.shape[1] == 4:
        frame = frame.iloc[:, 1:].copy()
    frame.columns = list(axis_names)
    return frame

# Assemble the unseen recordings in "newdata/" whose file names start with "data"
new_data = combine_pickles_with_class("newdata/", "data")

# Standardize with the scaler that was fitted on the training features
new_data_scaled = scaler.transform(new_data)

# Insert a length-1 time axis -> (samples, 1, features), the same layout used for the test set
sample_count, feature_count = new_data_scaled.shape
new_data_reshaped = new_data_scaled.reshape((sample_count, 1, feature_count))

# Predicted class id = index of the largest model output per sample,
# using the in-memory best model from the grid search
new_pred = best_model.predict(new_data_reshaped).argmax(axis=1)

# Plot raw data with predicted classes
def plot_sensor_data(new_data, predicted_classes):
    """Draw accelerometer and gyroscope traces with class-change markers.

    Two stacked panels are created: the top one plots the ``ax``/``ay``/``az``
    columns of ``new_data`` and the bottom one plots ``gx``/``gy``/``gz``,
    all against ``new_data.index``. A dashed vertical line is drawn on both
    panels at every position where ``predicted_classes`` differs from the
    previous entry. The figure is shown immediately; nothing is returned.
    """
    _, axes = plt.subplots(2, 1, figsize=(12, 10))
    top_panel, bottom_panel = axes[0], axes[1]
    x_values = new_data.index

    panels = (
        (top_panel, ("ax", "ay", "az"), "Accelerometer Data"),
        (bottom_panel, ("gx", "gy", "gz"), "Gyroscope Data"),
    )
    for panel, columns, title in panels:
        # One line per axis, always coloured blue / green / red in that order
        for column, colour in zip(columns, ("b", "g", "r")):
            panel.plot(x_values, new_data[column], label=column, color=colour, linewidth=1.5)
        panel.set_title(title)
        panel.legend(loc="upper right")

    # Mark every sample whose predicted class differs from its predecessor
    for position in range(1, len(predicted_classes)):
        if predicted_classes[position - 1] == predicted_classes[position]:
            continue
        for panel in (top_panel, bottom_panel):
            panel.axvline(x=new_data.index[position], color="k", linestyle="--", alpha=0.5)

    plt.tight_layout()
    plt.show()

# Plot the raw sensor traces of the new data, with a dashed vertical line
# wherever the predicted class changes between consecutive samples
plot_sensor_data(new_data, new_pred)
