In [None]:
# Import libraries
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, train_test_split


# Function to clean NaN and infinite values
def clean_data(X_train, X_valid, X_test):
    """Replace NaN and infinite feature values with training-set column means.

    Positive and negative infinities are first converted to NaN, then every
    NaN is filled with the mean of the corresponding column computed on
    ``X_train`` only. Using the training statistics for all three splits
    avoids leaking validation/test information into preprocessing and also
    fills columns that are entirely missing in a smaller split.

    Args:
        X_train: Training feature DataFrame (also the source of fill values).
        X_valid: Validation feature DataFrame.
        X_test: Test feature DataFrame.

    Returns:
        The tuple ``(X_train, X_valid, X_test)``. The frames are modified in
        place and the same objects are returned.
    """
    non_finite = [np.inf, -np.inf]

    # Means are taken after masking infinities so a single inf cannot turn
    # the fill value itself into inf/NaN.
    train_means = X_train.replace(non_finite, np.nan).mean()

    for frame in (X_train, X_valid, X_test):
        frame.replace(non_finite, np.nan, inplace=True)
        frame.fillna(train_means, inplace=True)

    return X_train, X_valid, X_test


# Read the pre-split train / validation / test frames from disk
data_train = pd.read_pickle("clean_data/train/all_data_train.pkl")
data_valid = pd.read_pickle("clean_data/valid/all_data_valid.pkl")
data_test = pd.read_pickle("clean_data/test/all_data_test.pkl")

# Targets: the "class" column of each split
y_train, y_valid, y_test = (
    data_train["class"],
    data_valid["class"],
    data_test["class"],
)

# Features: every column except "class"
X_train = data_train.drop(columns="class")
X_valid = data_valid.drop(columns="class")
X_test = data_test.drop(columns="class")

# Map class labels to integer ids; the mapping is learned from the training
# labels only and then applied unchanged to the other splits
encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_valid, encoded_y_test = (
    encoder.transform(labels) for labels in (y_train, y_valid, y_test)
)

# Fill missing feature values (see clean_data)
X_train, X_valid, X_test = clean_data(X_train, X_valid, X_test)

# Standardize features with statistics fitted on the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)


# Function to build a model with variable layers and neurons
def build_gru_model(
    num_neurons=32,
    dropout_rate=0.2,
    learning_rate=0.001,
    input_shape=(100, 6),
    num_classes=4,
):
    """Build and compile a two-layer GRU classifier.

    The network is GRU (returning sequences) -> Dropout -> GRU -> softmax
    Dense layer, compiled with sparse categorical cross-entropy so targets
    are expected as integer class ids.

    Args:
        num_neurons: Number of units in each of the two GRU layers.
        dropout_rate: Dropout fraction applied between the GRU layers.
        learning_rate: Learning rate passed to the Adam optimizer.
        input_shape: Shape of one sample passed to the first GRU layer
            (presumably ``(timesteps, features_per_step)`` — confirm against
            the caller).
        num_classes: Size of the softmax output layer (defaults to 4).

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: keras is already a file-level dependency, but Adam itself
    # is not imported at the top of the file.
    from keras.optimizers import Adam

    model = Sequential()
    model.add(GRU(num_neurons, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(dropout_rate))
    model.add(GRU(num_neurons))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(
        loss="sparse_categorical_crossentropy",
        # An optimizer instance (rather than the string "adam") is required
        # for the learning_rate argument to have any effect.
        optimizer=Adam(learning_rate=learning_rate),
        metrics=["accuracy"],
    )
    return model


# Reshape data for the GRU model
def reshape_data(X, window_length):
    """Reshape a 2-D feature matrix into 3-D sequences for a recurrent layer.

    Each row of ``X`` is split into ``window_length`` consecutive groups of
    ``num_features // window_length`` values.

    Args:
        X: 2-D array-like of shape ``(num_samples, num_features)``.
        window_length: Number of groups to split each row into; must be a
            positive divisor of ``num_features``.

    Returns:
        Array of shape
        ``(num_samples, window_length, num_features // window_length)``.

    Raises:
        ValueError: If ``X`` is not 2-D, ``window_length`` is not positive,
            or ``num_features`` is not a multiple of ``window_length``.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"reshape_data expects a 2-D array, got {X.ndim} dimension(s)"
        )
    if window_length <= 0:
        raise ValueError(f"window_length must be positive, got {window_length}")

    num_samples, num_features = X.shape
    features_per_step, remainder = divmod(num_features, window_length)
    if remainder:
        # Fail early with a message that names the real problem instead of
        # numpy's generic "cannot reshape array" error.
        raise ValueError(
            f"number of features ({num_features}) is not divisible by "
            f"window_length ({window_length})"
        )
    return X.reshape(num_samples, window_length, features_per_step)


# Turn every flat feature row into a (window_length, features) sequence
window_length = 4  # Example window size
X_train_reshaped, X_valid_reshaped, X_test_reshaped = (
    reshape_data(split, window_length) for split in (X_train, X_valid, X_test)
)

# scikit-learn compatible wrapper so GridSearchCV can build and fit the network;
# input_shape is forwarded to build_gru_model
model = KerasClassifier(
    build_fn=build_gru_model,
    input_shape=(window_length, X_train_reshaped.shape[-1]),
    verbose=0,
)

# Candidate values explored by the grid search
param_grid = dict(
    batch_size=[32, 64],
    epochs=[10, 20],
    num_neurons=[32, 64, 128],
    dropout_rate=[0.2, 0.3, 0.4],
)

# Grid Search: 3-fold cross-validation over every combination in param_grid
# NOTE(review): n_jobs=-1 runs fits in parallel worker processes; confirm this
# works with the installed Keras/TensorFlow backend, otherwise use n_jobs=1.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train_reshaped, encoded_y_train)

# Output the best hyperparameters
print(f"Best Hyperparameters: {grid_result.best_params_}")
print(f"Best Score: {grid_result.best_score_}")

# Evaluate the best model (refit on the full training set by GridSearchCV)
# on the test set
best_model = grid_result.best_estimator_

# Persist the underlying Keras network: the script later calls
# load_model("best_gru_model.h5"), and nothing else in this file writes it.
best_model.model.save("best_gru_model.h5")

test_accuracy = best_model.score(X_test_reshaped, encoded_y_test)
print(f"Test accuracy: {test_accuracy}")

# Predict on test data and plot the confusion matrix of encoded class ids
y_pred = best_model.predict(X_test_reshaped)
cm = confusion_matrix(encoded_y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="g")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()


# Load new data and predict (for grade 4)
def _load_sensor_frame(file_path, columns):
    """Load one pickled 3-axis sensor frame and rename its columns."""
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    frame = pd.read_pickle(file_path)
    if len(frame.columns) == 4:
        # A fourth leading column is discarded (presumably a timestamp or
        # index column — TODO confirm against the recording format).
        frame = frame.iloc[:, 1:].copy()
    frame.columns = columns
    return frame


def combine_pickles_with_class(path, class_name):
    """Merge matching accelerometer/gyroscope pickles into one DataFrame.

    Files in ``path`` whose names start with ``class_name`` and end with
    ``_acc.pkl`` are paired with the file that has the same name but ends
    with ``_gyro.pkl``. Each pair is joined column-wise and all pairs are
    stacked row-wise in sorted filename order.

    Args:
        path: Directory containing the pickle files (a trailing separator is
            optional).
        class_name: Filename prefix selecting which recordings to load.

    Returns:
        DataFrame with columns ``ax, ay, az, gx, gy, gz`` and a fresh
        0..n-1 index.

    Raises:
        ValueError: If no accelerometer file matches, or if an accelerometer
            file has no gyroscope counterpart.
    """
    acc_suffix, gyro_suffix = "_acc.pkl", "_gyro.pkl"
    candidates = [name for name in os.listdir(path) if name.startswith(class_name)]

    # Pair by shared filename stem: os.listdir order is arbitrary, so pairing
    # the i-th acc file with the i-th gyro file could mix up recordings.
    acc_stems = sorted(
        name[: -len(acc_suffix)] for name in candidates if name.endswith(acc_suffix)
    )
    gyro_stems = {
        name[: -len(gyro_suffix)] for name in candidates if name.endswith(gyro_suffix)
    }

    if not acc_stems:
        raise ValueError(
            f"no '{class_name}*{acc_suffix}' files found in {path!r}"
        )
    unpaired = [stem for stem in acc_stems if stem not in gyro_stems]
    if unpaired:
        raise ValueError(
            f"missing '{gyro_suffix}' counterpart for: {', '.join(unpaired)}"
        )

    combined_data_list = []
    for stem in acc_stems:
        acc_df = _load_sensor_frame(
            os.path.join(path, stem + acc_suffix), ["ax", "ay", "az"]
        )
        gyro_df = _load_sensor_frame(
            os.path.join(path, stem + gyro_suffix), ["gx", "gy", "gz"]
        )
        combined_data_list.append(pd.concat([acc_df, gyro_df], axis=1))

    return pd.concat(combined_data_list, ignore_index=True)


# Restore the persisted network from disk; this rebinds best_model to the
# loaded Keras model
best_model = load_model("best_gru_model.h5")

# Gather every accelerometer/gyroscope recording under newdata/ whose
# filename starts with "data"
new_data = combine_pickles_with_class("newdata/", "data")

# Standardize with the scaler that was fitted on the training features
# NOTE(review): new_data has the six columns ax..gz; confirm that this matches
# the feature count the scaler was fitted on and is divisible by window_length.
new_data_scaled = scaler.transform(new_data)

# Same windowed layout as the training data
new_data_reshaped = reshape_data(new_data_scaled, window_length)

# Per-class scores for each new sample
new_pred = best_model.predict(new_data_reshaped)

# The predicted class id is the index of the highest score in each row
new_pred_classes = new_pred.argmax(axis=1)
print(new_pred_classes)