# Packages

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam, Adagrad, RMSprop
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedGroupKFold
from keras.layers import ConvLSTM2D, ConvLSTM1D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.callbacks import EarlyStopping
from scipy.stats import mode
import keras
from imblearn.over_sampling import RandomOverSampler

# Data Processing Functions

In [1]:
def oversample_data(X_train, y_train):
    """Randomly oversample a training set.

    A ``RandomOverSampler`` seeded with ``random_state=42`` is fitted on the
    given features/labels, so repeated calls on the same input give the same
    resampled output.

    Args:
        X_train: Training features.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        The ``(X_resampled, y_resampled)`` pair produced by ``fit_resample``.
    """
    return RandomOverSampler(random_state=42).fit_resample(X_train, y_train)


def shuffle_sections(df):
    """Reorder a frame trial-by-trial, keeping each trial's rows together.

    The distinct values of the ``trial_number`` column are shuffled with
    ``np.random.shuffle`` (so the result depends on NumPy's global random
    state), and the rows of each trial are emitted as one contiguous section
    in that shuffled order. Row order inside a trial is left untouched.

    Args:
        df: DataFrame containing a ``trial_number`` column.

    Returns:
        A new DataFrame with the same rows as ``df``, grouped by trial in
        random trial order. A frame without any rows yields an empty copy.

    Raises:
        KeyError: If ``df`` has no ``trial_number`` column.
    """
    trial_numbers = df['trial_number'].unique()

    # pd.concat refuses an empty list, so a frame without rows is returned
    # as-is (copied) instead of raising "No objects to concatenate".
    if len(trial_numbers) == 0:
        return df.copy()

    # Shuffle the trial ids in place; only the order of sections changes.
    np.random.shuffle(trial_numbers)

    return pd.concat(
        [df[df['trial_number'] == trial_number] for trial_number in trial_numbers]
    )




def extract_mode_label(labels):
    """Return the most common value in ``labels``.

    The value is the ``mode`` field of ``scipy.stats.mode(labels)``; the
    accompanying count is discarded.
    """
    return mode(labels).mode



def csv_to_array(initial_csv):
    """Turn a one-column DataFrame into a 1-D NumPy array.

    The frame is converted with ``to_numpy()`` and its second axis (the
    single column) is squeezed away; NumPy raises ``ValueError`` if that
    axis does not have length one.
    """
    values = initial_csv.to_numpy()
    return values.squeeze(axis=1)





def sld_wnd(data, window, stride):
    """Build sliding windows over ``data`` and keep every ``stride``-th one.

    Args:
        data: Array-like input handed to NumPy's ``sliding_window_view``.
        window: Window shape (an int for 1-D data).
        stride: Step between the windows that are kept, along the first axis.

    Returns:
        A view holding the selected windows.
    """
    all_windows = np.lib.stride_tricks.sliding_window_view(data, window)
    return all_windows[::stride, :]

def sld_wnd_2(data, window, stride):
    """Sliding-window view of ``data``, subsampled along the first axis.

    In this notebook it is called with a 2-D ``(rows, channels)`` input and a
    ``(window_size, channels)`` window shape.

    Args:
        data: Array-like input handed to NumPy's ``sliding_window_view``.
        window: Window shape, one entry per axis of ``data``.
        stride: Keep one window out of every ``stride`` along the first axis.

    Returns:
        A view holding the selected windows.
    """
    view = np.lib.stride_tricks.sliding_window_view(data, window)
    every_nth = slice(None, None, stride)
    return view[every_nth, :]




def process_files(x_file, y_file, trial_number):
    """Load one trial's feature and label CSVs and merge them into one frame.

    Both files are read without a header row. The first column of each file
    is used as the row index; the remaining columns are the six sensor
    channels (feature file) and the label (target file).

    Args:
        x_file: Path of the feature CSV.
        y_file: Path of the label CSV.
        trial_number: Value written into the ``trial_number`` column of
            every returned row.

    Returns:
        DataFrame sorted by index with the six sensor columns, ``target``
        and ``trial_number``; rows that still contain NaN after
        interpolation are removed.
    """
    sensor_columns = ['x_accelerometers', 'y_accelerometers', 'z_accelerometers', 'x_gyroscope', 'y_gyroscope', 'z_gyroscope']

    raw_feature = pd.read_csv(x_file, header=None)
    raw_target = pd.read_csv(y_file, header=None)

    # Split each raw frame into "first column -> index" and "rest -> values".
    feature_index = csv_to_array(raw_feature.iloc[:, 0].to_frame())
    target_index = csv_to_array(raw_target.iloc[:, 0].to_frame())
    feature = raw_feature.drop(raw_feature.columns[0], axis=1)
    target = raw_target.drop(raw_target.columns[0], axis=1)
    feature.index = feature_index
    target.index = target_index

    feature.columns = sensor_columns
    target.columns = ['target']

    # Align features and labels on the union of both indexes, ordered by index.
    data = pd.concat([feature, target], axis=1).sort_index(axis=0)

    # Linear interpolation runs down every column, so gaps in the sensor
    # channels are filled as well as gaps in the label.
    # NOTE(review): the label is interpolated and then rounded, so a jump
    # between non-adjacent label values can pass through the values in
    # between - confirm this is intended.
    data.interpolate(method="linear", inplace=True)
    data['target'] = np.round(data['target'])

    # Whatever interpolation could not fill is discarded.
    data.dropna(axis='index', inplace=True)

    data['trial_number'] = trial_number
    return data




def process_files_test(feature, target):
    """Merge already-loaded feature and label frames of an unseen recording.

    Same preparation as ``process_files`` but working on DataFrames instead
    of file paths, without dropping NaN rows and without adding a
    ``trial_number`` column.

    Args:
        feature: Raw feature frame; its first column becomes the index and
            the remaining six columns the sensor channels.
        target: Raw label frame; its first column becomes the index and the
            remaining column the label.

    Returns:
        DataFrame sorted by index with the six sensor columns and
        ``target``. Rows that interpolation could not fill keep their NaN.
    """
    sensor_columns = ['x_accelerometers', 'y_accelerometers', 'z_accelerometers', 'x_gyroscope', 'y_gyroscope', 'z_gyroscope']

    # Split each raw frame into "first column -> index" and "rest -> values".
    feature_index = csv_to_array(feature.iloc[:, 0].to_frame())
    target_index = csv_to_array(target.iloc[:, 0].to_frame())
    feature_values = feature.drop(feature.columns[0], axis=1)
    target_values = target.drop(target.columns[0], axis=1)
    feature_values.index = feature_index
    target_values.index = target_index

    feature_values.columns = sensor_columns
    target_values.columns = ['target']

    # Align features and labels on the union of both indexes, ordered by index.
    data = pd.concat([feature_values, target_values], axis=1).sort_index(axis=0)

    # Linear interpolation runs down every column (sensor channels and label).
    data.interpolate(method="linear", inplace=True)
    data['target'] = np.round(data['target'])

    # Unlike process_files, NaN rows are deliberately kept here.
    return data



# Importing the Data

In [None]:
# Load every available trial. The per-trial frames are collected in a list
# and concatenated once, instead of growing a DataFrame inside the loop
# (which copies all previous rows on every iteration and starts from an
# empty frame).
trial_frames = []

for i in range(1, 30):
    x_file = f"Trial{i:02d}_x.csv"
    y_file = f"Trial{i:02d}_y.csv"

    # A trial is only usable when both its feature and label file exist.
    if os.path.exists(x_file) and os.path.exists(y_file):
        trial_frames.append(process_files(x_file, y_file, i))

# Fail early with a readable message when no data was found at all.
if not trial_frames:
    raise FileNotFoundError(
        "No TrialNN_x.csv / TrialNN_y.csv pairs found in the working directory"
    )

final_data = pd.concat(trial_frames)

# Randomise the order of the trials while keeping each trial contiguous.
final_data = shuffle_sections(final_data)

# Move the label to the last column.
column_target = final_data.pop('target')
final_data['target'] = column_target

# Class balance in percent.
print(final_data['target'].value_counts(normalize=True) * 100)



# Splitting the Data into Train and Test

In [None]:
# Build train / validation / test sets trial by trial: the first 80% of each
# trial's rows go to training, the next 10% to validation and the last 10%
# to test (shuffle=False keeps the row order inside every split).

train_X = []
val_X = []
test_X = []
train_Y = []
val_Y = []
test_Y = []

for trial_number in pd.unique(final_data.loc[:, "trial_number"]):
    # Select the trial's rows once and reuse them for features and labels.
    trial_rows = final_data[final_data["trial_number"] == trial_number]

    train_x, remaining_x, train_y, remaining_y = train_test_split(
        trial_rows.loc[:, "x_accelerometers":"z_gyroscope"],
        trial_rows.loc[:, "target"],
        test_size=0.20, shuffle=False)

    # Split the held-out 20% evenly into validation and test.
    val_x, test_x, val_y, test_y = train_test_split(
        remaining_x, remaining_y, test_size=0.50, shuffle=False)

    # Only the training part is rebalanced; validation and test keep their
    # natural class distribution.
    # NOTE(review): oversampling happens on individual rows before the
    # sliding windows are built, so windows later drawn from the resampled
    # rows are presumably not contiguous in time - confirm this is intended.
    train_x_resampled, train_y_resampled = oversample_data(train_x, train_y)

    train_X.append(train_x_resampled)
    val_X.append(val_x)
    test_X.append(test_x)
    train_Y.append(train_y_resampled)
    val_Y.append(val_y)
    test_Y.append(test_y)

# Stack the per-trial pieces into one object per split.
train_X = pd.concat(train_X)
val_X = pd.concat(val_X)
test_X = pd.concat(test_X)
train_Y = pd.concat(train_Y)
val_Y = pd.concat(val_Y)
test_Y = pd.concat(test_Y)

# Features and labels side by side for each split.
train_data = pd.concat([train_X, train_Y], axis=1)
val_data = pd.concat([val_X, val_Y], axis=1)
test_data = pd.concat([test_X, test_Y], axis=1)


# Generating Sequences of Data Followed by Reshaping

In [None]:
window_size = 64
step = 1


def _feature_windows(features):
    """Cut ``features`` into windows of shape (window_size, 6), one every ``step`` rows."""
    windows = sld_wnd_2(features, (window_size, 6), step)
    # sliding_window_view leaves a singleton axis for the channel dimension;
    # the reshape drops it and yields (n_windows, window_size, 6).
    return np.reshape(windows, (len(windows), window_size, 6))


def _window_labels(labels):
    """Return the most frequent label of every window as an (n_windows, 1) array."""
    windows = sld_wnd(labels, window_size, step)
    # One call over axis 1 gives the per-window mode for all windows at once,
    # replacing a Python-level scipy call per window.
    per_window_mode = mode(windows, axis=1).mode
    return np.reshape(np.asarray(per_window_mode), (len(windows), 1))


# Sliding windows over the sensor channels.
train_X = _feature_windows(train_X)
val_X = _feature_windows(val_X)
test_X = _feature_windows(test_X)

# One label per window: the mode of the labels it covers.
train_Y = _window_labels(train_Y)
val_Y = _window_labels(val_Y)
test_Y = _window_labels(test_Y)

In [None]:
# StandardScaler works on 2-D input, so every split is first flattened to
# one row per time step with the channels as columns.
train_X_flattened, val_X_flattened, test_X_flattened = (
    windows.reshape(-1, windows.shape[-1]) for windows in (train_X, val_X, test_X)
)

# The scaling statistics come from the training split only; validation and
# test are transformed with those same statistics.
scaler = StandardScaler()
train_X_scaled_flattened = scaler.fit_transform(train_X_flattened)
val_X_scaled_flattened, test_X_scaled_flattened = (
    scaler.transform(rows) for rows in (val_X_flattened, test_X_flattened)
)

# Restore the original 3-D window layout of each split.
train_X_scaled = train_X_scaled_flattened.reshape(train_X.shape)
val_X_scaled = val_X_scaled_flattened.reshape(val_X.shape)
test_X_scaled = test_X_scaled_flattened.reshape(test_X.shape)

#X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
#X_val_scaled = X_val_scaled.reshape(X_val_scaled.shape[0], X_val_scaled.shape[1], 1)
#X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)

# Model Compiling

In [None]:
n_timesteps = train_X.shape[1]
n_features = train_X.shape[2]
n_outputs = train_Y.shape[1]

# Every window is split into n_steps sub-sequences of n_length time steps.
n_steps = 4
n_length = 16

# The split only works when the sub-sequences tile the window exactly.
if n_steps * n_length != n_timesteps:
    raise ValueError(
        f"n_steps * n_length ({n_steps * n_length}) must equal the window length ({n_timesteps})"
    )

# ConvLSTM2D input layout per sample: (n_steps, 1, n_length, n_features).
subsequence_shape = (n_steps, 1, n_length, n_features)
train_X_scaled = train_X_scaled.reshape((train_X_scaled.shape[0],) + subsequence_shape)
val_X_scaled = val_X_scaled.reshape((val_X_scaled.shape[0],) + subsequence_shape)
test_X_scaled = test_X_scaled.reshape((test_X_scaled.shape[0],) + subsequence_shape)

model = Sequential()

# Convolutional LSTM over the sub-sequences.
model.add(ConvLSTM2D(filters = 64, kernel_size = (1,3), input_shape = subsequence_shape))
model.add(keras.layers.LeakyReLU())
model.add(BatchNormalization())

model.add(Flatten())

# Two fully connected blocks: Dense -> LeakyReLU -> BatchNormalization.
model.add(Dense(75))
model.add(keras.layers.LeakyReLU())
model.add(BatchNormalization())

model.add(Dense(100))
model.add(keras.layers.LeakyReLU())
model.add(BatchNormalization())

# Four-way softmax output; labels are integer class ids, hence the sparse
# categorical cross-entropy loss below.
model.add(Dense(4, activation = 'softmax'))

# Compile the model
optimizer = Adam(learning_rate=0.01)
# Stop once the validation loss has not improved for 5 epochs and roll back
# to the best weights seen.
early_stopping = EarlyStopping(monitor = 'val_loss', mode = 'min', patience = 5, restore_best_weights = True)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()

history = model.fit(train_X_scaled, train_Y, epochs=20, batch_size=500, validation_data=(val_X_scaled, val_Y), verbose = 1, callbacks = [early_stopping])



# Plots

In [None]:
def _plot_training_curve(train_key, val_key, title, y_label):
    """Draw one training-history figure: training vs. validation per epoch."""
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


# Accuracy first, then loss - one figure each.
_plot_training_curve('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy')
_plot_training_curve('loss', 'val_loss', 'Model loss', 'Loss')

In [None]:
# Score the trained model on the held-out test windows. With the single
# 'accuracy' metric compiled above, evaluate() yields the loss followed by
# the accuracy.
evaluation = model.evaluate(test_X_scaled, test_Y)
test_loss, test_acc = evaluation
print("Test Accuracy:", test_acc)
print(test_loss)

# Unseen data Testing

In [None]:
# Predict labels for the four unseen recordings (Test01 .. Test04) and write
# one result CSV per recording.
for i in range(1, 5):
    # Load test set data
    test_x_filename = f"Test0{i}_x.csv"
    test_y_filename = f"Test0{i}_y.csv"
    test_x_path = os.path.dirname(test_x_filename)
    test_x = pd.read_csv(test_x_filename, header=None)
    test_y = pd.read_csv(test_y_filename, header=None)

    # Same preparation as for training, but NaN rows are kept.
    data2 = process_files_test(test_x, test_y)
    X_test01 = data2.iloc[:, 0:6].values

    # Window, scale with the scaler fitted on the training data, and bring
    # the windows into the ConvLSTM input layout.
    X_test01 = sld_wnd_2(X_test01, (window_size, 6), step)
    X_test01 = np.reshape(X_test01, (len(X_test01), window_size, 6))
    X_test01_flattened = X_test01.reshape(-1, X_test01.shape[-1])
    X_test01_scaled_flattened = scaler.transform(X_test01_flattened)
    X_test01_scaled = X_test01_scaled_flattened.reshape(X_test01.shape)
    X_test01_scaled = X_test01_scaled.reshape((X_test01_scaled.shape[0], n_steps, 1, n_length, n_features))
    predictions = model.predict(X_test01_scaled, batch_size=3000)

    # Most probable class per window.
    predicted_classes = np.argmax(predictions, axis=1)

    # Turn the index back into the first column.
    data2.reset_index(drop=False, inplace=True)

    # Write the predictions into the first len(predicted_classes) rows with a
    # single positional assignment. The previous chained form
    # (data2['target'][:n] = ...) writes to a temporary object under pandas
    # Copy-on-Write and would leave data2 unchanged.
    target_position = data2.columns.get_loc('target')
    data2.iloc[:len(predicted_classes), target_position] = predicted_classes

    # There are fewer windows than rows; rows after the last window that are
    # still NaN inherit the last available label.
    # NOTE(review): trailing rows that already hold a non-NaN value keep it -
    # confirm the label file carries no values for these rows.
    data2['target'] = data2['target'].ffill()

    # Name the first column of both frames "time" so they can be joined on it.
    time_column_name = "time"
    data2 = data2.rename(columns={data2.columns[0]: time_column_name})
    test_y = test_y.rename(columns={test_y.columns[0]: time_column_name})

    # Keep only the rows whose time value also appears in the label file.
    merged_df = pd.merge(data2, test_y, on=time_column_name, how='inner')

    # Output consists of the time column and the predicted label only.
    new_df = merged_df[[time_column_name, 'target']]

    # Write the result next to the input file, without header or index.
    output_filename = f"result_test0{i}.csv"
    output_file_path = os.path.join(test_x_path, output_filename)
    new_df.to_csv(output_file_path, header=False, index=False)
