# I-MOTION Model 

In [1]:
import torch
import torch.nn as nn
import random
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Global experiment constants.
random_seed = 42   # shared seed for every random number generator used below
window_size = 600  # number of samples per window when frames are cut up later

# Seed Python, NumPy and PyTorch (CPU) so repeated runs draw the same numbers.
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Select the compute device; prefer the GPU whenever CUDA is usable.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("CUDA not available. Using CPU.")
else:
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
    # Seed the current GPU and every other visible GPU (multi-GPU setups).
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)
    # Ask cuDNN for deterministic kernels and disable its auto-tuner so that
    # kernel selection does not vary between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

CUDA not available. Using CPU.


## Load Data

In [4]:
# Load the three accelerometer axes and the per-sample labels. Each text file
# is parsed as a space-delimited 2-D table.
# NOTE(review): presumably one row per recording frame and one column per
# sample -- confirm against the dataset description.
data_x = np.loadtxt("./train/raw_data/Acc_x.txt", delimiter=" ")
data_y = np.loadtxt("./train/raw_data/Acc_y.txt", delimiter=" ")
data_z = np.loadtxt("./train/raw_data/Acc_z.txt", delimiter=" ")
train_label = np.loadtxt("./train/train_label.txt", delimiter=" ")
train_order = np.loadtxt("./train/train_order.txt", dtype=float)

# Stack to (rows, samples, 4): channels 0-2 are x/y/z, channel 3 is the label.
acc_data = np.dstack((data_x, data_y, data_z, train_label))
# Reorder the rows by train_order; 1 is subtracted to turn the stored values
# into 0-based row indices.
acc_data = acc_data[train_order.astype(np.int_).flatten() - 1, :, :]

# Cut every frame into consecutive, non-overlapping windows of `window_size`
# samples. Samples that do not fill a complete window at the end of a frame
# are dropped, so the arrays below always hold exactly
# num_frames * windows_per_frame complete windows (the previous loop overran
# its pre-allocated buffers when the frame length was not a multiple of
# window_size).
num_frames, frame_length = acc_data.shape[:2]
windows_per_frame = frame_length // window_size
usable_length = windows_per_frame * window_size

# (num_frames, usable_length, 3) -> (num_frames * windows_per_frame, window_size, 3);
# row-major reshape keeps windows ordered frame by frame, then by start offset.
window_data = (
    acc_data[:, :usable_length, :3]
    .reshape(num_frames * windows_per_frame, window_size, 3)
    .astype(np.float32)
)
# Assuming all samples in a window have the same label, take the label of the
# first sample of each window.
windowed_labels = (
    acc_data[:, :usable_length:window_size, 3]
    .reshape(num_frames * windows_per_frame, 1)
    .astype(np.float32)
)

# Verify the shapes
print("Features shape:", window_data.shape)
print("Labels shape:", windowed_labels.shape)
    

(16310, 6000, 4)
(163100, 600, 3)
Features shape: [[[-2.36748   9.603528 -1.200628]
  [-2.442098  9.238089 -1.462477]
  [-2.774196  8.611417 -1.739763]
  ...
  [-1.525542  9.455682  0.303084]
  [-1.469803  9.531071  0.279402]
  [-1.421543  9.6702    0.285504]]

 [[-1.367722  9.750951  0.306458]
  [-1.334744  9.776136  0.295097]
  [-1.316464  9.764919  0.287304]
  ...
  [ 0.137368 12.029835 -2.38063 ]
  [ 0.710383 13.127102 -2.832197]
  [ 1.297946 13.61043  -2.181224]]

 [[ 2.160904 13.322191 -1.865706]
  [ 2.389481 12.746821 -1.159406]
  [ 2.072968 12.296709 -1.122953]
  ...
  [-0.124745  8.965409 -2.24249 ]
  [-0.341386  8.807157 -2.135628]
  [-0.476393  8.848304 -2.124934]]

 ...

 [[ 1.465765  4.178371 -8.714756]
  [ 1.023613  3.981262 -8.475474]
  [ 1.223304  3.938922 -8.735217]
  ...
  [ 1.207551  3.81527  -8.77083 ]
  [ 1.504728  3.894734 -8.909285]
  [ 1.894138  3.928993 -9.014277]]

 [[ 1.815587  3.903015 -8.920031]
  [ 1.397466  3.75651  -8.670224]
  [ 1.29213   3.754843 -8.61

## Helper Functions

In [3]:
def plot_training_losses(train_losses):
    """
    Plots the training loss recorded for each epoch as a line chart.

    Args:
        train_losses (list of float): One loss value per epoch, in training order.
            Epochs are numbered from 1 on the x-axis.
    """
    plt.figure(figsize=(10, 6))
    # The line needs a label: calling plt.legend() without any labelled artist
    # only produces a warning and an empty legend.
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training loss')
    plt.title('Training Loss Over Epochs', fontsize=16)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.legend()
    plt.grid(True)
    plt.show()


def plot_metric_bar_charts(names, metric_values, metric_names):
    """
    Creates bar charts for different evaluation metrics across various normalization techniques.

    Args:
        names (list of str): Names of the normalization techniques.
        metric_values (list of lists): Each sublist contains the values of a metric for each normalization technique,
            i.e. metric_values[i][j] is metric i measured for names[j].
        metric_names (list of str): Names of the metrics being plotted.

    Raises:
        ValueError: If no names or metrics are given, if fewer value lists than metric names are given,
            or if a value list does not have exactly one value per name.

    This function plots a bar chart for each provided metric, comparing the performance of different normalization techniques,
    with y-axis limits dynamically adjusted to emphasize differences while capping at 1.
    """
    names = list(names)
    num_metrics = len(metric_names)
    if num_metrics == 0:
        raise ValueError("metric_names must contain at least one metric")
    if not names:
        raise ValueError("names must contain at least one entry")
    if len(metric_values) < num_metrics:
        raise ValueError("metric_values must provide one list of values per metric name")

    # Validate everything before any figure is created.
    per_metric_values = []
    for metric_name, values in zip(metric_names, metric_values):
        values = [float(value) for value in values]
        if len(values) != len(names):
            raise ValueError(
                f"metric '{metric_name}' has {len(values)} values but {len(names)} names were given"
            )
        per_metric_values.append(values)

    # Near-square grid without completely empty rows.
    num_cols = int(math.ceil(math.sqrt(num_metrics)))
    num_rows = int(math.ceil(num_metrics / num_cols))

    sns.set(style='whitegrid', palette='muted', font_scale=1.2)

    # squeeze=False always returns a 2-D array of axes, so a single metric
    # needs no special casing.
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(12 * num_cols, 8 * num_rows), squeeze=False)
    fig.subplots_adjust(hspace=0.4, wspace=0.4)
    flat_axes = axes.flatten()

    for ax, metric_name, values in zip(flat_axes, metric_names, per_metric_values):
        # One row per technique. The previous version paired every name with
        # every value, so each bar showed the mean over all techniques.
        data = pd.DataFrame({
            'Normalization Technique': names,
            metric_name: values,
        })

        sns.barplot(x='Normalization Technique', y=metric_name, data=data, ax=ax, alpha=0.75)

        # Dynamically adjust the y-axis limits
        min_val = min(values) * 0.9  # Start slightly below the smallest value for better visibility
        max_val = 1  # Ensuring the upper limit is 1
        ax.set_ylim([min_val, max_val])

        ax.set_xlabel('Normalization Technique', fontsize=14)
        ax.set_ylabel(f'{metric_name} Value', fontsize=14)
        ax.set_title(f'Comparison of {metric_name}', fontsize=16)
        ax.tick_params(axis='x', rotation=45, labelsize=12)
        ax.tick_params(axis='y', labelsize=12)

        # Add text labels above bars. Categorical bars are drawn at x = 0, 1, 2, ...
        # in row order, so the label is placed by position instead of relying
        # on the order of ax.patches.
        for position, value in enumerate(values):
            ax.text(position, value, f'{value:.2f}', ha='center', va='bottom', fontsize=10)

    # Hide unused subplots if the number of metrics is less than the number of subplot positions
    for unused_ax in flat_axes[num_metrics:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

def calculate_padding(kernel_size):
    """Return the floor of ``(kernel_size - 1) / 2``.

    For an odd kernel with stride 1 this is the per-side padding that keeps a
    convolution's output as long as its input.
    """
    half_span, _ = divmod(kernel_size - 1, 2)
    return half_span

def calculate_output_length(input_length, kernel_size, stride, padding):
    """Return ``floor((input_length + 2 * padding - kernel_size) / stride) + 1``.

    This is the usual output-length formula of a 1-D convolution or pooling
    layer with the given kernel size, stride and per-side padding.
    """
    padded_length = input_length + 2 * padding
    full_steps = (padded_length - kernel_size) // stride
    return full_steps + 1

def calculate_pooling_padding(window_size, stride, input_size, output_size):
    """Return ``floor(((output_size - 1) * stride - input_size + window_size) / 2)``.

    The numerator is the total amount of padding a pooling window of
    ``window_size`` moving with ``stride`` needs to turn ``input_size`` samples
    into ``output_size`` outputs; half of it (rounded down) is returned.
    """
    covered_span = (output_size - 1) * stride + window_size
    total_padding = covered_span - input_size
    return total_padding // 2

## Preprocessing

In [None]:
def preprocess_accelerometer_data(accel_data, alpha=0.8, m=5):
    """
    Turns a raw accelerometer series into smoothed linear-acceleration magnitudes.

    The series goes through three steps:
      1. Gravity removal: gravity is tracked with the exponential low-pass filter
         ``g[i] = alpha * g[i-1] + (1 - alpha) * a[i]`` (starting from zero) and
         subtracted from every sample.
      2. Smoothing: a centred moving average of width ``m``. Near both ends of the
         series the window shrinks symmetrically so that it never reaches past the
         data (the first and last sample are left unchanged).
      3. Magnitude: the Euclidean norm over the axes of each smoothed sample.

    Args:
        accel_data (array-like): 2-D array of shape (num_samples, num_axes).
        alpha (float, optional): Low-pass filter coefficient of the gravity estimate. Defaults to 0.8.
        m (int, optional): Moving-average width; the window covers ``m // 2`` samples on
            each side of the centre, i.e. exactly ``m`` samples for odd ``m``. Defaults to 5.

    Returns:
        numpy.ndarray: float64 array of shape (1, num_samples) holding the magnitudes.

    Raises:
        ValueError: If ``accel_data`` is not 2-D or ``m`` is smaller than 1.
    """
    # Work in float64 regardless of the input dtype; integer input would
    # otherwise be truncated when results are written back.
    accel = np.asarray(accel_data, dtype=np.float64)
    if accel.ndim != 2:
        raise ValueError("accel_data must be a 2-D array of shape (num_samples, num_axes)")
    if m < 1:
        raise ValueError("m must be at least 1")
    num_samples, num_axes = accel.shape

    # Step 1: Remove Gravity. The filter is recursive, so it is evaluated
    # sample by sample.
    gravity = np.zeros(num_axes)
    linear_acceleration = np.empty_like(accel)
    for i in range(num_samples):
        gravity = alpha * gravity + (1 - alpha) * accel[i]
        linear_acceleration[i] = accel[i] - gravity

    # Step 2: Smooth Data. Sample i is averaged over [i - h, i + h], where h is
    # m // 2 limited by the distance to the nearer end of the series.
    positions = np.arange(num_samples)
    half_width = np.minimum(np.minimum(positions, num_samples - 1 - positions), m // 2)
    # prefix[j] is the per-axis sum of the first j samples, so any window sum
    # is the difference of two prefix rows.
    prefix = np.zeros((num_samples + 1, num_axes))
    prefix[1:] = np.cumsum(linear_acceleration, axis=0)
    window_sums = prefix[positions + half_width + 1] - prefix[positions - half_width]
    smoothed_data = window_sums / (2 * half_width + 1)[:, np.newaxis]

    # Step 3: Calculate Magnitude
    magnitudes = np.sqrt(np.sum(smoothed_data ** 2, axis=1)).reshape(1, -1)

    return magnitudes

# One row of magnitudes per window: (num_windows, samples_per_window).
# The buffer was previously allocated transposed, which made the row-wise
# assignment below fail with a shape mismatch.
processed_data = np.empty((window_data.shape[0], window_data.shape[1]))
for i, window in enumerate(window_data):
    # Flatten the returned magnitudes so they fill exactly one row.
    processed_data[i, :] = np.ravel(preprocess_accelerometer_data(window))

print(processed_data.shape)  # Output the shape of the processed data


## Model Construction

In [None]:
class IMOTION_CNN(nn.Module):
    """
    1-D convolutional classifier: a stack of (Conv1d -> ReLU -> MaxPool1d) stages
    followed by two fully connected layers.

    Args:
        in_feature (int): Length of each input sequence.
        in_channel (int): Number of input channels.
        out_feature (int): Size of the output vector (e.g. number of classes).
        pool_window_size (int): Kernel size of the max-pooling layer shared by all stages.
        pool_stride_size (int): Stride of the max-pooling layer.
        pool_padding (int): Padding of the max-pooling layer.
        conv_filter_list (sequence of int): Output channels of each convolution.
        conv_kernel_list (sequence of int): Kernel size of each convolution; must be as
            long as ``conv_filter_list``.
        conv_stride (int): Stride shared by all convolutions.
        full_connection_size (int): Width of the hidden fully connected layer.

    Raises:
        ValueError: If a size is not positive, the two convolution lists are empty or
            differ in length, or the configured layers cannot process an input of
            length ``in_feature``.

    The all-zero defaults are placeholders only; every size has to be supplied.
    """

    def __init__(self, in_feature=0, in_channel=0, out_feature=0, pool_window_size=0, pool_stride_size=0,
                 pool_padding=0, conv_filter_list=0, conv_kernel_list=0, conv_stride=0, full_connection_size=0):
        super(IMOTION_CNN, self).__init__()

        conv_filter_list = self._as_int_list(conv_filter_list, "conv_filter_list")
        conv_kernel_list = self._as_int_list(conv_kernel_list, "conv_kernel_list")
        # Explicit checks instead of `assert`, which is stripped under `python -O`.
        if len(conv_filter_list) != len(conv_kernel_list):
            raise ValueError("conv_filter_list and conv_kernel_list must have the same length")
        positive_sizes = {
            "in_feature": in_feature,
            "in_channel": in_channel,
            "out_feature": out_feature,
            "pool_window_size": pool_window_size,
            "pool_stride_size": pool_stride_size,
            "conv_stride": conv_stride,
            "full_connection_size": full_connection_size,
        }
        for size_name, size_value in positive_sizes.items():
            if size_value <= 0:
                raise ValueError(f"{size_name} must be a positive integer, got {size_value!r}")

        self.pool = nn.MaxPool1d(kernel_size=pool_window_size, stride=pool_stride_size, padding=pool_padding)
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=in_channel if i == 0 else conv_filter_list[i-1],
                      out_channels=conv_filter_list[i],
                      kernel_size=conv_kernel_list[i], stride=conv_stride)
            for i in range(len(conv_filter_list))
        ])

        # Size of the flattened feature map, measured by pushing one all-zero
        # sample through the convolution/pooling stack. The previous closed
        # form (length halved per stage, computed with `/`) produced a float
        # and ignored the shrinkage caused by the unpadded convolutions.
        try:
            with torch.no_grad():
                probe = torch.zeros(1, in_channel, in_feature)
                for conv in self.conv_layers:
                    probe = self.pool(conv(probe))
        except RuntimeError as err:
            raise ValueError(
                f"the convolution/pooling configuration cannot process inputs of length {in_feature}"
            ) from err
        self.final_matrix_size = probe.numel()

        self.fc1 = nn.Linear(self.final_matrix_size, full_connection_size)
        self.fc2 = nn.Linear(full_connection_size, out_feature)

    @staticmethod
    def _as_int_list(values, name):
        """Return ``values`` as a non-empty list of ints, or raise ValueError."""
        try:
            as_list = [int(value) for value in values]
        except TypeError:
            raise ValueError(f"{name} must be a non-empty sequence of integers, got {values!r}") from None
        if not as_list:
            raise ValueError(f"{name} must contain at least one entry")
        return as_list

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): Batch of shape (batch, in_channel, in_feature).

        Returns:
            torch.Tensor: Unnormalised outputs of shape (batch, out_feature).
        """
        for conv in self.conv_layers:
            x = self.pool(torch.relu(conv(x)))
        # Flatten per sample; keeping the batch dimension fixed means an input
        # of unexpected length fails in fc1 instead of silently changing the
        # batch size.
        x = x.flatten(start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Dataset

In [None]:
# Define a custom Dataset
class AccDataset(Dataset):
    """
    In-memory dataset of (features, label) pairs.

    Args:
        features (array-like or torch.Tensor): Samples, indexed along the first axis.
            Stored as a float32 tensor.
        labels (array-like or torch.Tensor): One label per sample, indexed along the first axis.
        label_dtype (torch.dtype, optional): dtype the labels are stored as. Defaults to
            torch.float32; pass torch.long for class-index targets such as those
            expected by nn.CrossEntropyLoss.

    Raises:
        ValueError: If ``features`` and ``labels`` do not have the same number of entries.
    """

    def __init__(self, features, labels, label_dtype=torch.float32):
        self.features = self._to_tensor(features, torch.float32)  # Convert features to PyTorch tensors
        self.labels = self._to_tensor(labels, label_dtype)        # Convert labels to PyTorch tensors
        # Fail early instead of hitting an IndexError in the middle of an epoch.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(self.features)} and {len(self.labels)}"
            )

    @staticmethod
    def _to_tensor(data, dtype):
        """Return an independent copy of ``data`` as a tensor of ``dtype``."""
        if isinstance(data, torch.Tensor):
            # torch.tensor(tensor) emits a UserWarning; clone explicitly instead.
            return data.detach().clone().to(dtype)
        return torch.tensor(data, dtype=dtype)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

def prepare_data_loaders(train_dataset, test_dataset, batch_size=10):
    """
    Builds the training and test DataLoaders.

    Args:
        train_dataset: Training data, either a ``torch.utils.data.Dataset`` instance or a
            ``(features, labels)`` pair that is wrapped in an ``AccDataset``.
        test_dataset: Test data, in the same two accepted forms.
        batch_size (int, optional): Number of samples per batch. Defaults to 10.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader reshuffles the data
        every epoch; the test loader keeps the original order.
    """
    def as_dataset(data):
        # Ready-made datasets are used unchanged; anything else must unpack
        # into exactly (features, labels). AccDataset was previously called
        # with a single argument, which always raised a TypeError.
        if isinstance(data, Dataset):
            return data
        features, labels = data
        return AccDataset(features, labels)

    train_loader = DataLoader(as_dataset(train_dataset), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(as_dataset(test_dataset), batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

## Training & Evaluation

In [6]:
def train_model(model, train_loader, optimizer, criterion, epochs=100, device='cpu'):
    """
    Trains the model and records the average loss of every epoch.

    Args:
        model (torch.nn.Module): The neural network model to be trained.
        train_loader (iterable): Yields batches shaped ``(*inputs, labels)``: the last
            element is the target and every preceding element is passed to the model
            positionally. This covers the ``(features, labels)`` batches of AccDataset
            as well as batches with several model inputs.
        optimizer (torch.optim.Optimizer): Optimizer that updates the model parameters.
        criterion (callable): Loss function called as ``criterion(output, labels)``.
        epochs (int, optional): Number of passes over ``train_loader``. Defaults to 100.
        device (str or torch.device, optional): Device the batches are moved to. Defaults to 'cpu'.

    Returns:
        list of float: Average batch loss of each epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()  # Set the model to training mode
    loss_values = []  # Initialize a list to store the average loss per epoch

    for epoch in range(epochs):
        total_loss = 0.0  # Track total loss for each epoch
        num_batches = 0   # Counted while iterating, so loaders without len() work too

        for batch in train_loader:
            # The last element is the target, everything before it is model input.
            *inputs, labels = batch
            # Move data to the specified device
            inputs = [tensor.to(device) for tensor in inputs]
            labels = labels.to(device)

            optimizer.zero_grad()  # Clear gradients for the next train step
            output = model(*inputs)  # Forward pass
            loss = criterion(output, labels)  # Compute the loss
            loss.backward()  # Backward pass to compute gradients
            optimizer.step()  # Update model parameters

            total_loss += loss.item()  # Accumulate the loss
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot compute an average loss")

        avg_loss = total_loss / num_batches  # Calculate average loss
        loss_values.append(avg_loss)  # Append average loss to list

        # Print the average loss for the current epoch
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')

    return loss_values


def evaluate_model(model, test_loader, device='cpu'):
    """
    Evaluates the model on a test dataset.

    Args:
        model (torch.nn.Module): The neural network model to be evaluated.
        test_loader (torch.utils.data.DataLoader): DataLoader for the test data. Batches are
            shaped ``(*inputs, labels)``: the last element holds the true class labels and
            every preceding element is passed to the model positionally.
        device (str, optional): The device to run the model on ('cpu' or 'cuda'). Defaults to 'cpu'.

    Returns:
        tuple: A tuple containing the accuracy, precision, recall, and F1 score of the model on the test dataset.
        Precision, recall and F1 are averaged over classes weighted by class support.

    This function performs a forward pass on the test dataset to obtain the model's predictions,
    then calculates and returns various evaluation metrics including accuracy, precision, recall, and F1 score.
    """
    model.eval()  # Set the model to evaluation mode
    true_labels = []  # List to store actual labels
    predictions = []  # List to store model predictions

    with torch.no_grad():  # Disable gradient computation
        for batch in test_loader:
            # The last element is the target, everything before it is model input.
            *inputs, labels = batch
            # Move data to the specified device
            inputs = [tensor.to(device) for tensor in inputs]
            labels = labels.to(device)

            output = model(*inputs)  # Forward pass
            predicted = output.argmax(dim=1)  # Index of the highest score per sample

            # Flatten so labels stored as (batch, 1) still give a flat list.
            true_labels += labels.reshape(-1).tolist()  # Append actual labels
            predictions += predicted.tolist()  # Append predicted labels

    # Calculate evaluation metrics. zero_division=0 scores a class that was
    # never predicted as 0 instead of emitting a warning for it.
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predictions, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predictions, average='weighted', zero_division=0)

    return accuracy, precision, recall, f1

## Main Script

In [None]:
def main(features=None, labels=None):
    """
    Main execution function: trains the I-MOTION CNN on the preprocessed accelerometer
    windows, evaluates it on a held-out split and plots the training loss and the
    evaluation metrics.

    Args:
        features (array-like, optional): 2-D array of shape (num_windows, window_length).
            Defaults to the module-level ``processed_data``.
        labels (array-like, optional): One label per window (any shape that flattens to
            ``num_windows`` entries). Defaults to the module-level ``windowed_labels``.

    Raises:
        ValueError: If ``features`` is not 2-D, its first dimension does not match the
            number of labels, or fewer than two windows are available.
    """
    # Local import: only this function needs TensorDataset.
    from torch.utils.data import TensorDataset

    # Configuration parameters
    num_epochs = 200      # Number of training epochs
    batch_size = 50       # Samples per mini-batch
    test_fraction = 0.2   # Share of the windows held out for evaluation
    # NOTE(review): the architecture sizes below are placeholders chosen so the
    # script runs end to end -- replace them with the intended I-MOTION values.
    conv_filter_list = [16, 32]
    conv_kernel_list = [5, 5]
    conv_stride = 1
    pool_window_size = 2
    pool_stride_size = 2
    pool_padding = 0
    full_connection_size = 128

    if features is None:
        features = processed_data
    if labels is None:
        labels = windowed_labels

    features = np.asarray(features, dtype=np.float32)
    labels = np.asarray(labels).reshape(-1)
    if features.ndim != 2:
        raise ValueError("features must be a 2-D array of shape (num_windows, window_length)")
    if features.shape[0] != labels.shape[0]:
        raise ValueError(
            f"features has {features.shape[0]} windows but {labels.shape[0]} labels were given"
        )
    num_windows, window_length = features.shape
    if num_windows < 2:
        raise ValueError("at least two windows are needed for a train/test split")

    # nn.CrossEntropyLoss expects integer class indices 0..C-1, so the raw label
    # values are mapped to their rank among the distinct labels.
    classes, class_indices = np.unique(labels, return_inverse=True)
    class_indices = np.asarray(class_indices, dtype=np.int64).reshape(-1)
    num_classes = len(classes)

    # Random train/test split.
    # NOTE(review): windows cut from the same recording can land on both sides
    # of this split; split by recording if that leakage matters.
    order = np.random.permutation(num_windows)
    num_test = min(max(1, int(round(num_windows * test_fraction))), num_windows - 1)
    test_idx, train_idx = order[:num_test], order[num_test:]

    def make_dataset(indices):
        # Conv1d expects (batch, channels, length): add a single channel axis.
        x = torch.from_numpy(features[indices]).unsqueeze(1)
        y = torch.from_numpy(class_indices[indices])
        return TensorDataset(x, y)

    # Lists for storing evaluation metrics and model information
    metric_values = [[] for _ in range(4)]  # Lists to store Accuracy, Precision, Recall, F1 Score

    # Set the computation device (GPU or CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\nTraining model")
    # Prepare data loaders
    train_loader = DataLoader(make_dataset(train_idx), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(make_dataset(test_idx), batch_size=batch_size, shuffle=False)
    print(f"DataLoader batch size: {train_loader.batch_size}")

    # Initialize the CNN model, optimizer, and loss criterion
    model = IMOTION_CNN(
        in_feature=window_length,
        in_channel=1,
        out_feature=num_classes,
        pool_window_size=pool_window_size,
        pool_stride_size=pool_stride_size,
        pool_padding=pool_padding,
        conv_filter_list=conv_filter_list,
        conv_kernel_list=conv_kernel_list,
        conv_stride=conv_stride,
        full_connection_size=full_connection_size,
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Train the model
    train_losses = train_model(model, train_loader, optimizer, criterion, epochs=num_epochs, device=device)

    # Evaluate the model's performance
    accuracy, precision, recall, f1 = evaluate_model(model, test_loader, device=device)
    # Store the evaluation metrics
    metric_values[0].append(accuracy)
    metric_values[1].append(precision)
    metric_values[2].append(recall)
    metric_values[3].append(f1)

    # Output the evaluation results
    print(f"Results  - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    # Visualization of training losses and evaluation metrics
    metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
    model_names = ['I-MOTION CNN']  # a single model is evaluated, so one bar per chart
    plot_training_losses(train_losses)
    plot_metric_bar_charts(model_names, metric_values, metric_names)

if __name__ == "__main__":
    main()
