# I-MOTION Model 

In [1]:
import time

import torch
import torch.nn as nn
import random
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Set a fixed random seed for reproducibility across multiple libraries
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
window_size = 600
# Check for CUDA (GPU support) and set device accordingly
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)  # For multi-GPU setups
    # Additional settings for ensuring reproducibility on CUDA
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    device = torch.device("cpu")
    print("CUDA not available. Using CPU.")

CUDA not available. Using CPU.


## Load Data

In [2]:
train_data_x = np.loadtxt("./train/raw_data/Acc_x.txt", delimiter=" ")
train_data_y = np.loadtxt("./train/raw_data/Acc_y.txt", delimiter=" ")
train_data_z = np.loadtxt("./train/raw_data/Acc_z.txt", delimiter=" ")
train_label = np.loadtxt("./train/train_label.txt", delimiter=" ")
train_order = np.loadtxt("./train/train_order.txt", dtype=float)
acc_data = np.dstack((train_data_x, train_data_y, train_data_z, train_label))
acc_data = acc_data[train_order.astype(np.int_).flatten() - 1, :, :]
window_data = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), window_size, 3), dtype=np.float32)
windowed_labels = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), 1), dtype=np.float32)

window_index = 0
for frame_index in range(acc_data.shape[0]):
    
    frame_features = acc_data[frame_index, :, :3]
    frame_labels = acc_data[frame_index, :, 3]
    
    for start in range(0, acc_data.shape[1], window_size):
        end = start + window_size
        window_data[window_index] = frame_features[start:end, :]
        # Assuming all samples in a window have the same label, take the label of the first sample
        windowed_labels[window_index] = frame_labels[start]
        window_index += 1
        
test_data_x = np.loadtxt("./test/test_raw_data/Acc_x.txt", delimiter=" ")
test_data_y = np.loadtxt("./test/test_raw_data/Acc_y.txt", delimiter=" ")
test_data_z = np.loadtxt("./test/test_raw_data/Acc_z.txt", delimiter=" ")
test_label = np.loadtxt("./test/test_label.txt", delimiter=" ")
test_order = np.loadtxt("./test/test_order.txt", dtype=float)
acc_data = np.dstack((test_data_x, test_data_y, test_data_z, test_label))
acc_data = acc_data[test_order.astype(np.int_).flatten() - 1, :, :]
test_window_data = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), window_size, 3), dtype=np.float32)
test_windowed_labels = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), 1), dtype=np.float32)

window_index = 0
for frame_index in range(acc_data.shape[0]):
    
    frame_features = acc_data[frame_index, :, :3]
    frame_labels = acc_data[frame_index, :, 3]
    
    for start in range(0, acc_data.shape[1], window_size):
        end = start + window_size
        test_window_data[window_index] = frame_features[start:end, :]
        # Assuming all samples in a window have the same label, take the label of the first sample
        test_windowed_labels[window_index] = frame_labels[start]
        window_index += 1

# Verify the shapes
print("Train Features shape:", window_data.shape)
print("Train Labels shape:", windowed_labels.shape)
print("Test Features shape:", test_window_data.shape)
print("Test Labels shape:", test_windowed_labels.shape)
    

Train Features shape: (163100, 600, 3)
Train Labels shape: (163100, 1)
Test Features shape: (56980, 600, 3)
Test Labels shape: (56980, 1)


## Helper Function

In [3]:
def plot_training_losses(train_losses):
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses)
    plt.title('Training Loss Over Epochs', fontsize=16)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.legend()
    plt.grid(True)
    plt.show()


def calculate_padding(kernel_size):
    return (kernel_size - 1) // 2

def calculate_output_length(input_length, kernel_size, stride, padding):
    return ((input_length + 2 * padding - kernel_size) // stride) + 1

def calculate_pooling_padding(window_size, stride, input_size, output_size):
    return ((output_size - 1) * stride - input_size + window_size) // 2

## Preprocessing

In [5]:
import time
from scipy.signal import butter, filtfilt
def preprocess_accelerometer_data(accel_data, m=5, cutoff=0.001, fs=100, order=5):
    # Initialize gravity and linear acceleration
    
    # Step 1: Remove Gravity
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='high', analog=False)
    linear_acceleration = filtfilt(b, a, accel_data, axis=0)
    
    # Step 2: Smooth Data
    # Create a moving average (MA) filter
    window = np.ones(m) / m
    # Apply the moving average filter to each column
    smoothed_data = np.zeros_like(linear_acceleration)
    for i in range(linear_acceleration.shape[1]):
        smoothed_data[:, i] = np.convolve(linear_acceleration[:, i], window, mode='same')

    # Step 3: Calculate Magnitude
    magnitudes = np.sqrt(np.sum(smoothed_data**2, axis=1))
    return magnitudes


# window_data size is 163100 x 600 x 3.
train_processed_data = np.array([preprocess_accelerometer_data(window_data[i, :, :]) for i in range(window_data.shape[0])])
test_processed_data = np.array([preprocess_accelerometer_data(test_window_data[i, :, :]) for i in range(test_window_data.shape[0])])
print(train_processed_data.shape)  # Output the shape of the processed data
print(test_processed_data.shape)

(163100, 600)
(56980, 600)


In [6]:
"""
for k in range(1, K + 1):  # k is 1-indexed in the mathematical formula
        if k <= m // 2:
            # Early data points: smaller window size that grows
            smoothed_data[k - 1, :] = np.sum(linear_acceleration[:2 * k - 1, :], axis=0) / (2 * k - 1)
        elif k > K - m // 2:
            # Late data points: smaller window size that shrinks
            smoothed_data[k - 1, :] = np.sum(linear_acceleration[2 * k - K - 1:, :]) / (2 * (K - k) + 1)
        else:
            # Middle data points: fixed window size
            smoothed_data[k - 1, :] = np.sum(linear_acceleration[k - m // 2 - 1 : k + m // 2, :]) / m
"""

'\nfor k in range(1, K + 1):  # k is 1-indexed in the mathematical formula\n        if k <= m // 2:\n            # Early data points: smaller window size that grows\n            smoothed_data[k - 1, :] = np.sum(linear_acceleration[:2 * k - 1, :], axis=0) / (2 * k - 1)\n        elif k > K - m // 2:\n            # Late data points: smaller window size that shrinks\n            smoothed_data[k - 1, :] = np.sum(linear_acceleration[2 * k - K - 1:, :]) / (2 * (K - k) + 1)\n        else:\n            # Middle data points: fixed window size\n            smoothed_data[k - 1, :] = np.sum(linear_acceleration[k - m // 2 - 1 : k + m // 2, :]) / m\n'

## Model Construction

In [51]:
class IMOTION_CNN(nn.Module):
    def __init__(self, in_feature=600, in_channel=1, out_feature=7, pool_window_size=4, pool_stride_size=2, 
                 pool_padding=1, conv_filter_list=[32, 64, 64, 64, 64, 64], conv_kernel_list=[15, 10, 10, 5, 5, 5], conv_stride=1, full_connection_size=200):
        super(IMOTION_CNN, self).__init__()
        self.pool = nn.MaxPool1d(kernel_size=pool_window_size, stride=pool_stride_size, padding=pool_padding)
        assert len(conv_filter_list) == len(conv_kernel_list)
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=in_channel if i == 0 else conv_filter_list[i-1], 
                      out_channels=conv_filter_list[i], 
                      kernel_size=conv_kernel_list[i], stride=conv_stride, padding=calculate_padding(conv_kernel_list[i]))
            for i in range(len(conv_filter_list))
        ])
        self.final_matrix_size = conv_filter_list[-1] * (in_feature // (2 ** (len(conv_filter_list))))
        self.fc1 = nn.Linear(self.final_matrix_size, full_connection_size) 
        self.fc2 = nn.Linear(full_connection_size, out_feature)


    def forward(self, x):
        for conv in self.conv_layers:
            print("x: " + str(x.shape))
            x = self.pool(torch.relu(conv(x)))
        x = x.view(x.shape[0], -1)  # Flattening the tensor
        print("x: " + str(x.shape))
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Dataset

In [52]:
# Define a custom Dataset
class AccDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)  # Convert features to PyTorch tensors
        self.labels = torch.tensor(labels, dtype=torch.int32)       # Convert labels to PyTorch tensors
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

def prepare_data_loaders(train_features, train_labels, test_features, test_labels, batch_size=10):

    train_dataset = AccDataset(train_features, train_labels)
    test_dataset = AccDataset(test_features, test_labels)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

## Training & Evaluation

In [53]:
def train_model(model, train_loader, optimizer, criterion, epochs=100, device='cpu'):
    model.train()  # Set the model to training mode
    loss_values = []  # Initialize a list to store the average loss per epoch

    for epoch in range(epochs):
        total_loss = 0  # Track total loss for each epoch

        for X, labels in train_loader:
            # Move data to the specified device
            X, labels = X.to(device), labels.to(device)
            X = X.unsqueeze(1)
            print(X.shape)
            optimizer.zero_grad()  # Clear gradients for the next train step
            output = model(X)  # Forward pass
            loss = criterion(output, labels)  # Compute the loss
            loss.backward()  # Backward pass to compute gradients
            optimizer.step()  # Update model parameters

            total_loss += loss.item()  # Accumulate the loss

        avg_loss = total_loss / len(train_loader)  # Calculate average loss
        loss_values.append(avg_loss)  # Append average loss to list

        # Print the average loss for the current epoch
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')

    return loss_values


def evaluate_model(model, test_loader, device='cpu'):
    model.eval()  # Set the model to evaluation mode
    true_labels = []  # List to store actual labels
    predictions = []  # List to store model predictions

    with torch.no_grad():  # Disable gradient computation
        for X, labels in test_loader:
            # Move data to the specified device
            X, labels = X.to(device), labels.to(device)

            output = model(X)  # Forward pass
            _, predicted = torch.max(output.data, 1)  # Get the index of the max log-probability

            true_labels += labels.tolist()  # Append actual labels
            predictions += predicted.tolist()  # Append predicted labels

    # Calculate evaluation metrics
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, average='weighted')
    recall = recall_score(true_labels, predictions, average='weighted')
    f1 = f1_score(true_labels, predictions, average='weighted')

    return accuracy, precision, recall, f1

## Main Script

In [54]:
def main():
    """
    Main execution function to train and evaluate Graph Convolutional Network (GCN) models
    with different graph normalization techniques, visualize training metrics, and perform
    embedding analysis through PCA.

    Assumes the presence of a GCN model class, data loader preparation functions, and
    various normalization technique functions defined outside this script.
    """
    # Configuration parameters
    num_epochs = 200  # Number of training epochs
    # Dictionary mapping normalization technique names to their corresponding functions

    # Lists for storing evaluation metrics and model information
    metric_values = [[] for _ in range(4)]  # Lists to store Accuracy, Precision, Recall, F1 Score
    train_losses = []  # Training loss values for each normalization technique

    # Set the computation device (GPU or CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\nTraining model")
    # Prepare data loaders
    train_loader, test_loader = prepare_data_loaders(train_processed_data, windowed_labels, test_processed_data, test_windowed_labels, batch_size=50)
    print(f"DataLoader batch size: {train_loader.batch_size}")

    # Initialize the GCN model, optimizer, and loss criterion
    model = IMOTION_CNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Train the model
    train_losses = train_model(model, train_loader, optimizer, criterion, epochs=num_epochs, device=device)

    # Evaluate the model's performance
    accuracy, precision, recall, f1 = evaluate_model(model, test_loader, device=device)
    # Store the evaluation metrics
    metric_values[0].append(accuracy)
    metric_values[1].append(precision)
    metric_values[2].append(recall)
    metric_values[3].append(f1)

    # Output the evaluation results
    print(f"Results  - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    # Visualization of training losses and evaluation metrics for each normalization technique
    metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
    plot_training_losses(train_losses)

if __name__ == "__main__":
    main()



Training model
DataLoader batch size: 50
torch.Size([50, 1, 600])
x: torch.Size([50, 1, 600])
x: torch.Size([50, 32, 300])
x: torch.Size([50, 64, 149])
x: torch.Size([50, 64, 74])
x: torch.Size([50, 64, 37])
x: torch.Size([50, 64, 18])
x: torch.Size([50, 64, 9])
x: torch.Size([50, 256])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (50x256 and 576x200)

In [None]:
# Assume input_tensor is the output of your convolutional layers with shape [50, 64, 9]
input_tensor = torch.randn(50, 64, 9)

# Flatten the last two dimensions of the tensor
flattened = input_tensor.view(50, -1)  # Reshapes to [50, 576]