# I-MOTION Model 

In [1]:
import time

import torch
import torch.nn as nn
import random
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Set a fixed random seed for reproducibility across multiple libraries
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
window_size = 600
# Check for CUDA (GPU support) and set device accordingly
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)  # For multi-GPU setups
    # Additional settings for ensuring reproducibility on CUDA
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    device = torch.device("cpu")
    print("CUDA not available. Using CPU.")

CUDA not available. Using CPU.


## Load Data

In [2]:
train_data_x = np.loadtxt("./data/Acc_x.txt", delimiter=" ")
train_data_y = np.loadtxt("./data/Acc_y.txt", delimiter=" ")
train_data_z = np.loadtxt("./data/Acc_z.txt", delimiter=" ")
train_label = np.loadtxt("./data/train_label.txt", delimiter=" ")
train_order = np.loadtxt("./data/train_order.txt", dtype=float)
acc_data = np.dstack((train_data_x, train_data_y, train_data_z, train_label))
acc_data = acc_data[train_order.astype(np.int_).flatten() - 1, :, :]
window_data = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), window_size, 3), dtype=np.float32)
windowed_labels = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), 1), dtype=np.float32)

window_index = 0
for frame_index in range(acc_data.shape[0]):
    
    frame_features = acc_data[frame_index, :, :3]
    frame_labels = acc_data[frame_index, :, 3]
    
    for start in range(0, acc_data.shape[1], window_size):
        end = start + window_size
        window_data[window_index] = frame_features[start:end, :]
        # Assuming all samples in a window have the same label, take the label of the first sample
        windowed_labels[window_index] = frame_labels[start]
        window_index += 1
        
test_data_x = np.loadtxt("./data//test_Acc_x.txt", delimiter=" ")
test_data_y = np.loadtxt("./data/test_Acc_y.txt", delimiter=" ")
test_data_z = np.loadtxt("./data/test_Acc_z.txt", delimiter=" ")
test_label = np.loadtxt("./data/test_label.txt", delimiter=" ")
test_order = np.loadtxt("./data/test_order.txt", dtype=float)
acc_data = np.dstack((test_data_x, test_data_y, test_data_z, test_label))
acc_data = acc_data[test_order.astype(np.int_).flatten() - 1, :, :]
test_window_data = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), window_size, 3), dtype=np.float32)
test_windowed_labels = np.empty((acc_data.shape[0] * (acc_data.shape[1]//window_size), 1), dtype=np.float32)

window_index = 0
for frame_index in range(acc_data.shape[0]):
    
    frame_features = acc_data[frame_index, :, :3]
    frame_labels = acc_data[frame_index, :, 3]
    
    for start in range(0, acc_data.shape[1], window_size):
        end = start + window_size
        test_window_data[window_index] = frame_features[start:end, :]
        # Assuming all samples in a window have the same label, take the label of the first sample
        test_windowed_labels[window_index] = frame_labels[start]
        window_index += 1

# Verify the shapes
print("Train Features shape:", window_data.shape)
print("Train Labels shape:", windowed_labels.shape)
print("Test Features shape:", test_window_data.shape)
print("Test Labels shape:", test_windowed_labels.shape)
    

Train Features shape: (163100, 600, 3)
Train Labels shape: (163100, 1)
Test Features shape: (56980, 600, 3)
Test Labels shape: (56980, 1)


In [12]:
# Load train data
train_data_x = np.loadtxt("./data/Acc_x.txt", delimiter=" ")
train_data_y = np.loadtxt("./data/Acc_y.txt", delimiter=" ")
train_data_z = np.loadtxt("./data/Acc_z.txt", delimiter=" ")
train_label = np.loadtxt("./data/train_label.txt", delimiter=" ")
train_order = np.loadtxt("./data/train_order.txt", dtype=float)
acc_data = np.dstack((train_data_x, train_data_y, train_data_z, train_label))
acc_data = acc_data[train_order.astype(np.int_).flatten() - 1, :, :]

# Calculate window data and labels for train data
window_data = np.empty((acc_data.shape[0] * (acc_data.shape[1] // window_size), window_size, 3), dtype=np.float32)
windowed_labels = np.empty((acc_data.shape[0] * (acc_data.shape[1] // window_size), 1), dtype=np.float32)

window_index = 0
for frame_index in range(acc_data.shape[0]):
    frame_features = acc_data[frame_index, :, :3]
    frame_labels = acc_data[frame_index, :, 3]
    for start in range(0, acc_data.shape[1], window_size):
        end = start + window_size
        window_data[window_index] = frame_features[start:end, :]
        windowed_labels[window_index] = frame_labels[start]
        window_index += 1

# Reshape and combine train data
reshaped_train_data = window_data.reshape(-1, window_size * 3)
combined_train_data = np.hstack((reshaped_train_data, windowed_labels))

# Save train data to CSV
pd.DataFrame(combined_train_data).to_csv('train_data.csv', index=False, header=True)

# Verify the shapes
print("Train Features shape:", window_data.shape)
print("Train Labels shape:", windowed_labels.shape)
print(combined_train_data.shape)

Train Features shape: (326200, 300, 3)
Train Labels shape: (326200, 1)
Test Features shape: (113960, 300, 3)
Test Labels shape: (113960, 1)
(326200, 901)
[[ 2.047285  3.183076 10.796031 ...  3.2432   10.028386  5.      ]
 [ 1.844691  3.778356  9.555554 ...  2.31379   8.477245  5.      ]
 [ 1.475761  2.650088  9.071687 ...  3.246537  9.30294   5.      ]
 ...
 [-3.704407  0.064732 -9.034916 ...  0.237594 -9.693662  5.      ]
 [-3.394724 -0.0289   -9.846091 ... -0.029576 -9.040967  5.      ]
 [-3.621687 -0.073742 -8.714023 ... -0.017882 -9.637794  5.      ]]


In [16]:
print(combined_train_data.shape)

(326200, 901)
(113960, 901)


In [1]:
import pandas as pd

# Load the CSV file without headers
data = pd.read_csv('train_data.csv', header=None)

# Generate column names for the accelerometer data and label
column_names = [f'Acc_{i//3+1}_{["x", "y", "z"][i%3]}' for i in range(900)]
column_names.append('Label')

# Set the column names to the DataFrame
data.columns = column_names

# Convert 'Label' column to integer type
data['Label'] = data['Label'].astype(int)

# Save the DataFrame back to a CSV file with column names
data.to_csv('train_data_with_headers.csv', index=False)

# Verify the first few rows and data types to ensure everything is set up correctly
print(data.head())
print(data.dtypes)

    Acc_1_x    Acc_1_y   Acc_1_z   Acc_2_x    Acc_2_y   Acc_2_z   Acc_3_x  \
0 -2.367480   9.603528 -1.200628 -2.442098   9.238089 -1.462477 -2.774196   
1 -0.478840   9.461885  0.900801 -0.446564   9.511542  0.975061 -0.433618   
2 -1.367722   9.750951  0.306458 -1.334744   9.776136  0.295097 -1.316464   
3 -0.374039   7.868162  0.633156 -0.464943   8.151039  0.820221 -0.482534   
4  2.160904  13.322191 -1.865706  2.389481  12.746821 -1.159406  2.072968   

     Acc_3_y   Acc_3_z   Acc_4_x  ...  Acc_298_x  Acc_298_y  Acc_298_z  \
0   8.611417 -1.739763 -3.246977  ...  -0.702284   9.517571   0.879291   
1   9.535838  0.963800 -0.402226  ...  -1.525542   9.455682   0.303084   
2   9.764919  0.287304 -1.258937  ...  -0.058308   8.344917   0.338663   
3   8.718449  0.901451 -0.685306  ...   0.137368  12.029835  -2.380630   
4  12.296709 -1.122953  2.250746  ...  -2.110125   7.896227  -1.579852   

   Acc_299_x  Acc_299_y  Acc_299_z  Acc_300_x  Acc_300_y  Acc_300_z  Label  
0  -0.585128   

In [20]:
combined_data.columns = combined_data.iloc[0]
combined_data = combined_data[1:].reset_index(drop=True)

# Display the updated DataFrame
print(combined_data.head())

0   Acc_1_x    Acc_1_y   Acc_1_z   Acc_2_x    Acc_2_y   Acc_2_z   Acc_3_x  \
0  -2.36748   9.603528 -1.200628 -2.442098   9.238089 -1.462477 -2.774196   
1  -0.47884   9.461885  0.900801 -0.446564   9.511542  0.975061 -0.433618   
2 -1.367722   9.750951  0.306458 -1.334744   9.776136  0.295097 -1.316464   
3 -0.374039   7.868162  0.633156 -0.464943   8.151039  0.820221 -0.482534   
4  2.160904  13.322191 -1.865706  2.389481  12.746821 -1.159406  2.072968   

0    Acc_3_y   Acc_3_z   Acc_4_x  ... Acc_298_x  Acc_298_y Acc_298_z  \
0   8.611417 -1.739763 -3.246977  ... -0.702284   9.517571  0.879291   
1   9.535838    0.9638 -0.402226  ... -1.525542   9.455682  0.303084   
2   9.764919  0.287304 -1.258937  ... -0.058308   8.344917  0.338663   
3   8.718449  0.901451 -0.685306  ...  0.137368  12.029835  -2.38063   
4  12.296709 -1.122953  2.250746  ... -2.110125   7.896227 -1.579852   

0 Acc_299_x  Acc_299_y Acc_299_z Acc_300_x Acc_300_y Acc_300_z Label  
0 -0.585128   9.503646  0.857516 

In [26]:
combined_data.to_csv('train_data_with_headers_no.csv', index=False, header=False)

In [24]:
print(combined_data.columns)

Index(['Acc_1_x', 'Acc_1_y', 'Acc_1_z', 'Acc_2_x', 'Acc_2_y', 'Acc_2_z',
       'Acc_3_x', 'Acc_3_y', 'Acc_3_z', 'Acc_4_x',
       ...
       'Acc_298_x', 'Acc_298_y', 'Acc_298_z', 'Acc_299_x', 'Acc_299_y',
       'Acc_299_z', 'Acc_300_x', 'Acc_300_y', 'Acc_300_z', 'Label'],
      dtype='object', name=0, length=901)


## Helper Function

In [3]:
def plot_training_losses(train_losses):
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses)
    plt.title('Training Loss Over Epochs', fontsize=16)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.legend()
    plt.grid(True)
    plt.show()


def calculate_padding(kernel_size):
    return (kernel_size - 1) // 2

def calculate_output_length(input_length, kernel_size, stride, padding):
    return ((input_length + 2 * padding - kernel_size) // stride) + 1

def calculate_pooling_padding(window_size, stride, input_size, output_size):
    return ((output_size - 1) * stride - input_size + window_size) // 2

## Preprocessing

In [3]:
import time
from scipy.signal import butter, filtfilt
def preprocess_accelerometer_data(accel_data, m=5, cutoff=0.001, fs=100, order=5):
    # Initialize gravity and linear acceleration
    
    # Step 1: Remove Gravity
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='high', analog=False)
    linear_acceleration = filtfilt(b, a, accel_data, axis=0)
    
    # Step 2: Smooth Data
    # Create a moving average (MA) filter
    window = np.ones(m) / m
    # Apply the moving average filter to each column
    smoothed_data = np.zeros_like(linear_acceleration)
    for i in range(linear_acceleration.shape[1]):
        smoothed_data[:, i] = np.convolve(linear_acceleration[:, i], window, mode='same')

    # Step 3: Calculate Magnitude
    magnitudes = np.sqrt(np.sum(smoothed_data**2, axis=1))
    return magnitudes


# window_data size is 163100 x 600 x 3.
train_processed_data = np.array([preprocess_accelerometer_data(window_data[i, :, :]) for i in range(window_data.shape[0])])
test_processed_data = np.array([preprocess_accelerometer_data(test_window_data[i, :, :]) for i in range(test_window_data.shape[0])])
print(train_processed_data.shape)  # Output the shape of the processed data
print(test_processed_data.shape)

(163100, 600)
(56980, 600)


In [4]:
print(train_processed_data.shape) 
print(windowed_labels.shape)

(163100, 600)
(163100, 600, 3)
(163100, 1)


In [5]:
feature_columns = [f'acc_{i+1}' for i in range(train_processed_data.shape[1])]
# Label column
label_column = ['label']

# All columns
columns = feature_columns + label_column

# Step 3: Combine Features and Labels
# Combine features and labels into a single DataFrame
combined_data = np.hstack((train_processed_data, windowed_labels))
df = pd.DataFrame(combined_data, columns=columns)

df['label'] = df['label'].astype(int)

# Step 4: Save to CSV File
df.to_csv('combined_data.csv', index=False)
print(df.head())
print(df.dtypes)

      acc_1     acc_2     acc_3     acc_4     acc_5     acc_6     acc_7  \
0  3.735548  4.948487  6.411320  6.619566  6.842094  7.112462  7.359571   
1  3.872650  5.152979  6.423147  6.386667  6.343330  6.319507  6.327529   
2  4.474486  5.870930  7.314752  7.093672  6.896525  6.703823  6.217646   
3  3.723725  5.046302  6.432362  6.671743  6.930723  7.220342  7.515652   
4  3.919540  5.192972  6.446633  6.381846  6.298956  6.238860  6.211018   

      acc_8     acc_9    acc_10  ...   acc_592   acc_593   acc_594   acc_595  \
0  7.400199  7.407766  7.372681  ...  6.480283  6.513780  6.513585  6.490097   
1  6.344512  6.371496  6.384150  ...  5.520541  5.249839  4.982332  5.092405   
2  5.528437  4.674045  3.693581  ...  6.199013  6.201717  6.144378  6.026480   
3  7.797378  8.033084  8.155891  ...  6.404034  6.434877  6.483223  6.540555   
4  6.215341  6.243834  6.298971  ...  6.745467  6.709211  6.671676  6.623878   

    acc_596   acc_597   acc_598   acc_599   acc_600  label  
0  6.44

## Model Construction

In [6]:
class IMOTION_CNN(nn.Module):
    def __init__(self, in_feature=600, in_channel=1, out_feature=8, pool_window_size=4, pool_stride_size=2, 
                 pool_padding=1, conv_filter_list=[32, 64, 64, 64, 64, 64], conv_kernel_list=[15, 10, 10, 5, 5, 5], conv_stride=1, full_connection_size=200):
        super(IMOTION_CNN, self).__init__()
        self.pool = nn.MaxPool1d(kernel_size=pool_window_size, stride=pool_stride_size, padding=pool_padding)
        assert len(conv_filter_list) == len(conv_kernel_list)
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=in_channel if i == 0 else conv_filter_list[i-1], 
                      out_channels=conv_filter_list[i], 
                      kernel_size=conv_kernel_list[i], stride=conv_stride, padding=calculate_padding(conv_kernel_list[i]))
            for i in range(len(conv_filter_list))
        ])
        self.final_matrix_size = conv_filter_list[-1] * (in_feature // (2 ** (len(conv_filter_list))))
        self.fc1 = nn.Linear(self.final_matrix_size, full_connection_size) 
        self.fc2 = nn.Linear(full_connection_size, out_feature)


    def forward(self, x):
        for conv in self.conv_layers:
            x = self.pool(conv(x))
            
        x = x.view(x.shape[0], -1)  # Flattening the tensor
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Dataset

In [7]:
# Define a custom Dataset
class AccDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)  # Convert features to PyTorch tensors
        self.labels = torch.tensor(labels, dtype=torch.long) - 1       # Convert labels to PyTorch tensors
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

def prepare_data_loaders(train_features, train_labels, test_features, test_labels, batch_size=10):

    train_dataset = AccDataset(train_features, train_labels)
    test_dataset = AccDataset(test_features, test_labels)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

## Training & Evaluation

In [10]:
def train_model(model, train_loader, optimizer, criterion, epochs=100, device='cpu'):
    model.train()  # Set the model to training mode
    loss_values = []  # Initialize a list to store the average loss per epoch

    for epoch in range(epochs):
        total_loss = 0  # Track total loss for each epoch
        for X, labels in train_loader:
         
            # Move data to the specified device
            X, labels = X.to(device), labels.to(device)
            X = X.unsqueeze(1)
            optimizer.zero_grad()  # Clear gradients for the next train step
            output = model(X)  # Forward pass
            labels = labels.squeeze(1) 
            loss = criterion(output, labels)  # Compute the loss
            loss.backward()  # Backward pass to compute gradients
            optimizer.step()  # Update model parameters
            total_loss += loss.item()  # Accumulate the loss
         
        avg_loss = total_loss / len(train_loader)  # Calculate average loss
        loss_values.append(avg_loss)  # Append average loss to list

        # Print the average loss for the current epoch
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')

    return loss_values


def evaluate_model(model, test_loader, device='cpu'):
    model.eval()  # Set the model to evaluation mode
    true_labels = []  # List to store actual labels
    predictions = []  # List to store model predictions

    with torch.no_grad():  # Disable gradient computation
        for X, labels in test_loader:
            # Move data to the specified device
            X, labels = X.to(device), labels.to(device)

            output = model(X)  # Forward pass
            _, predicted = torch.max(output.data, 1)  # Get the index of the max log-probability

            true_labels += labels.tolist()  # Append actual labels
            predictions += predicted.tolist()  # Append predicted labels

    # Calculate evaluation metrics
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, average='weighted')
    recall = recall_score(true_labels, predictions, average='weighted')
    f1 = f1_score(true_labels, predictions, average='weighted')

    return accuracy, precision, recall, f1

## Main Script

In [11]:
def main():
    """
    Main execution function to train and evaluate Graph Convolutional Network (GCN) models
    with different graph normalization techniques, visualize training metrics, and perform
    embedding analysis through PCA.

    Assumes the presence of a GCN model class, data loader preparation functions, and
    various normalization technique functions defined outside this script.
    """
    # Configuration parameters
    num_epochs = 200  # Number of training epochs
    # Dictionary mapping normalization technique names to their corresponding functions

    # Lists for storing evaluation metrics and model information
    metric_values = [[] for _ in range(4)]  # Lists to store Accuracy, Precision, Recall, F1 Score
    train_losses = []  # Training loss values for each normalization technique

    # Set the computation device (GPU or CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"\nTraining model")
    # Prepare data loaders
    train_loader, test_loader = prepare_data_loaders(train_processed_data, windowed_labels, test_processed_data, test_windowed_labels, batch_size=100)
    print(f"DataLoader batch size: {train_loader.batch_size}")

    # Initialize the GCN model, optimizer, and loss criterion
    model = IMOTION_CNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Train the model
    train_losses = train_model(model, train_loader, optimizer, criterion, epochs=num_epochs, device=device)

    # Evaluate the model's performance
    accuracy, precision, recall, f1 = evaluate_model(model, test_loader, device=device)
    # Store the evaluation metrics
    metric_values[0].append(accuracy)
    metric_values[1].append(precision)
    metric_values[2].append(recall)
    metric_values[3].append(f1)

    # Output the evaluation results
    print(f"Results  - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    # Visualization of training losses and evaluation metrics for each normalization technique
    metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
    plot_training_losses(train_losses)

if __name__ == "__main__":
    main()



Training model
DataLoader batch size: 100
Epoch 1/200, Loss: 1.0091
Epoch 2/200, Loss: 0.8350
Epoch 3/200, Loss: 0.7858
Epoch 4/200, Loss: 0.7485
Epoch 5/200, Loss: 0.7171
Epoch 6/200, Loss: 0.7190
Epoch 7/200, Loss: 0.6916
Epoch 8/200, Loss: 0.6754
Epoch 9/200, Loss: 0.6637
Epoch 10/200, Loss: 0.6428


KeyboardInterrupt: 

In [None]:
# Assume input_tensor is the output of your convolutional layers with shape [50, 64, 9]
input_tensor = torch.randn(50, 64, 9)

# Flatten the last two dimensions of the tensor
flattened = input_tensor.view(50, -1)  # Reshapes to [50, 576]