In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, r2_score

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


  return torch._C._cuda_getDeviceCount() > 0


In [4]:
def create_windows_np(data, window_size, stride):
    num_samples, num_channels = data.shape
    num_windows = (num_samples - window_size) // stride + 1

    shape = (num_windows, window_size, num_channels)
    strides = (data.strides[0] * stride, data.strides[0], data.strides[1])

    windows = np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)

    # transpose the windows array to the desired shape
    windows = np.transpose(windows, axes=(0, 2, 1))

    return windows

In [4]:
def noise_transform_vectorized(X, sigma=0.05):
    """
    Adding random Gaussian noise with mean 0
    """
    noise = np.random.normal(loc=0, scale=sigma, size=X.shape)
    return X + noise

def scaling_transform_vectorized(X, sigma=0.1):
    """
    Scaling by a random factor
    """
    scaling_factor = np.random.normal(loc=1.0, scale=sigma, size=(X.shape[0], 1, X.shape[2]))
    return X * scaling_factor

def negate_transform_vectorized(X):
    """
    Inverting the signals
    """
    return X * -1

def time_flip_transform_vectorized(X):
    """
    Reversing the direction of time
    """
    return X[:, ::-1, :]

In [5]:
# Define the list of transformations to be applied
transformations = [
    lambda x: noise_transform_vectorized(x), 
    lambda x: scaling_transform_vectorized(x),
    lambda x: negate_transform_vectorized(x),
    lambda x: time_flip_transform_vectorized(x),
]

In [6]:
def add_transformations(df):

    user_data = create_windows_np(df.loc[:, ['x', 'y', 'z']].values.astype(np.float32), 100, 50)

    # Get the number of windows and window size for the user's data
    num_windows, _, _ = user_data.shape

    # Apply the transformations to the user's data
    transformed_data = np.concatenate([transform_fn(user_data) for transform_fn in transformations], axis=0)
    transformed_data = np.concatenate([transformed_data, user_data], axis=0)
    transformed_data = np.array(transformed_data)

    # Create the labels for the transformed data
    transformed_labels = np.array([False for _ in range(4)])
    transformed_labels = np.append(transformed_labels, True)
    transformed_labels = np.repeat(transformed_labels, num_windows)

    return {
        'X': transformed_data,
        'y': transformed_labels
    }

In [7]:
class MultiTaskTPN(nn.Module):
    def __init__(self, num_tasks=len(transformations), num_channels=3):
        super(MultiTaskTPN, self).__init__()
        self.conv1 = nn.Conv1d(num_channels, 32, kernel_size=24, stride=1)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=16, stride=1)
        self.conv3 = nn.Conv1d(64, 96, kernel_size=8, stride=1)
        self.dropout = nn.Dropout(p=0.1)

        self.task_heads = nn.ModuleList([nn.Sequential(
            nn.Linear(96, 256),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(256, 1),
            nn.Sigmoid()
        ) for _ in range(num_tasks)])

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.conv1(x)))
        x = self.dropout(nn.functional.relu(self.conv2(x)))
        x = self.dropout(nn.functional.relu(self.conv3(x)))
        x = nn.functional.max_pool1d(x, x.size(2)).squeeze(2)

        logits = [task_head(x).view(-1, 1) for task_head in self.task_heads]
        return logits


In [8]:
path = "../capture24"
batch_size = 256

# Create the model
model = MultiTaskTPN().to(device)

# Define the loss function and optimizer
loss_fn = nn.BCELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

# Training loop
num_epochs = 1
for epoch in range(num_epochs):
    
    for i in range(20):

        # ignore subjects with missing values
        if i == 7 or i == 15 or i == 16:
            continue

        df = pd.read_feather(f'{path}/training.feather', columns=['x', 'y', 'z', 'user_id'])

        # Get the data for the current user
        user_data = add_transformations(df.loc[df['user_id'] == i+1])

        # Create a DataLoader to iterate over the test data in batches
        train_dataset = TensorDataset(torch.tensor(user_data['X'], dtype=torch.float32), 
                                    torch.tensor(user_data['y'], dtype=torch.float32))
        
        # Create a DataLoader to iterate over the test data in batches
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        

        # Training loop
        for batch_idx, (data, labels) in enumerate(train_dataloader):
            data, labels = data.to(device), labels.to(device)

            optimizer.zero_grad()

            logits = model(data)

            losses = []

            # Calculate the loss for each task
            for logit in logits:
                losses.append(loss_fn(logit, labels.view(-1, 1)))
            
            # Sum the losses to get the total loss
            total_loss = sum(losses)
            
            total_loss.backward()
            optimizer.step()
            
            if batch_idx % 100 == 0:
                print(f"Epoch: {epoch+1}/{num_epochs}, Subject: P{i+1:03d}, Batch: {batch_idx}, Loss: {total_loss.item()}")

        del df


Epoch: 1/1, Subject: P001, Batch: 0, Loss: 2.790815830230713
Epoch: 1/1, Subject: P001, Batch: 100, Loss: 1.666286826133728
Epoch: 1/1, Subject: P001, Batch: 200, Loss: 1.863908052444458
Epoch: 1/1, Subject: P001, Batch: 300, Loss: 1.7330677509307861
Epoch: 1/1, Subject: P001, Batch: 400, Loss: 1.7743134498596191
Epoch: 1/1, Subject: P001, Batch: 500, Loss: 1.8124644756317139
Epoch: 1/1, Subject: P001, Batch: 600, Loss: 1.717450499534607
Epoch: 1/1, Subject: P001, Batch: 700, Loss: 1.7534329891204834
Epoch: 1/1, Subject: P001, Batch: 800, Loss: 1.6110481023788452
Epoch: 1/1, Subject: P001, Batch: 900, Loss: 1.7358132600784302
Epoch: 1/1, Subject: P001, Batch: 1000, Loss: 1.5451791286468506
Epoch: 1/1, Subject: P001, Batch: 1100, Loss: 1.6510359048843384
Epoch: 1/1, Subject: P001, Batch: 1200, Loss: 1.6804206371307373
Epoch: 1/1, Subject: P001, Batch: 1300, Loss: 1.6861107349395752
Epoch: 1/1, Subject: P001, Batch: 1400, Loss: 1.7083008289337158
Epoch: 1/1, Subject: P001, Batch: 1500, L

In [9]:
# Save the model's state dictionary
torch.save(model.state_dict(), 'multi_task_tpn.pth')

In [26]:
def evaluate(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0
    accuracy = 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for data, labels in dataloader:
            data, labels = data.to(device), labels.to(device)

            logits = model(data)

            # Concatenate the logits from each task
            combined_logits = np.concatenate([logit.cpu().numpy() for logit in logits], axis=1)

            # Calculate the accuracy by comparing the highest logit with the correct label
            pred = np.around(np.amax(combined_logits, axis=1))
            correct += np.sum(pred == labels.cpu().numpy())
            total += len(labels)

            all_preds.extend(pred)
            all_labels.extend(labels.cpu().numpy())

    accuracy = correct / total
    f1 = f1_score(all_labels, all_preds, average='weighted')
    return accuracy, f1

In [28]:
val_df = pd.read_feather(f'{path}/test.feather', columns=['x', 'y', 'z', 'user_id'])

# Get the validation data and apply transformations
val_data = add_transformations(val_df)
    
# Create a DataLoader to iterate over the validation data in batches
val_dataset = TensorDataset(torch.tensor(val_data['X'], dtype=torch.float32), 
                            torch.tensor(val_data['y'], dtype=torch.float32))
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

val_accuracy, val_f1 = evaluate(model, val_dataloader, device)
print(f"Validation accuracy: {val_accuracy * 100:.2f}%, F1: {val_f1 * 100:.2f}%")

del val_df
del val_data
del val_dataset
del val_dataloader
torch.cuda.empty_cache()

Validation accuracy: 90.81%, F1: 90.35%
