In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, r2_score

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
class MultiTaskTPN(nn.Module):
    def __init__(self, num_tasks=4, num_channels=3):
        super(MultiTaskTPN, self).__init__()
        self.conv1 = nn.Conv1d(num_channels, 32, kernel_size=24, stride=1)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=16, stride=1)
        self.conv3 = nn.Conv1d(64, 96, kernel_size=8, stride=1)
        self.dropout = nn.Dropout(p=0.1)

        self.task_heads = nn.ModuleList([nn.Sequential(
            nn.Linear(96, 256),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(256, 1),
            nn.Sigmoid()
        ) for _ in range(num_tasks)])

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.conv1(x)))
        x = self.dropout(nn.functional.relu(self.conv2(x)))
        x = self.dropout(nn.functional.relu(self.conv3(x)))
        x = nn.functional.max_pool1d(x, x.size(2)).squeeze(2)

        logits = [task_head(x).view(-1, 1) for task_head in self.task_heads]
        return logits


In [34]:
class EnergyExpenditureTPN(MultiTaskTPN):
    def __init__(self, num_channels=3):
        super(EnergyExpenditureTPN, self).__init__(num_tasks=1, num_channels=num_channels)
        
        # Remove the dropout layers from task_heads
        self.task_heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(96, 256),
                nn.ReLU(),
                nn.Linear(256, 1)
            )
        ])
        
        # Freeze the feature extraction layers
        for param in self.conv1.parameters():
            param.requires_grad = False
        for param in self.conv2.parameters():
            param.requires_grad = False
        for param in self.conv3.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.conv1(x)))
        x = self.dropout(nn.functional.relu(self.conv2(x)))
        x = self.dropout(nn.functional.relu(self.conv3(x)))
        x = nn.functional.max_pool1d(x, x.size(2)).squeeze(2)

        output = self.task_heads[0](x)
        return output

In [5]:
def create_windows_np(data, window_size, stride):
    num_samples, num_channels = data.shape
    num_windows = (num_samples - window_size) // stride + 1

    shape = (num_windows, window_size, num_channels)
    strides = (data.strides[0] * stride, data.strides[0], data.strides[1])

    windows = np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)

    # transpose the windows array to the desired shape
    windows = np.transpose(windows, axes=(0, 2, 1))

    return windows

In [6]:
# Create the model
loaded_model = MultiTaskTPN().to(device)

# Load the saved state dictionary
loaded_model.load_state_dict(torch.load('multi_task_tpn.pth'))

# Set the model to evaluation mode
loaded_model.eval()

MultiTaskTPN(
  (conv1): Conv1d(3, 32, kernel_size=(24,), stride=(1,))
  (conv2): Conv1d(32, 64, kernel_size=(16,), stride=(1,))
  (conv3): Conv1d(64, 96, kernel_size=(8,), stride=(1,))
  (dropout): Dropout(p=0.1, inplace=False)
  (task_heads): ModuleList(
    (0-3): 4 x Sequential(
      (0): Linear(in_features=96, out_features=256, bias=True)
      (1): ReLU()
      (2): Dropout(p=0.1, inplace=False)
      (3): Linear(in_features=256, out_features=1, bias=True)
      (4): Sigmoid()
    )
  )
)

In [14]:
def create_dataloader(i, df, batch_size):
    user_data = {
        'X': [],
        'y': []
    }

    # Get the data for the current user
    user_data['X'] = create_windows_np(df.loc[df['user_id'] == i+1, ['x', 'y', 'z']].values.astype(np.float32), 100, 50)

    # Get the labels for the current user
    y = df.loc[df['user_id'] == i+1, 'met_value_mean_values'].values.astype(np.float32)

    # Create the 50% overlapping labels windows size of 100 without create_windows_np or for loop
    user_data['y'] = np.array([y[i:i+100].mean() for i in range(0, len(y), 50)])[:-2]

    # Get the missing value indexes in the user_data['y']
    missing_value_indexes = np.argwhere(np.isnan(user_data['y']))

    # Remove the missing values from the user_data['y'] and user_data['X']
    user_data['y'] = np.delete(user_data['y'], missing_value_indexes)
    user_data['X'] = np.delete(user_data['X'], missing_value_indexes, axis=0)

    # Create a DataLoader to iterate over the test data in batches
    dataset = TensorDataset(torch.tensor(user_data['X'], dtype=torch.float32), 
                                torch.tensor(user_data['y'], dtype=torch.float32))
    
    # Create a DataLoader to iterate over the test data in batches
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [36]:
path = "../weee/dataset"
batch_size = 256

# Create the model
model = EnergyExpenditureTPN().to(device)

# Copy the weights from the saved MultiTaskTPN model to the new EnergyExpenditureTPN model
model.conv1.load_state_dict(loaded_model.conv1.state_dict())
model.conv2.load_state_dict(loaded_model.conv2.state_dict())
model.conv3.load_state_dict(loaded_model.conv3.state_dict())

# Define the loss function and optimizer
loss_fn = nn.L1Loss()

# Define the optimizer (only optimize the task head parameters)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
    
df = pd.read_csv(f'{path}/combined_e4_acc.csv')

# Training loop
num_epochs = 250
patience = 50
best_val_loss = float('inf')
epochs_without_improvement = 0

for epoch in range(num_epochs):

    # Iterate over training users
    for i in range (14):

        # Ignore two subjects because of their missing data
        if i+1 in [6, 14]:
            continue

        train_dataloader = create_dataloader(i, df, batch_size=batch_size)

        # Training loop
        for batch_idx, (data, labels) in enumerate(train_dataloader):
            data, labels = data.to(device), labels.to(device)

            optimizer.zero_grad()

            output = model(data)

            loss = loss_fn(output, labels.view(-1, 1))

            loss.backward()
            optimizer.step()

            # if batch_idx % 100 == 0:
            #     print(f"Epoch: {epoch+1}/{num_epochs}, Batch: {batch_idx}, Loss: {loss.item()}")

    # Iterate over validation users    
    val_loss = 0
    val_batches = 0
    for i in range (14, 16):

        validation_dataloader = create_dataloader(i, df, batch_size=batch_size)

        # Validation loop
        for batch_idx, (data, labels) in enumerate(validation_dataloader):
            data, labels = data.to(device), labels.to(device)

            output = model(data)

            loss = loss_fn(output, labels.view(-1, 1))

            val_loss += loss.item()
            val_batches += 1

    val_loss /= val_batches

    print(f"Epoch: {epoch+1}/{num_epochs}, Training Loss: {loss.item()}, Validation Loss: {val_loss}")

    # Save the model if validation loss improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "energy_expenditure_tpn_best.pth")
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    # Early stopping
    if epochs_without_improvement == patience:
        print("Early stopping")
        break
    

del df


Epoch: 1/250, Training Loss: 1.0398294925689697, Validation Loss: 1.4089439163605373
Epoch: 2/250, Training Loss: 0.8577562570571899, Validation Loss: 1.36596084634463
Epoch: 3/250, Training Loss: 0.9803221821784973, Validation Loss: 1.3152619749307632
Epoch: 4/250, Training Loss: 0.8033965229988098, Validation Loss: 1.2834422985712688
Epoch: 5/250, Training Loss: 0.9088019132614136, Validation Loss: 1.3180226335922878
Epoch: 6/250, Training Loss: 0.799903154373169, Validation Loss: 1.2665912260611851
Epoch: 7/250, Training Loss: 1.0082985162734985, Validation Loss: 1.3438113331794739
Epoch: 8/250, Training Loss: 0.989181399345398, Validation Loss: 1.2643354485432308
Epoch: 9/250, Training Loss: 1.092362880706787, Validation Loss: 1.3505771656831105
Epoch: 10/250, Training Loss: 0.904577374458313, Validation Loss: 1.265009770790736
Epoch: 11/250, Training Loss: 0.8848182559013367, Validation Loss: 1.3689132531483967
Epoch: 12/250, Training Loss: 0.8348012566566467, Validation Loss: 1.3

In [37]:
best_model = EnergyExpenditureTPN().to(device)
best_model.load_state_dict(torch.load("energy_expenditure_tpn_best.pth"))
df = pd.read_csv(f'{path}/combined_e4_acc.csv')

testing_dataloader = create_dataloader(16, df, batch_size=batch_size)

test_loss = 0
for data, labels in testing_dataloader:
    data, labels = data.to(device), labels.to(device)
    output = best_model(data)
    loss = loss_fn(output, labels.view(-1, 1))
    test_loss += loss.item()

test_loss /= len(testing_dataloader)
print(f"Test Loss: {test_loss}")

Test Loss: 1.5277511392320906
