In [2]:
import pandas as pd
import numpy as np
import ast
import re
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

In [3]:
df = pd.read_pickle('data/model_data_window_500_space_50.pkl')
df.head()

Unnamed: 0,id,sig_array,activity_name,subject_id,dataset
0,0,"[[-0.1832595, -0.2076941, -0.53145254, 9.77168...",squats,0,mmfit
1,1,"[[1.502728, -1.7263045, -1.4062113, 0.86939216...",squats,0,mmfit
2,2,"[[-0.21502449, -1.5980228, -1.4941758, 6.47373...",non-e,0,mmfit
3,3,"[[0.6059781, 0.07819072, -0.53633946, 7.046148...",non-e,0,mmfit
4,4,"[[0.09896013, -0.2724458, 0.53145254, 1.087338...",non-e,0,mmfit


In [5]:
name_fix_dict = {'non-e' : 'rest',
                 'nonexercise': 'rest',
                 'staticstretch(atyourownpace)': 'staticstretch',
                 'two-armdumbbellcurl(botharms,notalternating)': 'bicepcurls',
                 'wallballs': 'wallball',
                 'dumbbell_shoulder_press': 'dumbbellshoulderpress',
                 'lateral_shoulder_raises': 'lateralshoulderraises',
                 'sit-up(handspositionedbehindhead)': 'situps'}


df['activity_name'] = df['activity_name'].apply(lambda x: name_fix_dict[x] if x in name_fix_dict.keys() else x)

In [6]:
df['activity_name'].unique()

array(['squats', 'rest', 'pushups', 'dumbbellshoulderpress', 'lunges',
       'dumbbell_rows', 'situps', 'bicep_curls', 'lateralshoulderraises',
       'tricep_extensions', 'jumping_jacks', 'kbpress', 'boxjumps',
       'deadlifts', 'crunches', 'kbsquatpress', 'null', 'wallball',
       'burpees', 'pullups', 'bicepscurl(band)', 'ellipticalmachine',
       'staticstretch', 'burpee', 'bicepcurls',
       'dynamicstretch(atyourownpace)', 'walk', 'tricepextensions', 'dip',
       'plank', 'v-up', 'dumbbellrows', 'deviceontable',
       'kettlebellswing', 'russiantwist', 'crunch', 'seatedbackfly',
       'butterflysit-up', 'jumprope', 'fastalternatingpunches',
       'sideplankrightside', 'sideplankleftside', 'note',
       'taprightdevice', 'repetitivestretching', 'jumpingjacks',
       'powerboatpose', 'tapleftdevice', 'unlistedexercise',
       'running(treadmill)', 'armbandadjustment',
       'overheadtricepsextension(labelspansbotharms)', 'bandpull-downrow',
       'chestpress(rack)', 

In [7]:
df[df['activity_name']=='null']['dataset'].unique()

array(['har_data'], dtype=object)

In [8]:
# limit to labels with enough examples

counts = df['activity_name'].value_counts()
valid_activities = [activity for activity in counts.index.tolist() if counts[activity] >=1000]
df = df[df['activity_name'].isin(valid_activities)]

print(f"There are {len(df['activity_name'].unique())} unique activities in the dataset")

There are 33 unique activities in the dataset


In [9]:
# Create test train val split on the user level

# split test/train
train_users, test_users = train_test_split(df['subject_id'].unique(), train_size=.8, random_state=42)

train_data = df[df['subject_id'].isin(train_users)]
test_data = df[df['subject_id'].isin(test_users)]

# split train/val
train_users, val_users = train_test_split(train_data['subject_id'].unique(), train_size=.9, random_state=42)

val_data = train_data[train_data['subject_id'].isin(val_users)]
train_data = train_data[train_data['subject_id'].isin(train_users)]

print("The Sizes of the Train, Test, and Val Sets are:")
print(f"Train Size: {len(train_data)}")
print(f"Test Size: {len(test_data)}")
print(f"Val Size: {len(val_data)}")

The Sizes of the Train, Test, and Val Sets are:
Train Size: 102901
Test Size: 28716
Val Size: 9575


In [10]:
# Seperate X and Y components of data

X_train = np.array(train_data['sig_array'])
X_test = np.array(test_data['sig_array'])
X_Val = np.array(val_data['sig_array'])

y_train = np.array(train_data['activity_name'])
y_test = np.array(test_data['activity_name'])
y_val = np.array(val_data['activity_name'])

In [11]:
# Get Data in right format

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
y_val_encoded = label_encoder.transform(y_val)


X_train = np.stack(X_train).astype(np.float32)
X_test = np.stack(X_test).astype(np.float32)
X_Val = np.stack(X_Val).astype(np.float32)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
X_Val_tensor = torch.tensor(X_Val, dtype=torch.float32)

# Convert y arrays to tensors
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)
y_val_tensor = torch.tensor(y_val_encoded, dtype=torch.long)

In [12]:
from torch.utils.data import Dataset, DataLoader

class IMUDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32).permute(0, 2, 1)  # Convert to tensor and swap axes
        self.y = torch.tensor(y, dtype=torch.long)  # Ensure labels are integers for classification

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create Dataset Objects
train_dataset = IMUDataset(X_train, y_train_encoded)
val_dataset = IMUDataset(X_Val, y_val_encoded)
test_dataset = IMUDataset(X_test, y_test_encoded)

# DataLoader
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [13]:
def compute_accuracy(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    return 100 * correct / total

<module 'model' from '/Users/jacobgottesman/Public/DS 4440/smartwatch-activity-recognition/model.py'>

In [None]:

import torch.optim as optim
import torch.nn as nn

from model import *


# Model setup
num_classes = len(np.unique(y_train_encoded))
window_length = 500  
device = torch.device("mps" if torch.mps.is_available() else "cpu")

model = IMUCNN(num_classes=num_classes, window_length=window_length).to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
augmenter = TimeSeriesAugmentation()


# Training Loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:

        X_batch = augmenter.apply(X_batch)
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    #  Compute validation accuracy
    val_accuracy = compute_accuracy(model, val_loader, device)
    train_accuracy = compute_accuracy(model, train_loader, device)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, Train Accuracy {train_accuracy:.2f}%")


print("Training complete!")


Epoch [1/20], Loss: 1.1609, Val Accuracy: 80.63%, Train Accuracy 83.96%
Epoch [2/20], Loss: 0.6565, Val Accuracy: 85.59%, Train Accuracy 88.22%
Epoch [3/20], Loss: 0.5131, Val Accuracy: 88.28%, Train Accuracy 91.29%
Epoch [4/20], Loss: 0.4236, Val Accuracy: 88.67%, Train Accuracy 92.14%


In [21]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

def predict(model, dataloader, device):
    model.eval()  # Set model to evaluation mode
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)  # Get class with highest probability

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    return np.array(all_preds), np.array(all_labels)



In [33]:
# Get predictions
y_pred, y_true = predict(model, test_loader, device)

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)



# Print classification report
print("Classification Report:\n", classification_report(y_true, y_pred, labels=list(label_encoder.classes_)))


Classification Report:
                                precision    recall  f1-score   support

                   bicepcurls       0.00      0.00      0.00         0
                     boxjumps       0.00      0.00      0.00         0
                       burpee       0.00      0.00      0.00         0
                      burpees       0.00      0.00      0.00         0
              butterflysit-up       0.00      0.00      0.00         0
                       crunch       0.00      0.00      0.00         0
                     crunches       0.00      0.00      0.00         0
                    deadlifts       0.00      0.00      0.00         0
                deviceontable       0.00      0.00      0.00         0
                 dumbbellrows       0.00      0.00      0.00         0
        dumbbellshoulderpress       0.00      0.00      0.00         0
dynamicstretch(atyourownpace)       0.00      0.00      0.00         0
            ellipticalmachine       0.00      0.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [32]:
label_encoder.classes_

array(['bicepcurls', 'boxjumps', 'burpee', 'burpees', 'butterflysit-up',
       'crunch', 'crunches', 'deadlifts', 'deviceontable', 'dumbbellrows',
       'dumbbellshoulderpress', 'dynamicstretch(atyourownpace)',
       'ellipticalmachine', 'jumprope', 'kbpress', 'kbsquatpress',
       'lateralshoulderraises', 'lunges', 'null', 'plank', 'pullups',
       'pushups', 'rest', 'rowingmachine', 'running(treadmill)',
       'russiantwist', 'situps', 'squats', 'staticstretch',
       'tricepextensions', 'v-up', 'walk', 'wallball'], dtype=object)