In [1]:
import json
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

healthy_path = Path("~/workspace/datasets/gaitdata/outputs/healthy_converted").expanduser()
unhealthy_path = Path("~/workspace/datasets/gaitdata/outputs/unhealthy_converted_final").expanduser()

healthy_files = list(healthy_path.glob("**/*.json"))
unhealthy_files = list(unhealthy_path.glob("**/*.json"))

healthy_inputs = []
unhealthy_inputs = []

for file in healthy_files:
    data = []
    for line in open(file):
        data.append(json.loads(line))
    try:
        data = torch.tensor(data, dtype=torch.float32)
    except:
        continue
    data = data.view(data.shape[0], -1)
    healthy_inputs.append(data)

for file in unhealthy_files:
    data = []
    for line in open(file):
        data.append(json.loads(line))
    try:
        data = torch.tensor(data, dtype=torch.float32)
    except:
        continue
    data = data.view(data.shape[0], -1)
    unhealthy_inputs.append(data)

In [3]:
# build dataset, healthy = 0, unhealthy = 1
dataset = []
for data in healthy_inputs:
    dataset.append((data, F.one_hot(torch.tensor(0), num_classes=2)))
for data in unhealthy_inputs:
    dataset.append((data, F.one_hot(torch.tensor(1), num_classes=2)))

import random
random.shuffle(dataset)
train_dataset = dataset[:int(len(dataset) * 0.8)]
test_dataset = dataset[int(len(dataset) * 0.8):]
# build dataloader
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

189


In [4]:
print(len(dataset))
print(len(healthy_inputs))
print(len(unhealthy_inputs))

189
157
32


In [5]:
class GaitClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes=2):
        super(GaitClassifier, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        # input_shape (seq_len, input_size)
        # output_shape (num_classes)
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, hidden_size * 4)
        self.fc2 = nn.Linear(hidden_size * 4, hidden_size * 4)
        self.fc3 = nn.Linear(hidden_size * 4, hidden_size)
        self.fc4 = nn.Linear(hidden_size, num_classes)

        # print parameter count
        print(f"parameter count: {sum(p.numel() for p in self.parameters())}")

    
    def forward(self, x):
        # x shape (seq_len, input_size)
        # out shape (hidden_size)
        out, _ = self.lstm(x)
        out = self.fc1(out[-1, :])
        out = F.relu(out)
        out = self.fc2(out)
        out = F.relu(out)
        out = self.fc3(out)
        out = F.relu(out)
        out = self.fc4(out)
        return out


In [6]:
model = GaitClassifier(26, 4096, 2)
model = model.to("mps")
model.train()

parameter count: 470265858


GaitClassifier(
  (lstm): LSTM(26, 4096, batch_first=True)
  (fc1): Linear(in_features=4096, out_features=16384, bias=True)
  (fc2): Linear(in_features=16384, out_features=16384, bias=True)
  (fc3): Linear(in_features=16384, out_features=4096, bias=True)
  (fc4): Linear(in_features=4096, out_features=2, bias=True)
)

In [7]:
def eval():
    correct = 0
    total = 0
    with torch.no_grad():
        for data, label in tqdm(dataloader):
            data: torch.Tensor = data.to("mps")
            label: torch.Tensor = label.to("mps")
            data = data.squeeze(0)
            label = label.squeeze(0).float()
            label = torch.argmax(label)
            output = model(data)
            predicted = torch.argmax(output)
            total += 1
            if label.item() == predicted.item():
                correct += 1
    return correct / total

In [8]:
loss_datas = []
accuracy_datas = []
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
counter = 0
criterion = nn.CrossEntropyLoss()
run_name = "lstm_4096_lr_5e-5"
for epoch in tqdm(range(10)):
    curr_loss = 0
    for data, label in dataloader:
        data: torch.Tensor = data.to("mps")
        label: torch.Tensor = label.to("mps")
        data = data.squeeze(0)
        label = label.squeeze(0).float()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        counter += 1
        curr_loss += loss.item()
        loss_datas.append(loss.item())
        if counter % 10 == 0:
            print(f"Epoch: {epoch}, Loss: {curr_loss / 10}, Step: {counter}")
            curr_loss = 0
    # run eval
    accuracy = eval()
    print(f"Epoch: {epoch}, Accuracy: {accuracy}")
    accuracy_datas.append(accuracy)
    # save model
    torch.save(model.state_dict(), f"checkpoints/{run_name}_epoch_{epoch}.pth")

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0, Loss: 0.5048663496971131, Step: 10
Epoch: 0, Loss: 0.5132616519927978, Step: 20
Epoch: 0, Loss: 0.4132616549730301, Step: 30
Epoch: 0, Loss: 0.4132616549730301, Step: 40
Epoch: 0, Loss: 0.6132616490125656, Step: 50
Epoch: 0, Loss: 0.4132616549730301, Step: 60
Epoch: 0, Loss: 0.6132616490125656, Step: 70
Epoch: 0, Loss: 0.4132616549730301, Step: 80
Epoch: 0, Loss: 0.4132616549730301, Step: 90
Epoch: 0, Loss: 0.31326165795326233, Step: 100
Epoch: 0, Loss: 0.7132616460323333, Step: 110
Epoch: 0, Loss: 0.4132616549730301, Step: 120
Epoch: 0, Loss: 0.5132616519927978, Step: 130
Epoch: 0, Loss: 0.5132616519927978, Step: 140
Epoch: 0, Loss: 0.6132616490125656, Step: 150
Epoch: 0, Loss: 0.4132616549730301, Step: 160
Epoch: 0, Loss: 0.5132616519927978, Step: 170
Epoch: 0, Loss: 0.5132616519927978, Step: 180


100%|██████████| 189/189 [00:41<00:00,  4.55it/s]


Epoch: 0, Accuracy: 0.8306878306878307


 10%|█         | 1/10 [02:51<25:41, 171.23s/it]

Epoch: 1, Loss: 0.03132616579532623, Step: 190
Epoch: 1, Loss: 0.4132616549730301, Step: 200


100%|██████████| 189/189 [00:41<00:00,  4.59it/s]

Accuracy: 83.06878306878306





In [8]:
print(f"Accuracy: {100 * correct / total}")

Accuracy: 83.06878306878306


In [15]:
model = GaitClassifier(26, 4096, 2)
model.load_state_dict(torch.load("modeltraintest_2.pth"), strict=True)
model = model.to("mps")

parameter count: 470265858
