TODO
- test with larger batches?
- reduce data input (3 sec -> 1 sec)
- run stylecheck + formatting to pretty up code

Link to dataset:\
https://github.com/mmalekzadeh/motion-sense/tree/master/data


In [None]:
# this project has some unusual version dependencies.
# you should run it within a fresh virtual environment
# you really should use the requirements.txt, but this will work as well
!pip3 install torch
!pip3 install tqdm
!pip3 install pandas
!pip3 install scikit-learn
!pip3 install tensorboard
!pip3 install matplotlib
!pip3 install seaborn

Imports

In [15]:
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pathlib
from pathlib import Path
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
import seaborn as sn

Hyperparameters

In [3]:
# model settings and hyperparameters
BATCH_SIZE = 8
VALID_SPLIT = 0.20
EPOCHS = 30
LR = 0.00001
NUMBER_OF_FRAMES_EXAMINED = 150 #50hz, 150=3sec
MODEL_SAVE_DIR = "/Users/isaac/Desktop/Creative/Coding/Transformers/"
DATASET_DIR = "/Users/isaac/Desktop/Creative/Coding/Transformers/iPhone gyro"
dropout_p = 0.2
device = torch.device("cpu") # for mac, to use on GPU use "cuda" or "cpu" otherwise

# PyTorch Transformer Demo

a short test to import and verify the pytorch transformer (encoder + decoder) model

In [113]:
d_model=512 # the number of expected features in the encoder/decoder inputs
nhead=8 # the number of heads in the multiheadattention models
num_encoder_layers=6 # the number of sub-encoder-layers in the encoder
num_decoder_layers=6 # the number of sub-decoder-layers in the decoder
dim_feedforward=2048 # the dimension of the feedforward network model
dropout=0.1 # the dropout value
activation="relu" # the activation function of encoder/decoder intermediate layer
custom_encoder=None # custom encoder
custom_decoder=None # custom decoder
layer_norm_eps=1e-05 # the eps value in layer normalization components
batch_first=False # If True, then the input and output tensors are provided as (batch, seq, feature)
norm_first=False # if True, encoder and decoder layers will perform LayerNorms before other attention and feedforward operations, otherwise after
bias=True # If set to False, Linear and LayerNorm layers will not learn an additive bias
device=None
dtype=None

In [114]:
transformer_model = torch.nn.Transformer(d_model=18, nhead=6)

In [115]:
# test model inference
src = torch.rand((30, 32, 18)) # (S,N,E)
tgt = torch.rand((1, 32, 18)) # (T,N,E)
out = transformer_model(src, tgt)

In [105]:
out.shape # (T, N, E)
# S is the source sequence length
# T is the target sequence length
# N is the batch size
# E is the feature number

torch.Size([1, 32, 18])

# iPhone Motion Detector Model

In [4]:
class EncoderClassifier(torch.nn.Module):
    def __init__(self, input_features, num_classifications, num_layers, num_heads, LR=0.00001, device=torch.device("cpu")):
        super().__init__()
        # these are called sequentially in the forward() method
        self.encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=input_features,
            nhead=num_heads,
            batch_first=True
        )
        self.encoder = torch.nn.TransformerEncoder(
            encoder_layer=self.encoder_layer,
            num_layers=num_layers,
        )
        self.linear = torch.nn.Linear(input_features, num_classifications)
        self.dropout = torch.nn.Dropout(dropout_p)
        self.confusion_matrix = torch.zeros((num_classifications, num_classifications))
        self.device = device
        self.criterion = torch.nn.CrossEntropyLoss() # cross entropy for multiclass classification
        self.optimizer = torch.optim.Adam(
        self.parameters(),
        lr=LR
        ) # most used optimizer and learning rate with transformers from research I did
        # self.writer = SummaryWriter() <--- causes pytorch error. instantiate every time :eyeroll:


    def forward(self, x):
        x = self.encoder(x)
        x = self.dropout(x)
        x = x.max(dim=1)[0] # condenses [N, S, E] to [N, E]
        out = self.linear(x) # [N, E] -> [N, O]
        return out


    def postprocess(self, x, text_labels=False):
        x = torch.nn.functional.softmax(x, dim=1) # apply softmax to model output (cross entropy loss does this automatically in training)
        labels = x.max(dim=1)[1] # get classification for each batch of data
        if text_labels: # convert [0, 0, 1, ...] to ["dws", "dws", "ups", ...]
            action_dict = {
            0 : "dws",
            1 : "ups",
            2 : "wlk",
            3 : "jog",
            4 : "sit",
            5 : "std",
        }
            return [action_dict[label.item()] for label in labels]
        return labels


    # Training function.
    def train_run(self, trainloader):
        self.train()
        print('Training')
        train_running_loss = 0.0
        train_running_correct = 0
        counter = 0
        for data in tqdm(trainloader, total=len(trainloader)):
            counter += 1
            inputs, labels = data['data'], data['label']
            inputs = inputs.to(device)
            labels = labels.long().to(device)
            self.optimizer.zero_grad()
            # Forward pass
            outputs = self(inputs)
            labels = torch.squeeze(labels, -1) # [N, 1] -> [N] to match model outputs
            # Calculate the loss.
            loss = self.criterion(outputs, labels)
            train_running_loss += loss.item()
            SummaryWriter().add_scalar("train loss", loss.item())
            outputs = self.postprocess(outputs) # softmax + argmax output of model to get which action is predicted
            running_correct = torch.sum(outputs == labels) # count correct predictions
            train_running_correct += running_correct
            # Backpropagation.
            loss.backward()
            # Update the optimizer parameters.
            self.optimizer.step()

        # Loss and accuracy for the complete epoch.
        epoch_loss = train_running_loss / counter
        epoch_acc = 100. * (train_running_correct / len(trainloader.dataset))
        return epoch_loss, epoch_acc


    # Validation function.
    def validate(self, testloader):
        self.eval()
        print('Validation')
        valid_running_loss = 0.0
        valid_running_correct = 0
        counter = 0

        with torch.no_grad():
            for data in tqdm(testloader, total=len(testloader)):
                counter += 1
                inputs, labels = data['data'], data['label']
                inputs = inputs.to(device)
                labels = labels.long().to(device)
                labels = torch.squeeze(labels, -1) # [N, 1] -> [N] to match model outputs
                # Forward pass.
                outputs = self(inputs)
                # outputs = torch.squeeze(outputs, -1)
                # Calculate the loss.
                loss = self.criterion(outputs, labels)
                valid_running_loss += loss.item()
                outputs = self.postprocess(outputs) # softmax + argmax output of model to get which action is predicted
                self.confusion_matrix += confusion_matrix(labels.flatten(), outputs.flatten(), labels=[0, 1, 2, 3, 4, 5])
                running_correct = torch.sum(outputs == labels) # count correct predictions
                valid_running_correct += running_correct

        # Loss and accuracy for the complete epoch.
        epoch_loss = valid_running_loss / counter
        epoch_acc = 100. * (valid_running_correct / len(testloader.dataset))
        return epoch_loss, epoch_acc


    def process_confusion_matrix_class(self, class_num):
        tp = self.confusion_matrix[class_num, class_num]
        fp = self.confusion_matrix[class_num, :].sum() - tp
        fn = self.confusion_matrix[:, class_num].sum() - tp
        tn = self.confusion_matrix.sum() - tp - fp - fn
        ap = tp / (tp + fp)
        ar = tp / (tp + fn)
        f1 = 2 / (1 / ar) + (1 / ap)
        return tp, fp, tn, fn, ap, ar, f1

    def process_confusion_matrix_avg(self):
        classes = self.confusion_matrix.shape[0]
        tp, fp, tn, fn, ap, ar, f1 = 0, 0, 0, 0, 0, 0, 0
        scores = [tp, fp, tn, fn, ap, ar, f1]
        for clas in range(classes):
            scores = [old_score + new_score for old_score, new_score in zip(scores, self.process_confusion_matrix_class(clas))]
        scores = [score/classes for score in scores]
        return scores
        # tp, fp, tn, fn, ap, ar, f1


In [4]:
# new model
model = EncoderClassifier(
    input_features=18, # this is the combined number of features from iPhone gyroscope, movement, and acceleromoeter datasets
    num_classifications=6, # walking downstairs, upstairs, walking, jogging, sitting, standing
    num_layers=6,
    num_heads=6,
).float().to(device)

In [5]:
# load model
model = torch.load(MODEL_SAVE_DIR + 'model.pth')
model.eval()

EncoderClassifier(
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=18, out_features=18, bias=True)
    )
    (linear1): Linear(in_features=18, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=18, bias=True)
    (norm1): LayerNorm((18,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((18,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=18, out_features=18, bias=True)
        )
        (linear1): Linear(in_features=18, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): L

In [5]:
## print info if needed
# print(model)
# Total parameters and trainable parameters.
# total_params = sum(p.numel() for p in model.parameters())
# print(f"{total_params:,} total parameters.")
# total_trainable_params = sum(
#     p.numel() for p in model.parameters() if p.requires_grad)
# print(f"{total_trainable_params:,} training parameters.\n")

# ensure model works
input = torch.rand((10, 6, 18)).to(device)
out = model(input)
print(model.postprocess(out))
print(model.postprocess(out, text_labels=True))
assert out.shape == (10, 6)

# ensure loss works
labels = torch.rand((10, 6)).to(device)
diffs = model.criterion(out, labels)
print(diffs)

tensor([3, 4, 5, 5, 0, 4, 5, 1, 1, 1])
['jog', 'sit', 'std', 'std', 'dws', 'sit', 'std', 'ups', 'ups', 'ups']
tensor(6.1889, grad_fn=<DivBackward1>)


# Dataset Preprocess

In [8]:
all_data = None
action_dict = { # used in getting action info from dataset filenames
    "dws": 0,
    "ups": 1,
    "wlk": 2,
    "jog": 3,
    "sit": 4,
    "std": 5,
}
motion_path = Path(DATASET_DIR) / "A_DeviceMotion_data"
accel_path = Path(DATASET_DIR) / "B_Accelerometer_data"
gyro_path = Path(DATASET_DIR) / "C_Gyroscope_data"
motion_iter = [file for file in motion_path.iterdir() if file.name != ".DS_Store"] #.DS_Store appears on Mac and causes errors
torch_dataset = [] # list of dicts (each dict is a batch) where each dict has "data" and "label" values
print("PROCESSING DATASET")
for motion_action in tqdm(motion_iter):
    accel_action = accel_path / motion_action.name
    gyro_action = gyro_path / motion_action.name
    action = motion_action.name.__str__().split("_")[0] # getting action (standing, walking, etc.) from filename
    trial = motion_action.name.__str__().split("_")[1]
    action_int = action_dict[action]
    for motion_person in motion_action.glob('*.csv'):
        person_idx = str(motion_person.stem).split("_")[-1]
        accel_person = accel_action / motion_person.name
        gyro_person = gyro_action / motion_person.name
        # dataset processing
        motion_df = pd.read_csv(motion_person)
        motion_len = len(motion_df)
        accel_df = pd.read_csv(accel_person)
        accel_df = accel_df.rename(columns={"x":"accel_x", "y":"accel_y", "z":"accel_z"})
        accel_len = len(accel_df)
        gyro_df = pd.read_csv(gyro_person)
        gyro_df = gyro_df.rename(columns={"x":"gyro_x", "y":"gyro_y", "z":"gyro_z"})
        gyro_len = len(gyro_df)
        # motion, accelerometer, gyroscope dataset combination and processing
        dataset_len = min(motion_len, accel_len, gyro_len)
        person_df = pd.DataFrame(person_idx, index=np.arange(dataset_len), columns=["person_idx"], dtype=int)
        trial_df = pd.DataFrame(trial, index=np.arange(dataset_len), columns=["trial_number"], dtype=int)
        actions_df = pd.DataFrame(action_int, index=np.arange(dataset_len), columns=["action"], dtype=int)
        full_dataset = pd.concat([motion_df, accel_df, gyro_df, person_df, trial_df, actions_df], axis=1) # combine tables
        full_dataset = full_dataset.drop(["Unnamed: 0"], axis=1) # remove index columns
        full_dataset = full_dataset.iloc[:dataset_len]
        if all_data is None:
            all_data = full_dataset
        else:
            all_data = pd.concat([all_data, full_dataset])
        # # splitting data into batches for data loader
        # batches = len(full_dataset) // frames_per_batch
        # for batch in range(1, batches+1):
        #     batch_data = full_dataset.iloc[((batch - 1)*frames_per_batch):(batch*frames_per_batch)]
        #     batch_data_tensor = torch.tensor(batch_data.values).to(torch.float32)
        #     label_tensor = torch.tensor([action_int])
        #     training_dict = {"data":batch_data_tensor, "label":label_tensor}
        #     torch_dataset.append(training_dict)
print(f"NORMALIZING DATA")
dont_normalize = ["person_idx", "trial_number", "action"]
for column_name in tqdm(all_data.columns):
    if column_name in dont_normalize:
        continue
    all_data[column_name] -= all_data[column_name].min()
    all_data[column_name] /= all_data[column_name].max()
print("SAVING TO {DATASET_DIR + '/whole_dataset_norm.csv'}... will take some time")
all_data.to_csv(DATASET_DIR + "/whole_dataset_norm.csv")

PROCESSING DATASET


100%|██████████| 15/15 [00:27<00:00,  1.83s/it]


NORMALIZING DATA AND SAVING TO /Users/isaac/Desktop/Creative/Coding/Transformers/iPhone gyro/whole_dataset_norm.csv


100%|██████████| 21/21 [00:00<00:00, 30.58it/s]


# Dataset and Loader

In [7]:
# make pytorch dataset
class iPhoneMotionDataset(Dataset):
    def __init__(self, dataset_folder, frames_per_batch):
        if type(dataset_folder) != pathlib.PosixPath: # conversion from str to Pathlib Path
            dataset_folder = Path(dataset_folder)
        print("LOADING DATASET")
        pd_dataset = pd.read_csv(dataset_folder / "whole_dataset_norm.csv")
        self.torch_dataset = [] # list of dicts (each dict is a batch) where each dict has "data" and "label" values
        trials = pd_dataset["trial_number"].unique()
        people = pd_dataset["person_idx"].unique()
        print("DATASET  LOADED")
        print("PROCESSING DATASET")
        for trial in tqdm(trials):
            for person in people:
                data = pd_dataset[(pd_dataset["trial_number"] == trial) & (pd_dataset["person_idx"] == person)]
                labels = torch.tensor(data["action"].values)
                data = data.drop(["trial_number", "person_idx", "action", "Unnamed: 0"], axis=1)
                data = torch.tensor(data.values)
                batches = len(data) // frames_per_batch
                for batch in range(1, batches+1):
                    batch_data = data[((batch - 1)*frames_per_batch):(batch*frames_per_batch)].float()
                    label_tensor = labels[(batch - 1)*frames_per_batch].float()
                    training_dict = {"data":batch_data, "label":label_tensor}
                    self.torch_dataset.append(training_dict)
    def __len__(self):
        return len(self.torch_dataset)
    def __getitem__(self, idx):
        return self.torch_dataset[idx]

iphone_motion_dataset = iPhoneMotionDataset(
    dataset_folder=DATASET_DIR,
    frames_per_batch=NUMBER_OF_FRAMES_EXAMINED
    )

LOADING DATASET
DATASET  LOADED
PROCESSING DATASET


100%|██████████| 15/15 [00:01<00:00,  7.80it/s]


In [8]:
iphone_motion_dataset[0]

{'data': tensor([[0.1632, 0.1570, 0.1401,  ..., 0.5651, 0.4814, 0.4742],
         [0.1581, 0.1658, 0.1364,  ..., 0.5678, 0.4982, 0.4785],
         [0.1572, 0.1732, 0.1344,  ..., 0.5802, 0.5229, 0.4826],
         ...,
         [0.5123, 0.1752, 0.8246,  ..., 0.5436, 0.5363, 0.5181],
         [0.5126, 0.1549, 0.8213,  ..., 0.4930, 0.5399, 0.4773],
         [0.5038, 0.1289, 0.8088,  ..., 0.4677, 0.5330, 0.4283]]),
 'label': tensor(0.)}

In [9]:
# splitting data into train and val datasets and dataloaders
iphone_motion_train, iphone_motion_val = torch.utils.data.random_split(iphone_motion_dataset, [1 - VALID_SPLIT, VALID_SPLIT])
iphone_motion_dataloader_train = DataLoader(iphone_motion_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
iphone_motion_dataloader_val = DataLoader(iphone_motion_val, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

In [13]:
a = torch.zeros((3,8))
print(a)

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


# Train

In [143]:
model.train_run(iphone_motion_dataloader_train)

Training


100%|██████████| 925/925 [02:49<00:00,  5.45it/s]


(1.4082970644976642, tensor(74.5199))

# Validate

In [10]:
model.validate(iphone_motion_dataloader_val)

Validation


100%|██████████| 231/231 [00:05<00:00, 41.26it/s]


(0.42002510172980173, tensor(90.6385))

# Full Training Loop

activate GUI with \
```tensorboard --logdir=runs```

In [9]:
# Lists to keep track of losses and accuracies.
train_loss, valid_loss = [], []
train_acc, valid_acc = [], []
least_loss = float('inf')
# Start the training.
for epoch in range(EPOCHS):
    print(f"[INFO]: Epoch {epoch+1} of {EPOCHS}")
    train_epoch_loss, train_epoch_acc = model.train_run(iphone_motion_dataloader_train)
    valid_epoch_loss, valid_epoch_acc = model.validate(iphone_motion_dataloader_val)
    train_loss.append(train_epoch_loss)
    SummaryWriter().add_scalar("Train Epoch Loss", train_epoch_loss, epoch)
    SummaryWriter().add_scalar("Train Epoch Accuracy", train_epoch_acc, epoch)
    valid_loss.append(valid_epoch_loss)
    SummaryWriter().add_scalar("Validation Epoch Loss", valid_epoch_loss)
    SummaryWriter().add_scalar("Validation Epoch Accuracy", valid_epoch_acc)
    print(model.confusion_matrix.shape)
    fig = sn.heatmap(model.confusion_matrix, annot=True, ).get_figure()
    fig.canvas.draw()
    # Now we can save it to a numpy array.
    image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    SummaryWriter().add_image("Confusion Matrix", image.transpose((2, 0, 1)))
    plt.clf()
    tp, fp, tn, fn, ap, ar, f1 = model.process_confusion_matrix_avg()
    SummaryWriter().add_scalar("True Positives", tp, epoch)
    SummaryWriter().add_scalar("False Positives", fp, epoch)
    SummaryWriter().add_scalar("False Negatives", fn, epoch)
    SummaryWriter().add_scalar("True Negatives", tn, epoch)
    SummaryWriter().add_scalar("Average Recall", ar, epoch)
    SummaryWriter().add_scalar("Average Precision", ap, epoch)
    SummaryWriter().add_scalar("F1 Score", f1, epoch)

    train_acc.append(train_epoch_acc)
    valid_acc.append(valid_epoch_acc)
    print(f"Training loss: {train_epoch_loss}, training acc: {train_epoch_acc}")
    print(f"Validation loss: {valid_epoch_loss}, validation acc: {valid_epoch_acc}")
    # Save model.
    if valid_epoch_loss < least_loss:
        least_loss = valid_epoch_loss
        print(f"Saving best model till now... LEAST LOSS {valid_epoch_loss:.3f}")
        torch.save(model, MODEL_SAVE_DIR + 'model.pth')
    print('-'*50)

[INFO]: Epoch 1 of 30
Training


100%|██████████| 925/925 [02:55<00:00,  5.27it/s]


Validation


100%|██████████| 231/231 [00:07<00:00, 29.59it/s]


torch.Size([6, 6])
Training loss: 0.4473708619620349, training acc: 87.12469482421875
Validation loss: 0.4753989411509914, validation acc: 88.0952377319336
Saving best model till now... LEAST LOSS 0.475
--------------------------------------------------
[INFO]: Epoch 2 of 30
Training


100%|██████████| 925/925 [02:56<00:00,  5.25it/s]


Validation


100%|██████████| 231/231 [00:07<00:00, 32.98it/s]


torch.Size([6, 6])
Training loss: 0.42253752833282626, training acc: 87.96321105957031
Validation loss: 0.4485046708093577, validation acc: 88.04113006591797
Saving best model till now... LEAST LOSS 0.449
--------------------------------------------------
[INFO]: Epoch 3 of 30
Training


100%|██████████| 925/925 [03:20<00:00,  4.62it/s]


Validation


100%|██████████| 231/231 [00:13<00:00, 17.59it/s]


torch.Size([6, 6])
Training loss: 0.4002319476854157, training acc: 88.7882080078125
Validation loss: 0.4529113679221182, validation acc: 89.23160552978516
--------------------------------------------------
[INFO]: Epoch 4 of 30
Training


  5%|▌         | 50/925 [00:19<15:51,  1.09s/it]

In [58]:
# # Example of target with class indices
# loss = torch.nn.CrossEntropyLoss()
# input = torch.randn(3, 5, requires_grad=True)
# target = torch.empty(3, dtype=torch.long).random_(5)
# print(input.shape)
# print(target.shape)
# print(input)
# print(target)
# output = loss(input, target)
# output.backward()

# Example of target with class probabilities
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
print(input)
print(target)
output = loss(input, target)
output.backward()

tensor([[-0.3666, -0.0324, -1.9733, -0.1413, -0.6707],
        [ 0.3253,  0.4910,  0.8005, -0.7587,  1.2424],
        [-0.2742, -0.1118,  0.5459,  1.1000,  0.5414]], requires_grad=True)
tensor([[0.0815, 0.4662, 0.3493, 0.0301, 0.0729],
        [0.3190, 0.3179, 0.2653, 0.0166, 0.0812],
        [0.2133, 0.5532, 0.0749, 0.0165, 0.1421]])
