# Seated Posture assessment using Images and Neural NEtworks (SPINNE)

<img src="Logo.png" style="width: 500px"/>

## Desk posture classifier

For use with Jupyter Notebooks or Google Colab. The cells to load the dataset will not work as the dataset is not publicly available. ***Please see the demo notebook for an interactive webcam posture classifier demo.***

<img src="fig1.png">


                    Figure 1: Desk dataset; A - good, B - bad back, C - bad neck, D - bad slouch

## 1) Load relevant libraries

In [None]:
# import libraries
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
import torch.nn as nn

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import numpy as np
import time

# libraries for webcam use
import cv2
from IPython import display

# CUDA config
Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

### 1.1) Define some useful functions

In [None]:
# useful functions
# Maps an integer class index to its human-readable posture label (see Figure 1).
# NOTE(review): presumably the indices follow the class-folder order assigned by
# datasets.ImageFolder - confirm against the dataset directory names.
class_labels = {
    0 : "good",
    1 : "bad - back",
    2 : "bad - neck",
    3 : "bad - slouch"
}

def imshow(im,label):
    """Draw an image tensor with matplotlib and print its posture label in colour.

    im: image tensor stored channels-first; it is transposed to channels-last for display.
    label: class index into `class_labels`; pass -1 to show the image without printing a label.
    """
    # matplotlib wants the channel axis last, so move axis 0 to the end
    pixels = im.numpy().transpose((1,2,0))
    plt.imshow(pixels)
    plt.axis("off")

    if label != -1:
        name = class_labels[int(label)]
        # ANSI colour codes: green for good posture, red for every bad posture
        colour = '\033[92m' if name == 'good' else '\033[91m'
        print(colour + name + '\033[0m')
            
def classify_dataset(model,dataloader):
    """Classify every image served by `dataloader` and score the predictions.

    Args:
        model: callable mapping an image batch to per-class scores of shape
            (batch, n_classes); the highest-scoring class is the prediction.
        dataloader: iterable of (images, labels) batches that exposes a
            `.dataset` attribute whose length is the total number of images.

    Returns:
        (predictions, classification_accuracy): a list holding one 0-dim CPU
        tensor per image (the predicted class index, in iteration order) and
        the fraction of predictions that match the ground-truth labels.
        The accuracy is 0.0 when the dataset is empty.
    """
    n_images = len(dataloader.dataset)

    # one predicted class index per image
    predictions = []

    # number of predictions that agree with the ground truth
    n_matching_predictions = 0

    # inputs follow the model onto the GPU whenever CUDA is available
    use_cuda = torch.cuda.is_available()

    # evaluate deterministically: disable dropout while scoring and restore the
    # caller's mode afterwards, so this is safe to call in the middle of training
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()

    try:
        # no gradients are needed for scoring - avoids building autograd graphs
        with torch.no_grad():
            for images,labels in dataloader:
                output = model(images.cuda() if use_cuda else images)

                # exp() is monotonic, so the argmax of the log-probabilities
                # is already the most probable class
                predicted = output.argmax(dim=1).cpu()

                predictions.extend(predicted)
                n_matching_predictions += int((predicted == labels.cpu()).sum())
    finally:
        if was_training:
            model.train()

    if n_images == 0:
        # nothing to score - avoid dividing by zero
        return predictions, 0.0

    classification_accuracy = n_matching_predictions / n_images

    return predictions,classification_accuracy

def get_labels(model,dataloader):
    """Collect the ground-truth and predicted class index of every image in `dataloader`.

    Args:
        model: callable mapping an image batch to per-class scores of shape
            (batch, n_classes).
        dataloader: iterable of (images, labels) batches; any batch size works.

    Returns:
        (targets, predictions): two equally long lists with one CPU tensor of
        shape (1,) per image, in iteration order. With a batch size of 1 this
        is one entry per batch.
    """
    # ground-truth label of each image
    targets = []
    # predicted label of each image
    predictions = []

    # inputs follow the model onto the GPU whenever CUDA is available
    use_cuda = torch.cuda.is_available()

    # disable dropout while predicting and restore the caller's mode afterwards
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()

    try:
        # inference only - no autograd graph required
        with torch.no_grad():
            for images,labels in dataloader:
                output = model(images.cuda() if use_cuda else images)

                # split(1) yields one shape-(1,) tensor per sample, so larger
                # batches produce the same per-image layout as batch size 1
                predictions.extend(output.argmax(dim=1).cpu().split(1))
                targets.extend(labels.cpu().split(1))
    finally:
        if was_training:
            model.train()

    return targets, predictions

# this is from https://deeplizard.com/learn/video/0LhiS6yu2qQ
import itertools

def plot_confusion_matrix(cm, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues, classes=None):
    """Print a confusion matrix and draw it as an annotated heat map on the current figure.

    Args:
        cm: 2-D NumPy array of counts; rows are labelled as the true class and
            columns as the predicted class.
        normalize: when True, every row is divided by its total so that cells
            show per-class fractions instead of raw counts.
        title: title drawn above the plot.
        cmap: matplotlib colour map used for the heat map.
        classes: tick labels in class-index order; defaults to the four
            posture classes of this notebook.
    """
    if classes is None:
        classes = ['good','bad - back','bad - neck','bad - slouch']

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # a class without any samples has an all-zero row: keep it at 0
        # instead of dividing by zero and filling the row with NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # fractions are shown with two decimals, raw counts as integers
    fmt = '.2f' if normalize else 'd'
    # cells darker than half the maximum get white text so the number stays readable
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

## 2) Load dataset (please skip this section if you don't have the dataset)

### 2.1) Mount Google Drive and inflate files - for use with Google Colab

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset archives stored there can be read
drive.mount('/content/drive')

In [None]:
# Extract the TRAIN, VAL and TEST archives from the mounted Google Drive into /content
!unzip "/content/drive/My Drive/Colab Notebooks/Data/TRAIN/TRAIN.zip" -d "/content"
!unzip "/content/drive/My Drive/Colab Notebooks/Data/VAL/VAL.zip" -d "/content"
!unzip "/content/drive/My Drive/Colab Notebooks/Data/TEST/TEST.zip" -d "/content"

### 2.2) Load dataset transforms - unique for each dataset participant

In [None]:
# load in training and testing data

# using rescale/position/rotation variance in TRAINING DATA

# for image sets 1,2,5,6 (default image sets other transforms are roughly based on - for normalisation purposes)
# - we still provide reasonable augmentation to all the data so that the system is generalisable
def _participant_transform(scale, brightness=(0.85,1.15), translate=(0.15,0.1)):
    """Build the augmentation pipeline shared by every participant.

    scale: (zoom out, zoom in) range of the random affine transform.
    brightness: range of the random brightness factor.
    translate: maximum (horizontal, vertical) shift as a fraction of the image size.
    """
    return transforms.Compose([transforms.RandomHorizontalFlip(),
                               transforms.RandomAffine(degrees=0,translate=translate,scale=scale),
                               transforms.ColorJitter(brightness=brightness,contrast=(0.85,1.15)), # colour augmentation
                               transforms.Resize(128), # height of resized image
                               transforms.ToTensor()])

# image sets 1,2,5,6 - the default the other transforms are roughly based on (for normalisation purposes);
# reasonable augmentation is still applied to all the data so that the system is generalisable
ryan_transform_1 = _participant_transform(scale=(0.85,1.15))

# image sets 3,4 - very close to camera, slightly dark (higher brightness more likely)
ryan_transform_2 = _participant_transform(scale=(0.7,0.85), brightness=(0.9,1.25))

# image sets 9,10 - slightly further away from camera, slightly dark (higher brightness more likely)
ross_transform_1 = _participant_transform(scale=(1.0,1.25), brightness=(0.9,1.25))

# image set 11 - quite close to camera, slightly dark (higher brightness more likely)
nick_transform_1 = _participant_transform(scale=(0.8,1.0), brightness=(0.9,1.25))

# image set 7 - slightly close to camera
shaun_transform_1 = _participant_transform(scale=(0.8,1.05))

# image set 8 - slightly low down, so no vertical translation
mum_transform_1 = _participant_transform(scale=(0.85,1.15), translate=(0.15,0))

# original basic transform: resize and convert only, no augmentation
transform = transforms.Compose([transforms.Resize(128), # height of resized image
                                transforms.ToTensor()])

### 2.3) Load dataset (Jupyter Notebooks)

In [None]:
# LOCAL (from local harddrive - Jupyter Notebooks)
local_root = 'C:/Users/ryan_/Documents/University/EE581/Project/Data/Office'

# transform applied to participant folders 1..9 (identical assignment for TRAIN and VAL)
train_val_transforms = (ryan_transform_1, ryan_transform_1, ryan_transform_2,
                        ryan_transform_2, ryan_transform_1, ryan_transform_1,
                        nick_transform_1, ross_transform_1, ross_transform_1)
# transform applied to the TEST folders 1..2
test_transforms = (shaun_transform_1, mum_transform_1)

def _load_local_split(split, folder_transforms):
    """Return one ImageFolder per numbered sub-folder (1, 2, ...) of `split` on the local drive."""
    return [datasets.ImageFolder(f'{local_root}/{split}/{number}', transform = folder_transform)
            for number, folder_transform in enumerate(folder_transforms, start=1)]

(train_dataset_1, train_dataset_2, train_dataset_3,
 train_dataset_4, train_dataset_5, train_dataset_6,
 train_dataset_7, train_dataset_8, train_dataset_9) = _load_local_split('TRAIN', train_val_transforms)

(val_dataset_1, val_dataset_2, val_dataset_3,
 val_dataset_4, val_dataset_5, val_dataset_6,
 val_dataset_7, val_dataset_8, val_dataset_9) = _load_local_split('VAL', train_val_transforms)

test_dataset_1, test_dataset_2 = _load_local_split('TEST', test_transforms)

### 2.4) Load dataset (Google Colab)

In [None]:
# ONLINE (from Google Drive as zip archives - Google Colab)
colab_root = '/content'

# transform applied to participant folders 1..9 (identical assignment for TRAIN and VAL)
train_val_transforms = (ryan_transform_1, ryan_transform_1, ryan_transform_2,
                        ryan_transform_2, ryan_transform_1, ryan_transform_1,
                        nick_transform_1, ross_transform_1, ross_transform_1)
# transform applied to the TEST folders 1..2
test_transforms = (shaun_transform_1, mum_transform_1)

def _load_colab_split(split, folder_transforms):
    """Return one ImageFolder per numbered sub-folder (1, 2, ...) of `split` under /content."""
    return [datasets.ImageFolder(f'{colab_root}/{split}/{number}', transform = folder_transform)
            for number, folder_transform in enumerate(folder_transforms, start=1)]

(train_dataset_1, train_dataset_2, train_dataset_3,
 train_dataset_4, train_dataset_5, train_dataset_6,
 train_dataset_7, train_dataset_8, train_dataset_9) = _load_colab_split('TRAIN', train_val_transforms)

(val_dataset_1, val_dataset_2, val_dataset_3,
 val_dataset_4, val_dataset_5, val_dataset_6,
 val_dataset_7, val_dataset_8, val_dataset_9) = _load_colab_split('VAL', train_val_transforms)

test_dataset_1, test_dataset_2 = _load_colab_split('TEST', test_transforms)

### 2.5) Concatenate separate participant datasets (with different transforms) into a single dataset

In [None]:
# concatenate datasets
# merge the per-participant datasets (each keeps its own transform) into one dataset per split
_concat = torch.utils.data.ConcatDataset

_train_parts = [train_dataset_1, train_dataset_2, train_dataset_3,
                train_dataset_4, train_dataset_5, train_dataset_6,
                train_dataset_7, train_dataset_8, train_dataset_9]
train_dataset = _concat(_train_parts)

_val_parts = [val_dataset_1, val_dataset_2, val_dataset_3,
              val_dataset_4, val_dataset_5, val_dataset_6,
              val_dataset_7, val_dataset_8, val_dataset_9]
val_dataset = _concat(_val_parts)

_test_parts = [test_dataset_1, test_dataset_2]
test_dataset = _concat(_test_parts)

### 2.6) Create dataloaders

In [None]:
# training and validation are served in shuffled mini-batches of 32;
# the test set is served one shuffled image at a time
_make_loader = torch.utils.data.DataLoader

train_dataloader = _make_loader(train_dataset, shuffle=True, batch_size=32)
val_dataloader = _make_loader(val_dataset, shuffle=True, batch_size=32)
test_dataloader = _make_loader(test_dataset, shuffle=True, batch_size=1)

### 2.7) Display some images for testing

In [None]:
# sanity check: pull one (shuffled, augmented) training batch and show its first image with its label
images,labels = next(iter(train_dataloader))
imshow(images[0],labels[0])

## 3) Load classifier CNN model

### 3.1) Define SpinNet architecture

In [None]:
#v2 (with dropout and two hidden layers with activations in fcn)

class SpinNET(nn.Module):
    """CNN posture classifier: three conv/pool stages followed by a fully connected head.

    The head is sized for image batches of shape (N, 3, 128, 227): three
    pooling stages reduce a 128x227 frame to 16 feature maps of 7x12, i.e.
    1344 features per image. The output has shape (N, 4) and holds
    log-probabilities (LogSoftmax), so it pairs with `nn.NLLLoss`.
    """

    # features leaving the last pooling layer for a 3x128x227 input: 16 * 7 * 12
    N_FLAT_FEATURES = 1344

    def __init__(self):
        super().__init__()

        # feature extraction (convolution and pooling); padding keeps the
        # spatial size through each convolution, only the pools shrink it
        self.conv_1 = nn.Conv2d(3,16,kernel_size=(3,3),padding=(1,1))
        self.pool_1 = nn.MaxPool2d(kernel_size=(3,3), stride=3)

        self.conv_2 = nn.Conv2d(16,32,kernel_size=(3,3),padding=(1,1))
        self.pool_2 = nn.MaxPool2d(kernel_size=(3,3), stride=3)

        self.conv_3 = nn.Conv2d(32,16,kernel_size=(3,3),padding=(1,1))
        self.pool_3 = nn.MaxPool2d(kernel_size=(2,2), stride=2)

        # classifier (fully connected network)
        self.hidden_1 = nn.Linear(self.N_FLAT_FEATURES,256)
        self.hidden_2 = nn.Linear(256,256)
        self.output_layer = nn.Linear(256,4)
        self.classification_fn = nn.LogSoftmax(dim=1)

        # shared dropout applied after each hidden layer (active in train mode only)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self,x):
        """Return the (N, 4) log-probabilities for an image batch `x` of shape (N, 3, 128, 227)."""
        x = F.relu(self.conv_1(x))
        x = self.pool_1(x)

        x = F.relu(self.conv_2(x))
        x = self.pool_2(x)

        x = F.relu(self.conv_3(x))
        x = self.pool_3(x)

        # flatten per sample, keeping the batch dimension fixed: a wrongly sized
        # input then fails with a shape error in hidden_1 instead of being
        # silently reinterpreted as a larger batch
        x = x.flatten(start_dim=1)

        x = self.dropout(F.relu(self.hidden_1(x)))
        x = self.dropout(F.relu(self.hidden_2(x)))
        x = self.output_layer(x)
        x = self.classification_fn(x)
        return x

### 3.2) Load SpinNet model for training

In [None]:
# Instantiate the custom SpinNET architecture defined above (freshly initialised weights)
model = SpinNET()

### 3.3) Download pretrained ILSVRC model for transfer learning (download may take some time)

In [None]:
#This is selecting the pretrained model
# model = torch.hub.load('pytorch/vision:v0.6.0', 'vgg11', pretrained=False)


### 3.4) Modify ILSVRC network to have 4 outputs

In [None]:
# mod = list(model.classifier.children())

# # change last linear layer to correct number of output classes and add a log softmax classifier
# mod.pop()
# mod.append(nn.Linear(4096,4))
# mod.append(nn.LogSoftmax(dim=1))

# model.classifier = torch.nn.Sequential(*mod)

In [None]:
# run on the GPU whenever CUDA is available; otherwise the model stays on the CPU
model = model.cuda() if torch.cuda.is_available() else model

## 4) Training

In [None]:
from torch import nn
from torch import optim

# define loss function and optimiser type/parameters

# negative log-likelihood loss: the model already outputs log-probabilities
# (LogSoftmax), so together they amount to cross-entropy
criterion = nn.NLLLoss()

# Adam over all model parameters with a learning rate of 1e-4
optimiser = optim.Adam(model.parameters(),lr=0.0001)
# alternative optimiser (disabled):
#optimiser = optim.SGD(model.parameters(),lr=0.01)

In [None]:
epochs = 30

# inputs and labels follow the model onto the GPU whenever CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for e in range(epochs):
    # ---- training phase: dropout active, weights updated ----
    model.train()
    running_loss = 0

    for images,labels in train_dataloader:
        # clear gradients left over from the previous batch
        optimiser.zero_grad()

        # get output from current network
        output = model(images.to(device))

        # loss for this batch - cross-entropy (log softmax plus negative log likelihood)
        loss = criterion(output,labels.to(device))

        # back propagation of loss
        loss.backward()

        # optimisation step - update weights
        optimiser.step()

        running_loss += loss.item()

    print("EPOCH ", str(e+1))
    print(f"Training loss: {running_loss / len(train_dataloader)}")

    # ---- evaluation phase: dropout disabled so the metrics are not perturbed by it ----
    model.eval()

    # compute the training accuracy
    predictions,classification_accuracy = classify_dataset(model,train_dataloader)
    print(f"Training accuracy: {classification_accuracy*100}%")

    # compute validation loss for this epoch - could also do this for every batch in one epoch if wanted
    running_loss = 0

    # no weights are updated here, so skip building autograd graphs
    with torch.no_grad():
        for images,labels in val_dataloader:
            # get output from updated network
            output = model(images.to(device))

            # cross-entropy loss for this validation batch
            loss = criterion(output,labels.to(device))

            running_loss += loss.item()

    print(f"Validation loss: {running_loss / len(val_dataloader)}")

    # compute the validation accuracy
    predictions,classification_accuracy = classify_dataset(model,val_dataloader)
    print(f"Validation accuracy: {classification_accuracy*100}%")

    print("===============================================")


# save the trained weights (state dict only) in a file
torch.save(model.state_dict(), 'spinnet_trained.pth')

## 5) Testing

### 5.1) Load our trained model

In [None]:
# load training model if required
# NOTE(review): this reads 'spinnet_trained_2.7.pth', not the 'spinnet_trained.pth'
# written by the training cell - confirm which checkpoint is intended.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# Without a GPU, tensors saved from CUDA are remapped onto the CPU.
load_options = {} if torch.cuda.is_available() else {'map_location': torch.device('cpu')}
state_dict = torch.load('spinnet_trained_2.7.pth', **load_options)

model.load_state_dict(state_dict)

### 5.2) Get classification accuracy

In [None]:
# switch to evaluation mode (disables dropout) before scoring the held-out test set
model.eval()

# predictions: one predicted class per test image; classification_accuracy: fraction correct
predictions,classification_accuracy = classify_dataset(model,test_dataloader)

In [None]:
# classification_accuracy is a fraction in [0, 1]; report it as a percentage
print("Classification accuracy: ", str(classification_accuracy * 100), "%")

### 5.3) Plot confusion matrix for testing data

In [None]:
# collect the ground-truth and predicted label of every test image for the confusion matrix
targets, predictions = get_labels(model,test_dataloader)

In [None]:
# build the confusion matrix with scikit-learn (ground truth first, predictions second) and plot the raw counts
cm = confusion_matrix(targets, predictions)
plot_confusion_matrix(cm)

### 5.4) Measure time for classification

In [None]:
import time

# time a single forward pass on one test image
image_test,label_test = next(iter(test_dataloader))
imshow(image_test[0],label_test[0]) # display first: imshow needs the tensor on the CPU

# the model lives on the GPU when CUDA is available, so the input has to follow it
use_cuda = torch.cuda.is_available()
if use_cuda:
    image_test = image_test.cuda()
    torch.cuda.synchronize() # keep pending GPU work out of the measurement

# inference only - no autograd graph required
with torch.no_grad():
    start_time = time.perf_counter() # monotonic, high-resolution timer
    model_output = model(image_test)
    if use_cuda:
        torch.cuda.synchronize() # CUDA kernels run asynchronously - wait for the result
    elapsed_time = time.perf_counter() - start_time

print("Time taken: ", elapsed_time)
# exp() is monotonic, so the argmax of the log-probabilities is the predicted class
print("Prediction: ", torch.argmax(model_output))

## 6) Real-time webcam demo (does not work on Google Colab)

In [None]:
# open camera index 0 via the cv2.CAP_DSHOW backend and request 1280x720 frames
# NOTE(review): CAP_DSHOW is presumably the Windows DirectShow backend - confirm before running on other platforms
vc = cv2.VideoCapture(0,cv2.CAP_DSHOW) # first argument is the camera index; change it to select another webcam
vc.set(cv2.CAP_PROP_FRAME_WIDTH, 1280.0)
vc.set(cv2.CAP_PROP_FRAME_HEIGHT, 720.0)

In [None]:
%matplotlib inline

plt.figure(figsize=(15,15*9/16))

for i in range(100):
    # get the frame
    if vc.isOpened():
        is_capturing, frame = vc.read()
            
    # resize the input image and convert to pytorch tensor
    frame_small = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), dsize=(227, 128))
    frame_tensor = torch.from_numpy(frame_small.astype(float).transpose(2,0,1)/256).view(1,3,128,227).float()
    
    # compute CNN output
    if torch.cuda.is_available():
        output = model(frame_tensor.cuda())
    else:
        output = model(frame_tensor)
        
    ps = torch.exp(output)
    
    # get index of maximum probaability - corresponds to the class
    max_val,max_idx = ps[0].max(0)
    
    # update display
    plt.clf()
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) # show the original image (not downscaled version fed to CNN)
    plt.axis("off")
    if(int(max_idx) == 0):
        with plt.rc_context({'axes.titlecolor':'green'}):
            plt.title(class_labels[int(max_idx)],fontsize=32)
    else:
        with plt.rc_context({'axes.titlecolor':'red'}):
            plt.title(class_labels[int(max_idx)],fontsize=32)
    display.display(plt.gcf())
    display.clear_output(wait=True)
    
    # fps
    time.sleep(1/10)

In [None]:
# release the capture device so the webcam is switched off and free for other applications
vc.release() # switch off the webcam