# Seated Posture assessment using Images and Neural NEtworks (SPINNE)

<img src="Logo.png" style="width: 500px"/>

## Desk posture classifier [DEMO]

<img src="fig1.png">


                    Figure 1: Desk dataset; A - good, B - bad back, C - bad neck, D - bad slouch

## 1) Load relevant libraries

In [None]:
# import libraries
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
import torch.nn as nn

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import numpy as np
import time

# libraries for webcam use
import cv2
from IPython import display

# CUDA config: alias the float tensor class that matches the available hardware
if torch.cuda.is_available():
    Tensor = torch.cuda.FloatTensor
else:
    Tensor = torch.FloatTensor

### 1.1) Define some useful functions

In [None]:
# useful functions
# class index (as produced by the classifier) -> human-readable posture label
class_labels = dict(enumerate((
    "good",
    "bad - back",
    "bad - neck",
    "bad - slouch",
)))

def imshow(im,label):
    """Show an image tensor and print its posture label in colour.

    im    -- channels-first image tensor (C x H x W); it is converted with
             ``.numpy()`` and transposed to channels-last for matplotlib.
    label -- class index looked up in ``class_labels``; pass -1 to skip the
             printout and only draw the image.
    """
    pixels = im.numpy().transpose((1,2,0))
    plt.imshow(pixels)
    plt.axis("off")

    # -1 is the "no label" sentinel
    if label == -1:
        return

    name = class_labels[int(label)]
    # ANSI escape codes: green for good posture, red for everything else
    colour = '\033[92m' if name == 'good' else '\033[91m'
    print(colour + name + '\033[0m')
            
def classify_dataset(model,dataloader):
    """Classify every batch of ``dataloader`` and score the result against the ground truth.

    The model is switched to evaluation mode for the duration of the call so
    that dropout does not randomise the predictions, and gradients are not
    tracked. The previous training/eval mode is restored before returning.

    Args:
        model: ``torch.nn.Module`` whose output has one row per image and one
            column per class; the column with the highest score is the
            predicted class.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(predictions, classification_accuracy)`` where ``predictions`` is a
        list with one 0-dim CPU tensor (the predicted class index) per image
        and ``classification_accuracy`` is the fraction of images whose
        prediction equals its label. The accuracy is ``0.0`` when the
        dataloader yields no images.
    """
    # one predicted class index per image, in iteration order
    predictions = []

    # counted while iterating, so skipped or dropped samples do not skew the accuracy
    n_images = 0

    # keep track of predictions which match the actual label (ground truth)
    n_matching_predictions = 0

    # feed inputs to wherever the model's weights live; a parameter-free model takes them as-is
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # loop over all batches in the dataset
            for images,labels in dataloader:
                if device is not None:
                    images = images.to(device)
                output = model(images)

                # exp() is monotonic, so the arg-max of the log-probabilities
                # is already the most probable class
                batch_predictions = output.argmax(dim=1).cpu()
                batch_labels = torch.as_tensor(labels).cpu()

                predictions.extend(batch_predictions)
                n_matching_predictions += int((batch_predictions == batch_labels).sum())
                n_images += len(batch_labels)
    finally:
        # leave the model in the mode the caller had it in
        model.train(was_training)

    if n_images == 0:
        return predictions, 0.0

    classification_accuracy = n_matching_predictions / n_images

    return predictions,classification_accuracy

def get_labels(model,dataloader):
    """Collect the target labels and the model's predicted labels, batch by batch.

    The model is switched to evaluation mode for the duration of the call so
    that dropout does not randomise the predictions, and gradients are not
    tracked. The previous training/eval mode is restored before returning.

    Args:
        model: ``torch.nn.Module`` whose output has one row per image and one
            column per class.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(targets, predictions)``: two lists holding one entry per batch.
        ``targets`` contains the label batches exactly as the dataloader
        yielded them; ``predictions`` contains the matching arg-max class
        indices as CPU tensors. With a batch size of 1 every entry therefore
        describes a single image.
    """
    # target labels, one entry per batch
    targets = []
    # predicted labels, one entry per batch
    predictions = []

    # feed inputs to wherever the model's weights live; a parameter-free model takes them as-is
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # loop over all batches in the dataset
            for images,labels in dataloader:
                if device is not None:
                    images = images.to(device)
                output = model(images)

                # predictions are moved to the CPU so they sit next to the targets
                predictions.append(output.argmax(dim=1).cpu())
                targets.append(labels)
    finally:
        # leave the model in the mode the caller had it in
        model.train(was_training)

    return targets, predictions

# this is from https://deeplizard.com/learn/video/0LhiS6yu2qQ
import itertools

def plot_confusion_matrix(cm, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues, classes=None):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: square NumPy array of counts; rows are true labels and columns are
            predicted labels (matching the axis labels drawn below).
        normalize: when true, each row is divided by its sum so the cells show
            per-class fractions instead of counts.
        title: title of the plot.
        cmap: matplotlib colour map used for the heat map.
        classes: tick labels in class-index order; defaults to the four
            posture classes of this project.
    """
    if classes is None:
        classes = ['good','bad - back','bad - neck','bad - slouch']

    if normalize:
        # A class without any samples has a zero row sum; keep that row at 0
        # instead of letting the division produce NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # fractions are shown with two decimals, raw counts as integers
    fmt = '.2f' if normalize else 'd'
    # cells darker than half the maximum get white text so the number stays readable
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # run the layout pass last so the axis labels are taken into account
    plt.tight_layout()

## 2) Load classifier CNN model

### 2.1) Define SpinNet architecture

In [None]:
#v2 (with dropout and two hidden layers with activations in fcn)

class SpinNET(nn.Module):
    """CNN posture classifier: three conv/pool stages and a fully connected head.

    The input is a batch of 3-channel images. The flatten step expects the
    convolutional stack to produce 1344 features per image (16 channels x 7 x 12,
    which is what a 128 x 227 input yields). ``forward`` returns
    log-probabilities over the four posture classes.
    """

    def __init__(self):
        super().__init__()

        # feature extraction: 3x3 convolutions with padding 1, each followed by max-pooling
        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3,3), padding=(1,1))
        self.pool_1 = nn.MaxPool2d(kernel_size=(3,3), stride=3)

        self.conv_2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3,3), padding=(1,1))
        self.pool_2 = nn.MaxPool2d(kernel_size=(3,3), stride=3)

        self.conv_3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=(3,3), padding=(1,1))
        self.pool_3 = nn.MaxPool2d(kernel_size=(2,2), stride=2)

        # classifier: two hidden layers, then one output unit per posture class
        self.hidden_1 = nn.Linear(in_features=1344, out_features=256)
        self.hidden_2 = nn.Linear(in_features=256, out_features=256)
        self.output_layer = nn.Linear(in_features=256, out_features=4)
        self.classification_fn = nn.LogSoftmax(dim=1)

        # shared dropout applied after each hidden activation
        self.dropout = nn.Dropout(p=0.3)

    def forward(self,x):
        """Return per-class log-probabilities for a batch of images."""
        stages = (
            (self.conv_1, self.pool_1),
            (self.conv_2, self.pool_2),
            (self.conv_3, self.pool_3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        # flatten each image's feature maps into the vector hidden_1 expects
        x = x.view(-1, self.hidden_1.in_features)

        for hidden in (self.hidden_1, self.hidden_2):
            x = self.dropout(F.relu(hidden(x)))

        return self.classification_fn(self.output_layer(x))

### 2.2) Load SpinNet model (which has already been trained on our dataset)

In [None]:
# Instantiate the custom SpinNET classifier defined above
# (its weights are freshly initialised until the trained state dict is loaded below)
model = SpinNET()

In [None]:
# run on the GPU when one is available, otherwise keep the model where it is
model = model.cuda() if torch.cuda.is_available() else model

In [None]:
# Load the trained weights. map_location remaps the stored tensors onto the CPU;
# load_state_dict then copies the values into the model's existing parameters.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
state_dict = torch.load('spinnet_trained_2.7.pth',map_location=torch.device('cpu'))
model.load_state_dict(state_dict)

# The rest of the notebook only runs inference: switch to evaluation mode so the
# dropout layers stop randomly zeroing activations and predictions are repeatable.
model.eval()

## 3) Real-time webcam demo (does not work on Google Colab)

In [None]:
# Open camera index 0 (use a different index if the machine has more than one webcam).
# NOTE(review): cv2.CAP_DSHOW requests the DirectShow backend, presumably Windows-only -
# confirm before running on another platform.
vc = cv2.VideoCapture(0,cv2.CAP_DSHOW)
# request a 1280x720 capture resolution
vc.set(cv2.CAP_PROP_FRAME_WIDTH, 1280.0)
vc.set(cv2.CAP_PROP_FRAME_HEIGHT, 720.0)

In [None]:
%matplotlib inline

plt.figure(figsize=(15,15*9/16))

# Inference only: evaluation mode switches dropout off so the displayed class
# does not flicker because of randomly dropped activations.
model.eval()
# send frames to whichever device holds the model's weights
device = next(model.parameters()).device

for _ in range(100):
    # get the frame; stop cleanly instead of reusing a stale or missing frame
    if not vc.isOpened():
        print("Webcam is not open - stopping the demo")
        break
    is_capturing, frame = vc.read()
    if not is_capturing or frame is None:
        print("Could not read a frame from the webcam - stopping the demo")
        break

    # OpenCV delivers BGR; convert once and reuse for both the CNN input and the display
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # resize the input image and convert to a 1 x 3 x 128 x 227 pytorch tensor
    # NOTE(review): pixel values are scaled by 256 here - confirm this matches the
    # scaling used when the model was trained.
    frame_small = cv2.resize(frame_rgb, dsize=(227, 128))
    frame_tensor = torch.from_numpy(frame_small.astype(float).transpose(2,0,1)/256).view(1,3,128,227).float()

    # compute CNN output (no gradients needed for inference)
    with torch.no_grad():
        output = model(frame_tensor.to(device))

    # index of the maximum log-probability - corresponds to the predicted class
    predicted_class = int(output[0].argmax())

    # update display
    plt.clf()
    plt.imshow(frame_rgb) # show the original image (not the downscaled version fed to the CNN)
    plt.axis("off")
    # class 0 is "good" posture: green title, every other class is shown in red
    plt.title(class_labels[predicted_class], fontsize=32, color='green' if predicted_class == 0 else 'red')
    display.display(plt.gcf())
    display.clear_output(wait=True)

    # throttle the loop to at most ~10 updates per second
    time.sleep(1/10)

In [None]:
# release the capture device once the demo loop has finished (switches off the webcam)
vc.release()