## The University of Melbourne, School of Computing and Information Systems
## Pose classification with naive Bayes

Note: This project is part of the assignments for COMP30027 Machine Learning.


In [1]:
import numpy
import copy
import math
import scipy.stats
import csv
import re

#Constants
CIRCLE = 360  # degrees in a full turn; calculateAngle adds it to negative angles
LOW = -99999  # very low sentinel score, meant as a starting value when searching for a maximum
MISSING_VALUE = 9999  # sentinel: only attribute values strictly below this are treated as present

In [2]:
# This function prepares the data by reading it from a file and converting it into a useful format for training
def preprocess(filename):
    """Read a labelled CSV file and group its instances by class label.

    Every non-blank line is expected to look like ``label,v1,v2,...``; the
    values after the label are converted with ``float``. Blank lines (for
    example a trailing newline at the end of the file) are ignored.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A tuple ``(attributesNum, instanceDict, classLabels)`` where
        ``attributesNum`` is the number of values after the label in the first
        instance, ``instanceDict`` maps each class label to the list of its
        attribute vectors (lists of floats, in file order) and ``classLabels``
        is the set of labels found in the file.

    Raises:
        ValueError: if the file contains no instances, or a value cannot be
            converted to ``float``.
    """
    instanceDict = {}  # class label -> list of attribute vectors
    attributesNum = None  # taken from the first instance in the file
    with open(filename, 'r') as datafile:
        for line in datafile:
            line = line.strip()
            if not line:  # a blank line would otherwise become a bogus '' class
                continue
            classLabel, *values = line.split(",")  # class label is at pos 0 in a line
            if attributesNum is None:
                attributesNum = len(values)
            # group while reading, so the data is scanned once instead of once per class
            instanceDict.setdefault(classLabel, []).append([float(value) for value in values])

    if attributesNum is None:
        raise ValueError("no instances found in " + str(filename))
    return attributesNum, instanceDict, set(instanceDict)

In [3]:
# This function prepares the data by reading it from a file and converting it into a useful format for testing
def preprocessTest(filename):
    """Read a CSV file of test instances without converting any field.

    Blank lines (for example a trailing newline at the end of the file) are
    ignored, so every returned instance has at least a label field.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        comma-separated fields of that line, kept as strings, with the class
        label at position 0.
    """
    with open(filename, 'r') as datafile:
        strippedLines = (line.strip() for line in datafile)
        return [line.split(",") for line in strippedLines if line]

In [4]:
# This function calculates prior probabilities and likelihoods from the training data and uses them to build a naive Bayes model

def train(attributesNum,trainInstancesDict,classLabels):
    """Estimate class priors and per-attribute Gaussian parameters.

    Args:
        attributesNum: number of attributes in every instance.
        trainInstancesDict: dict mapping a class label to the list of its
            attribute vectors.
        classLabels: iterable of the class labels to build the model for.

    Returns:
        A tuple ``(priors, model)``. ``priors`` maps each class label to its
        share of all training instances. ``model`` maps each class label to a
        list holding one ``(mean, std)`` tuple per attribute, computed with
        numpy over the values strictly below ``MISSING_VALUE``.
    """
    model = {}
    classCounts = {}
    totalInstances = 0
    for label in classLabels:
        rows = trainInstancesDict[label]
        gaussians = []
        for column in range(attributesNum):
            # missing values take no part in the estimate
            observed = [row[column] for row in rows if row[column] < MISSING_VALUE]
            gaussians.append((numpy.mean(observed), numpy.std(observed)))
        model[label] = gaussians
        classCounts[label] = len(rows)
        totalInstances += len(rows)
    # turn the per-class counts into relative frequencies
    priors = {label: count / totalInstances for label, count in classCounts.items()}
    return priors, model

In [5]:
# This function predicts classes for new items in a test dataset

def predict(priors,model,testInstances,attributesNum,classLabels):
    """Label every test instance with its most probable class.

    For each class the score is ``log(prior)`` plus the Gaussian
    log-likelihood of every attribute value that is strictly below
    ``MISSING_VALUE``. The class with the highest score wins; on a tie the
    class met first while iterating ``classLabels`` is kept.

    Args:
        priors: dict mapping a class label to its prior probability.
        model: dict mapping a class label to a list of ``(mean, std)`` tuples,
            one per attribute.
        testInstances: list of instances; position 0 holds the label slot and
            positions 1..attributesNum hold values convertible with ``float``.
        attributesNum: number of attributes to score per instance.
        classLabels: iterable of candidate class labels.

    Returns:
        ``testInstances`` itself. Position 0 of every instance is overwritten
        in place with the predicted label, so pass a copy if the original
        labels are still needed.

    Raises:
        ValueError: if ``classLabels`` is empty while there are instances to
            label.
    """
    for testInstance in testInstances:  # iterate all test instances
        bestLabel = None
        bestScore = None
        for classLabel in classLabels:
            score = math.log(priors[classLabel])  # log(P(c))
            for attri in range(1, attributesNum + 1):
                value = float(testInstance[attri])
                if not value < MISSING_VALUE:  # missing value: contributes nothing
                    continue
                mean, std = model[classLabel][attri - 1]
                # Work in log space directly: log(pdf(x)) breaks down when the
                # density underflows to 0, and skipping such a term would treat
                # the worst-fitting value as if it had probability 1.
                logLikelihood = scipy.stats.norm.logpdf(value, loc=mean, scale=std)
                if math.isnan(logLikelihood):  # undefined Gaussian (nan mean or zero/nan std)
                    continue
                score += logLikelihood
            # track the maximum without an arbitrary numeric floor
            if bestScore is None or score > bestScore:
                bestLabel, bestScore = classLabel, score
        if bestScore is None:
            raise ValueError("classLabels must not be empty")
        testInstance[0] = bestLabel
    return testInstances

In [6]:
# This function evaluates the prediction performance by comparing the model's class outputs to ground truth labels

def evaluate(classLabels, prediction, testInstances):
    """Compute accuracy and macro/micro averaged precision and recall.

    Args:
        classLabels: iterable of the class labels to score.
        prediction: sequence of instances whose position 0 is the predicted
            label.
        testInstances: sequence of instances, aligned with ``prediction``,
            whose position 0 is the ground-truth label.

    Returns:
        A tuple ``(accuracy, macroPrecision, macroRecall, microPrecision,
        microRecall)``. Any ratio whose denominator is zero (a class that is
        never predicted or never present, an empty test set, no classes) is
        reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    def ratio(numerator, denominator):
        # an undefined ratio is scored as 0.0 rather than crashing the evaluation
        return numerator / denominator if denominator else 0.0

    evaluations = {classLabel: {"TP": 0, "FN": 0, "FP": 0} for classLabel in classLabels}
    correct = 0
    incorrect = 0
    # one pass over the instances; only labels listed in classLabels are counted
    for predicted, actual in zip(prediction, testInstances):
        predictedLabel, trueLabel = predicted[0], actual[0]
        if predictedLabel == trueLabel:
            if predictedLabel in evaluations:  # true positive
                evaluations[predictedLabel]["TP"] += 1
                correct += 1
            continue
        if predictedLabel in evaluations:  # false positive for the predicted class
            evaluations[predictedLabel]["FP"] += 1
            incorrect += 1
        if trueLabel in evaluations:  # false negative for the true class
            evaluations[trueLabel]["FN"] += 1

    macroPrecision, macroRecall, sumTP, sumTPFN, sumTPFP = 0, 0, 0, 0, 0
    for counts in evaluations.values():
        predictedAsClass = counts["TP"] + counts["FP"]
        actuallyClass = counts["TP"] + counts["FN"]
        macroPrecision += ratio(counts["TP"], predictedAsClass)  # sum each class precision
        macroRecall += ratio(counts["TP"], actuallyClass)  # sum each class recall
        sumTP += counts["TP"]
        sumTPFP += predictedAsClass
        sumTPFN += actuallyClass

    accuracy = ratio(correct, correct + incorrect)
    macroPrecision = ratio(macroPrecision, len(evaluations))
    macroRecall = ratio(macroRecall, len(evaluations))
    microPrecision = ratio(sumTP, sumTPFP)  # micro formula
    microRecall = ratio(sumTP, sumTPFN)
    return accuracy, macroPrecision, macroRecall, microPrecision, microRecall

In [7]:
# Baseline experiment: train on the raw attributes of train.csv and score on test.csv
attributesNum, trainInstancesDict, classLabels = preprocess('train.csv')
priors, model = train(attributesNum,trainInstancesDict,classLabels)
testInstances = preprocessTest('test.csv')
# predict overwrites position 0 of each instance, so it is given a deep copy and
# testInstances keeps the ground-truth labels for evaluate
prediction = predict(priors,model,copy.deepcopy(testInstances),attributesNum,classLabels)
accuracy,macroPrecision,macroRecall, microPrecision, microRecall = evaluate(classLabels, prediction,testInstances)
# printed order: accuracy, macro precision, macro recall, micro precision, micro recall
print(accuracy,macroPrecision,macroRecall,microPrecision,microRecall)

0.7413793103448276 0.7191865079365078 0.7371947496947497 0.7413793103448276 0.7413793103448276


In [37]:
# This function produces new features based on the provided data of (x,y) positions of keypoints

def engineerFeatures(filename):
    """Append engineered angle and distance features to a CSV file's instances.

    Every non-blank line of ``filename`` must look like ``label,v1,...,v22``:
    column k (1..11) is read as the x coordinate of keypoint k and column
    k + 11 as its y coordinate. Seven angles and four distances are appended
    to each instance, in a fixed order, and the result is written to
    ``<prefix>_engineer.csv``, where ``<prefix>`` is ``filename`` without a
    trailing ``.csv``.

    A feature is set to ``MISSING_VALUE`` when any x or y coordinate it needs
    is not strictly below ``MISSING_VALUE``.

    Args:
        filename: path of the CSV file to read.

    Returns:
        None. The engineered data set is written to disk.

    Raises:
        IndexError: if a non-blank line has fewer than 23 fields.
        ValueError: if a value cannot be converted to ``float``.
    """
    keypointCount = 11  # column k holds x of keypoint k, column k + 11 holds its y

    # (point1, midPoint, point2) keypoint numbers; the angle is measured at midPoint
    angleTriples = (
        (2, 3, 4),    # shoulder, right elbow, right wrist
        (7, 8, 9),    # hip, right knee, right foot
        (7, 10, 11),  # hip, left knee, left foot
        (3, 2, 7),    # right elbow, shoulder, hip
        (5, 2, 7),    # left elbow, shoulder, hip
        (2, 7, 8),    # shoulder, hip, right knee
        (2, 7, 10),   # shoulder, hip, left knee
    )
    # (point1, point2) keypoint numbers
    distancePairs = (
        (1, 4),   # head, right wrist
        (1, 6),   # head, left wrist
        (1, 9),   # head, right foot
        (7, 11),  # hip, left foot
    )

    def coordinates(instance, keypoints):
        # Flatten the keypoints into [x, y, x, y, ...]; None if any coordinate is missing.
        flat = []
        for keypoint in keypoints:
            x = instance[keypoint]
            y = instance[keypoint + keypointCount]
            if not (x < MISSING_VALUE and y < MISSING_VALUE):
                return None
            flat.extend((x, y))
        return flat

    instances = []
    with open(filename, 'r') as datafile:
        for line in datafile:
            line = line.strip()
            if not line:  # skip blank lines such as a trailing newline
                continue
            label, *values = line.split(",")
            instances.append([label] + [float(value) for value in values])

    for instance in instances:
        engineered = []
        for triple in angleTriples:
            flat = coordinates(instance, triple)
            engineered.append(MISSING_VALUE if flat is None else calculateAngle(*flat))
        for pair in distancePairs:
            flat = coordinates(instance, pair)
            engineered.append(MISSING_VALUE if flat is None else calculateDistance(*flat))
        instance.extend(engineered)

    # Strip only a literal trailing ".csv"; a regex '.csv' would also match any
    # character followed by "csv" elsewhere in the name.
    suffix = ".csv"
    newFilenamePrefix = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    with open(newFilenamePrefix+"_engineer.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(instances)

In [38]:
def calculateAngle(point1x, point1y, midPointx, midPointy,point2x, point2y):
    """Return the angle at the mid point between point1 and point2, in degrees.

    The result is the atan2 heading of point2 minus the atan2 heading of
    point1, both taken relative to the mid point; a negative difference is
    shifted up by ``CIRCLE`` so the returned value is never negative.
    """
    headingToPoint1 = math.atan2(point1y - midPointy, point1x - midPointx)
    headingToPoint2 = math.atan2(point2y - midPointy, point2x - midPointx)
    sweep = math.degrees(headingToPoint2 - headingToPoint1)
    return sweep + CIRCLE if sweep < 0 else sweep

def calculateDistance(point1x, point1y, point2x, point2y):
    """Return the Euclidean distance between (point1x, point1y) and (point2x, point2y).

    ``math.hypot`` is used instead of squaring by hand so that very large
    coordinate differences do not raise ``OverflowError``.
    """
    return math.hypot(point1x - point2x, point1y - point2y)

In [39]:
# Engineered-feature experiment: engineerFeatures writes train_engineer.csv and
# test_engineer.csv, which are then used in place of the raw files
engineerFeatures("train.csv")
engineerFeatures("test.csv")
attributesNum, trainInstancesDict, classLabels = preprocess('train_engineer.csv')
priors, model = train(attributesNum,trainInstancesDict,classLabels)
testInstances = preprocessTest('test_engineer.csv')
# predict overwrites position 0 of each instance, so it is given a deep copy and
# testInstances keeps the ground-truth labels for evaluate
prediction = predict(priors,model,copy.deepcopy(testInstances),attributesNum,classLabels)
accuracy,macroPrecision,macroRecall, microPrecision, microRecall = evaluate(classLabels, prediction,testInstances)
# printed order: accuracy, macro precision, macro recall, micro precision, micro recall
print(accuracy,macroPrecision,macroRecall,microPrecision,microRecall)

0.7758620689655172 0.7868253968253969 0.7921153846153846 0.7758620689655172 0.7758620689655172
