In [180]:
from __future__ import division
import glob
import scipy.stats
import numpy as np
import matplotlib
import copy
import random 
import math


#-----------------------------------------------------------------------------------------------------------------#   
#------------------------------------------CLASSIFIER CODE--------------------------------------------------------#
#-----------------------------------------------------------------------------------------------------------------#

#AUIntensity Classifier
photoData = {}

dictAuTranslation = {0 : "AU01", 1 : "AU02", 2 :"AU04", 3:"AU05", 4:"AU06",5:"AU07",6:"AU09",7:"AU10",8:"AU12",9:"AU14",10:"AU15",
                     11:"AU17",12:"AU20",13:"AU23",14:"AU25",15:"AU26",16:"AU28",17:"AU45"}

def getRoundedPhotoData():
    traitFile = open("C:/Users/rrenv/Desktop/Level4Project/averageRatings.csv", "r")
    for line in traitFile:
        line = line[:-1]
        values = line.split(",")
        traitValues = []
        if (values[0][:12] not in photoData):
            photoData[values[0][:12]] = []
            for trait in values[1:]:
                traitValues.append(round(float(trait)))
            photoData[values[0][:12]].append(traitValues)
    traitFile.close()
    #print len(photoData)
    filesToGoThrough = glob.glob("C:/Users/rrenv/Desktop/Level4Project/OpenFace_0.2_win_x64/data/outputFiles/*.pts")
    for file in filesToGoThrough:
        auIntensities = []
        auActivations = []
        headPose = []
        filename = file.split("/")[-1][12:][:12]
        currentFile = open(file, "r")
        listOfLines = currentFile.readlines()
        for i in range(0,3):
            value=listOfLines[74].split(" ")[i]
            if (i == 2):
                headPose.append(float(value[0:-1]))
            else:
                headPose.append(float(value))
        photoData[filename].append(headPose)
        for i in range(0,17):
            value=listOfLines[82+i].split(" ")[1]
            auIntensities.append(float(value[0:-1]))
        photoData[filename].append(auIntensities)
        for i in range(0,18):
            value=listOfLines[102+i].split(" ")[1]
            auActivations.append(float(value))
        photoData[filename].append(auActivations)
        currentFile.close()
        
#Split into Training and Testing

def splitData(data, ratio):
    trainingSize = int(len(data) * ratio)
    trainSet = []
    copyOfData = copy.copy(data)
    while (len(trainSet) < trainingSize):
        trainSet.append(copyOfData.pop(random.choice(copyOfData.keys())))
    return [trainSet, copyOfData]

#Separate Data by Class

def assignOnScale(dataset, variable): #Variable will mean which of the 5 personality traits you are trying to classify
    assigned = {}
    for i in range(len(dataset)):
        #print "working with: ", dataset[i]
        vector = dataset[i][0][variable]
        if (vector not in assigned):
            assigned[vector] = []
        for j in range(1,len(dataset[i])):
            assigned[vector].append(dataset[i][j])
    return assigned

#Hard coded for AU Intensities; easier for testing and time constraints as it's the most interesting

def assignOnScaleModest(dataset): 
    assigned = {1 : [], 2: []}
    for i in range(len(dataset)):
        vector = dataset[i][0][2]
        if vector >= 0:
            assigned[2].append(dataset[i][2])
        else:
            assigned[1].append(dataset[i][2])
    #print assigned
    return assigned

#Get Mean for each Attribute

def getMean(feature):
    return (sum(feature)/len(feature))

#Get Standard Deviation

def getStdDev(feature):
    mean = getMean(feature)
    variance = sum([pow(x-mean,2) for x in feature])/float(len(feature)-1)
    return np.sqrt(variance)

#Summarise Dataset

def getAllFeatures(dataset): #parameter is a variable to choose either AUActivation, Intensity, or HeadPose
    features = {}
    for measurement in dataset:
        #print measurement, dataset[measurement]
        for listOfFeatures in measurement:
            #print listOfFeatures
            for x in range (0, len(listOfFeatures)):
                if dictAuTranslation[x] in features:
                    features[dictAuTranslation[x]].append(listOfFeatures[x])
                else:
                    features[dictAuTranslation[x]] = []
                    features[dictAuTranslation[x]].append(listOfFeatures[x])
    #print features
    return features

getRoundedPhotoData()
trainSet, testSet = splitData(photoData, 0.01)
trainSetfeatures = assignOnScaleModest(trainSet)
print "getAllFeatures test: ", 'passed!'
#getAllFeatures(trainSetfeatures)


    
def summariseFeatures(dataset):
    features = getAllFeatures(dataset)
    summaries = {}
    print len(features)
    for x in range(0,len(features)):
        mean = getMean(features[dictAuTranslation[x]])
        stdDev = getStdDev(features[dictAuTranslation[x]])
        summaries[dictAuTranslation[x]] = []
        summaries[dictAuTranslation[x]].append(float(mean))
        summaries[dictAuTranslation[x]].append(float(stdDev))
    return summaries

print "summariseFeatures test: ", 'passed!'
featureSummaries = summariseFeatures(trainSetfeatures)
#print featureSummaries
#Summarise Attributes by Class

def summariseClasses(dataset):
    assigned = assignOnScaleModest(dataset)
    summaries = {}
    for classValue, instances in assigned.iteritems():
        print instances
        for instance in instances:
            print instance
            summaries[classValue] = summariseFeatures(instances)
    print summaries
    return summaries

print "summariseClasses test: "
classSummaries = summariseClasses(trainSet)
print classSummaries

#Make Predictions based on Probabilities

#Gaussian Probablilty (probability of a given value given mean and standard deviation)

def findprobability(value, mean, stdev): # value is what we're looking for the probability of
    exponent = math.exp(-(math.pow(value-mean,2)/(2*math.pow(stdev,2))))
    return (1/(math.sqrt(2*math.pi)*stdev))*exponent

#Apply to classes in our data

def calculateClassProbabilities(classSummaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.iteritems():
        probabilities[classValue] = 1
        for i in range(0, len(classSummaries)):
            mean, stdev = classSummaries
            x = inputVector[i]
            if (stdev != 0):
                probabilities[classValue] *= findprobability(x, mean, stdev)
    return probabilities
    
getRoundedPhotoData()
trainSet, testSet = splitData(photoData, 0.01)
trainSetfeatures = assignOnScaleModest(trainSet)
#print trainSetfeatures
#print len(trainSet)
#for feature, measurements in trainSetfeatures.iteritems():
#    for measurement in measurements:
#        print feature, ': ', len(measurements[1])
#features = getAllFeatures(trainSet, 2)
inputVector = testSet[random.choice(testSet.keys())][2]

print 'input vector: ', inputVector
calculateClassProbabilities(classSummaries, inputVector)

getAllFeatures test:  passed!
summariseFeatures test:  passed!
17
summariseClasses test: 
[[1.78664, 1.29407, 1.42938, 0.0, 0.0, 0.813071, 0.0, 0.0, 0.0, 0.0, 0.415276, 0.504283, 0.0, 0.00625724, 0.0, 0.571534, 0.859951], [0.0, 0.0, 0.0, 0.325211, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.346812, 0.0818186, 0.0, 0.22692, 0.0, 0.0, 0.611511], [0.0537428, 0.0, 0.0, 0.351887, 0.604677, 0.0, 1.07561, 0.41141, 0.547717, 1.09836, 0.792746, 0.0, 0.890492, 0.0, 0.0, 0.574416, 0.337656]]
[1.78664, 1.29407, 1.42938, 0.0, 0.0, 0.813071, 0.0, 0.0, 0.0, 0.0, 0.415276, 0.504283, 0.0, 0.00625724, 0.0, 0.571534, 0.859951]


TypeError: list indices must be integers, not list