# Audio model evaluation

In [2]:
import os
import pickle
import pandas as pd
from pyAudioAnalysis import audioTrainTest as aT

labels = pd.read_csv("project_data/labels.csv", usecols=[0,1], dtype={"id": int, "label": str}, index_col=0)

audio_results = "audio_predictions/"
def get_label_by_story(subject, story):
    return labels.loc[subject].values[0][story-1]

def lou_cross_validation_audio(audio_path, save_results=None):
    save_results = audio_path if save_results==None else save_results
    all_files = []
    for root, dirs, files in os.walk(audio_path):
        for f in files:
            all_files.append(os.path.join(root, f))
    predictions = {}
    for trfile in all_files:
        originalname = trfile
        inter = originalname.split("/")[-1].strip("sub").strip(".wav").split("_")
        sub, st = int(inter[0]), int(inter[1])
        movedname = originalname.replace(originalname.split("/")[1], "fold")
        os.rename(originalname, movedname)
        aT.featureAndTrain([audio_path + "/lie", audio_path + "/truth"], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "audio_models/svm/svm", True)
        prediction = aT.fileClassification(movedname, "audio_models/svm/svm", "svm")[1][1]
        prediction = 1 if prediction>0.5 else 0
        predictions[(sub, st)] = [prediction, get_label_by_story(sub, st)]
        os.rename(movedname, originalname)

    with open(audio_results + save_results + ".pickle", "wb") as f:
        pickle.dump(predictions, f)

control_group = "audio_control_group"
test_group = "audio_test_group"
complete_group = "audio_complete_group"
lou_cross_validation_audio(control_group)
lou_cross_validation_audio(test_group)
lou_cross_validation_audio(complete_group)

TypeError: 'int' object has no attribute '__getitem__'

In [80]:
import pickle
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_squared_error as mse

def deserialize(f):
    with open(f, "rb") as ser:
        return pickle.load(ser)


def evaluate_predictions(root):
    for root, dirs, files in os.walk(root):
        for f in files:
            predictions = deserialize(os.path.join(root, f))
            results = np.array(predictions.values()).astype(int)
        #     results mse(results[:, 0], results[:, 1])
#             results = results[results[:,1]==0]
#             print results
            
            results = results[:, 0] - results[:, 1]
            results = (len(results) - sum(abs(results)))/float(len(results))
            f = f.split(".")[0]
            print "Correct classification rate of {}: {:.4f}%".format(f, results*100)

evaluate_predictions("audio_predictions")
print
evaluate_predictions("thermal_predictions")

Correct classification rate of audio_control_group: 77.7778%
Correct classification rate of audio_test_group: 72.7273%
Correct classification rate of audio_complete_group: 62.3188%

Correct classification rate of knn_control: 27.7778%
Correct classification rate of knn_test: 26.6667%
Correct classification rate of knn_complete: 28.7879%
Correct classification rate of svm_control: 61.1111%
Correct classification rate of svm_test: 66.6667%
Correct classification rate of lc_control: 38.8889%
Correct classification rate of lc_complete: 42.4242%
Correct classification rate of lc_test: 46.6667%
Correct classification rate of base_diff_test: 66.6667%
Correct classification rate of base_diff_control: 55.5556%
Correct classification rate of base_diff_complete: 60.6061%


# Thermal model evaluation

In [79]:
import pickle 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import numpy as np
import os

thermal_predictions = "thermal_predictions/"
label_folder = "project_data/labels.csv"
game = pd.read_csv(label_folder, usecols=[0,3], dtype={"id": int, "game": int})
game = game.drop([0,1,3]).values
test = game[game[:,1]==1]
control = game[game[:,1]==0]

def deserialize(f):
    with open(f, "rb") as ser:
        return pickle.load(ser)

baseline = deserialize("thermal_data/baseline.pickle")

def mean_predict(data, from_subj, fname, method='base_diff'):
    preds = {}
    for subject, game in from_subj:
        sub_base = baseline[subject]
        st1 = np.vstack(np.vstack(data[(subject, 1)])[:,0]) - sub_base
        st2 = np.vstack(np.vstack(data[(subject, 2)])[:,0]) - sub_base
        st3 = np.vstack(np.vstack(data[(subject, 3)])[:,0]) - sub_base
        st1, st2, st3 = sum(st1)/len(st1), sum(st2)/len(st2), sum(st3)/len(st3)
        st1, st2, st3 = [1, np.sqrt(st1.dot(st1))], [2, np.sqrt(st2.dot(st2))], [3, np.sqrt(st3.dot(st3))]
        strs = np.vstack((st1, st2, st3))
        lie = np.where(strs == np.max(strs[:,1]))[0][0]
        for i in range(1,4):
            p = 0 if(i==lie) else 1
            preds[(subject, i)] = [p, data[(subject, i)][0][1]]
        
    with open(os.path.join(thermal_predictions, method, method + "_" + fname+".pickle"), "wb") as f:
        pickle.dump(preds, f)
        
#     for key in data.keys():
#         test = (key, data[key])
#         del data[key]
#         train = np.vstack(data.values())
#         nclass.fit(np.vstack(train[:,0]), train[:,1].astype(int))
#         knnpred = nclass.predict(np.vstack(test[1][:,0]))
#         truth, lie = sum(knnpred), len(knnpred)-sum(knnpred)
#         p = 1 if truth > lie else 0
#         preds[key] = [p, test[1][0][1]]
#         data[key] = test[1]

complete_group = {}
control_group = deserialize("thermal_data/control_group.pickle")
test_group = deserialize("thermal_data/test_group.pickle")
complete_group.update(control_group)
complete_group.update(test_group)
mean_predict(control_group, control, 'control')
mean_predict(test_group, test, 'test')
mean_predict(complete_group, game, 'complete')

In [76]:
import numpy as np
np.random.seed(2)
x = np.vstack(10*np.array(np.random.random((10, 5))))
print x
y = np.array(range(1,6))
np.where(x == np.max(x))[0]

[[ 4.35994902  0.25926232  5.49662478  4.35322393  4.20367802]
 [ 3.30334821  2.04648634  6.19270966  2.99654674  2.66827275]
 [ 6.21133833  5.29142094  1.34579945  5.13578121  1.84439866]
 [ 7.85335148  8.53975293  4.94236837  8.46561485  0.79645477]
 [ 5.0524609   0.65286504  4.28122328  0.96530916  1.27159972]
 [ 5.96745309  2.26012001  1.06945684  2.20306207  3.49826285]
 [ 4.67787485  2.01743226  6.40406725  4.83069836  5.0523672 ]
 [ 3.86892651  7.93637454  5.80004179  1.62298599  7.00752347]
 [ 9.6455108   5.00008361  8.89520064  3.41613653  5.67144128]
 [ 4.27545963  4.36747263  7.76559185  5.35604173  9.53742227]]


(array([8]), array([0]))