In [1]:
import os
import sys
import csv
import wave
import copy
import math

import numpy as np
import pandas as pd

from sklearn.preprocessing import label_binarize
from sklearn.cross_validation import StratifiedKFold, KFold, train_test_split
from sklearn.svm import OneClassSVM, SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

from keras.models import Sequential, Model
from keras.layers.core import Dense, Activation
from keras.layers import LSTM, Input
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import SGD, Adam, RMSprop

sys.path.append("../")

from utilities.utils import *

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

from IPython.display import clear_output

Using TensorFlow backend.


In [2]:
batch_size = 64
nb_feat = 34
nb_class = 4
nb_epoch = 80

optimizer = 'Adadelta'

In [3]:
params = Constants()
print(params)

----------------------------------------------------------------------------------------------------
available_emotions            ['ang' 'exc' 'neu' 'sad']                                             
conf_matrix_prefix            iemocap                                                               
framerate                     16000                                                                 
path_to_data                  /home/samarth/emotion_recognition-master/code/utilities/../../data/ses
path_to_features              /home/samarth/emotion_recognition-master/code/utilities/../../data/fea
sessions                      ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']          
types                         {1: <class 'numpy.int8'>, 2: <class 'numpy.int16'>, 4: <class 'numpy.i
----------------------------------------------------------------------------------------------------


In [4]:
def get_mocap_head(path_to_mocap_head, filename, start,end, params=Constants()):
    f = open(path_to_mocap_head + filename, 'r').read()
    f = np.array(f.split('\n'))
    mocap_head = []
    mocap_head_avg = []
    f = f[2:]
    counter = 0
    for data in f:
        counter+=1
        data2 = data.split(' ')
        if(len(data2)<2):
            continue
        if(float(data2[1])>start and float(data2[1])<end):
            mocap_head_avg.append(np.array(data2[2:]).astype(np.float))
            
    mocap_head_avg = np.array_split(np.array(mocap_head_avg), 200)
    for spl in mocap_head_avg:
        mocap_head.append(np.mean(spl, axis=0))
    return np.array(mocap_head)

In [79]:
def get_mocap_rot(path_to_mocap_rot, filename, start,end, params=Constants()):
    f = open(path_to_mocap_rot + filename, 'r').read()
    f = np.array(f.split('\n'))
    mocap_rot = []
    mocap_rot_avg = []
    f = f[2:]
    counter = 0
    for data in f:
        counter+=1
        data2 = data.split(' ')
        if(len(data2)<2):
            continue
        if(float(data2[1])>start and float(data2[1])<end):
            mocap_rot_avg.append(np.array(data2[2:]).astype(np.float))
            
    mocap_rot_avg = np.array_split(np.array(mocap_rot_avg), 200)
    for spl in mocap_rot_avg:
        mocap_rot.append(np.mean(spl, axis=0))
    return np.array(mocap_rot)

In [5]:
def read_iemocap_mocap_head(params=Constants()):
    data = []
    ids = {}
    for session in params.sessions:
        path_to_wav = params.path_to_data + session + '/dialog/wav/'
        path_to_emotions = params.path_to_data + session + '/dialog/EmoEvaluation/'
        path_to_transcriptions = params.path_to_data + session + '/dialog/transcriptions/'
        path_to_mocap_head = params.path_to_data + session + '/dialog/MOCAP_head/'

        files2 = os.listdir(path_to_wav)
        #print (files2.sort())
        files = []
        for f in files2:
            if f.endswith(".wav"):
                if f[0] == '.':
                    files.append(f[2:-4])
                else:
                    files.append(f[:-4])
                    
        #files = os.listdir(path_to_wav)
        #files = [f[:-4] for f in files if f.endswith(".wav")]
        for f in files:       
            print(f)
            if (f== 'Ses05M_script01_1b'):
                continue
            transcriptions = get_transcriptions(path_to_transcriptions, f + '.txt')
            emotions = get_emotions(path_to_emotions, f + '.txt')

            for ie, e in enumerate(emotions):
                e.pop("left", None)
                e.pop("right", None)
                e['transcription'] = transcriptions[e['id']]
                e['mocap_head'] = get_mocap_head(path_to_mocap_head, f + '.txt', e['start'], e['end'])
                if e['emotion'] in params.available_emotions:
                    if e['id'] not in ids:
                        data.append(e)
                        ids[e['id']] = 1

                        
    sort_key = get_field(data, "id")
    return np.array(data)[np.argsort(sort_key)]



In [6]:
data = read_iemocap_mocap_head(params=params)

Ses01F_impro02


  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Ses01F_script03_2
Ses01M_script01_2
Ses01F_impro03
Ses01F_impro05
Ses01M_script02_2
Ses01M_impro06
Ses01F_script03_2
Ses01F_script01_3
Ses01M_impro01
Ses01M_impro03
Ses01M_impro04
Ses01F_script01_2
Ses01F_script03_1
Ses01F_impro07
Ses01M_impro02
Ses01F_script02_2
Ses01F_script01_2
Ses01F_script02_2
Ses01M_script02_2
Ses01M_script01_3
Ses01M_script03_2
Ses01F_impro01
Ses01M_script02_1
Ses01F_impro07
Ses01M_script02_1
Ses01M_impro04
Ses01M_script01_2
Ses01F_impro01
Ses01M_script03_1
Ses01M_script01_3
Ses01F_impro02
Ses01M_impro06
Ses01M_impro05
Ses01M_script03_2
Ses01F_script02_1
Ses01M_impro05
Ses01F_impro04
Ses01F_script01_1
Ses01F_impro04
Ses01F_impro03
Ses01M_script01_1
Ses01F_script01_1
Ses01M_script01_1
Ses01M_impro07
Ses01F_script02_1
Ses01M_impro02
Ses01F_impro05
Ses01M_impro03
Ses01F_impro06
Ses01M_impro07
Ses01M_impro01
Ses01F_impro06
Ses01F_script01_3
Ses01F_script03_1
Ses01M_script03_1
Ses02M_impro07
Ses02F_impro07
Ses02F_impro04
Ses02F_impro02
Ses02F_script02_1
Ses02M_impro0

  ret = ret.dtype.type(ret / rcount)


Ses05M_impro01
Ses05F_impro02
Ses05F_impro01
Ses05F_impro08
Ses05F_script01_3
Ses05M_impro04
Ses05F_impro06
Ses05M_script01_1b
Ses05F_impro05
Ses05M_script01_1
Ses05F_impro03
Ses05F_script02_2
Ses05M_script01_1
Ses05F_script01_1
Ses05M_impro06
Ses05F_impro05
Ses05M_impro04
Ses05F_script03_1
Ses05M_impro08
Ses05M_impro02
Ses05M_script01_3
Ses05F_script02_1
Ses05F_script01_2
Ses05M_script01_2
Ses05M_script02_1
Ses05F_impro07
Ses05M_impro07
Ses05F_script03_1
Ses05F_script01_2
Ses05M_script02_1
Ses05F_script02_2
Ses05M_script02_2
Ses05F_impro01
Ses05M_script02_2
Ses05M_impro01
Ses05F_impro06
Ses05F_impro02
Ses05M_script03_2
Ses05F_script01_3
Ses05M_impro05
Ses05M_impro05
Ses05M_script01_1b


In [7]:
import pickle
with open(params.path_to_data + '/../'+'hear.pickle', 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
    

In [8]:
len(data)

4912

In [74]:
data[2]

{'a': 2.5,
 'd': 2.5,
 'emo_evo': [['neu'], ['neu'], ['neu'], ['neu', 'ang']],
 'emotion': 'neu',
 'end': 11.3925,
 'id': 'Ses01F_impro01_F001',
 'mocap_head': array([[ -3.01623500e+00,  -1.53231875e+01,  -2.58177725e+01,
          -3.50085000e+00,  -1.25055738e+02,  -6.09216000e+00],
        [ -3.03349750e+00,  -1.53290150e+01,  -2.58318275e+01,
          -3.48152000e+00,  -1.25053363e+02,  -6.11257000e+00],
        [ -3.05529000e+00,  -1.53076150e+01,  -2.58582975e+01,
          -3.46259250e+00,  -1.25137442e+02,  -6.16912500e+00],
        [ -3.09448500e+00,  -1.52989275e+01,  -2.58030975e+01,
          -3.51556750e+00,  -1.25170455e+02,  -6.24870500e+00],
        [ -3.15129000e+00,  -1.52903750e+01,  -2.57879075e+01,
          -3.57438250e+00,  -1.25319537e+02,  -6.41077000e+00],
        [ -3.17681000e+00,  -1.52697200e+01,  -2.57643500e+01,
          -3.58836000e+00,  -1.25421822e+02,  -6.54454500e+00],
        [ -3.21464000e+00,  -1.52530050e+01,  -2.57842175e+01,
          -3.598

In [66]:
x_train2 = []
from sklearn.preprocessing import normalize

for ses_mod in data:
    x = ses_mod['mocap_head']
    x_normed = (x - x.min(0)) / x.ptp(0)
    x_normed = x_normed - 0.5
    x_normed[np.isnan(x)]=0
    x_train2.append( x_normed )
    
x_train2 = np.array(x_train2)
x_train2.shape


(4912, 200, 6)

In [57]:
x_train2[0]

array([[-0.5       ,  0.30999731,  0.47602887, -0.46676503, -0.47426847,
        -0.49371681],
       [-0.49839752,  0.30906637,  0.47766798, -0.46579917, -0.46885793,
        -0.49082548],
       [-0.49705638,  0.30734147,  0.47383232, -0.46297283, -0.46441113,
        -0.4886732 ],
       ..., 
       [ 0.47400213, -0.00990041, -0.47495422,  0.46006412,  0.39238546,
         0.47546626],
       [ 0.47389361, -0.00959095, -0.47065307,  0.45697644,  0.39200217,
         0.47513772],
       [ 0.47521485, -0.0080081 , -0.47458004,  0.45594473,  0.38807688,
         0.47439536]])

In [58]:
x_train2[1]

array([[-0.34857534, -0.31906291, -0.13973628, -0.26088676, -0.33896537,
        -0.26562003],
       [-0.3532934 , -0.31933028, -0.13991004, -0.25999571, -0.33886791,
        -0.26878929],
       [-0.35128522, -0.31951476, -0.13999976, -0.25997566, -0.3389048 ,
        -0.26744871],
       ..., 
       [ 0.18948048,  0.5       ,  0.5       , -0.14309488,  0.5       ,
         0.31672145],
       [ 0.18948048,  0.5       ,  0.5       , -0.14309488,  0.5       ,
         0.31672145],
       [ 0.18948048,  0.5       ,  0.5       , -0.14309488,  0.5       ,
         0.31672145]])

In [62]:
x_train2 = x_train2.reshape(-1,1200)
x_train2.shape

(4912, 1200)

In [67]:
Y=[]
for ses_mod in data:
    Y.append(ses_mod['emotion'])
    
Y = to_categorical(Y)

Y.shape

(4912, 4)

In [49]:
from os import listdir
import random
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Conv2D, Dense, Dropout, Reshape, Merge, BatchNormalization, TimeDistributed, Lambda, Activation, LSTM, Flatten, Convolution2D, GRU, MaxPooling1D
from keras.regularizers import l2
from keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
#from keras import initializers
from keras import backend as K
from keras.optimizers import SGD
from keras.optimizers import Adadelta
from keras.utils import np_utils
from keras.preprocessing import sequence
from keras import optimizers
import numpy as np

In [50]:
def build_simple2(nb_class, optimizer='Adam'):
    model = Sequential()
    model.add(Dense(512  , input_shape=(1200,)))
    model.add(Dropout(0.2))
    model.add(Dense(256))
    model.add(Dropout(0.2))
    model.add(Dense(4))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [68]:
def build_simple2(nb_class, optimizer='Adam'):
    model = Sequential()
    model.add(LSTM(128  , input_shape=(200,6) ))
    model.add(Dropout(0.2))
    model.add(Dense(64))
    model.add(Dropout(0.2))
    model.add(Dense(4))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [72]:
model = build_simple2(nb_class)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 128)               69120     
_________________________________________________________________
dropout_16 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_23 (Dense)             (None, 64)                8256      
_________________________________________________________________
dropout_17 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_24 (Dense)             (None, 4)                 260       
_________________________________________________________________
activation_9 (Activation)    (None, 4)                 0         
Total params: 77,636
Trainable params: 77,636
Non-trainable params: 0
_________________________________________________________________


In [73]:
hist = model.fit(x_train2, Y, 
                 batch_size=batch_size, nb_epoch=15, verbose=1, shuffle = True, 
                 validation_split=0.2)



Train on 3929 samples, validate on 983 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [77]:
x1=0
x2=0
x3=0
x4=0
'ang' 'exc' 'neu' 'sad'
for ses_mod in data:
    if (ses_mod['emotion'] == 'ang'):
        x1+=1
    elif (ses_mod['emotion'] == 'exc'):
        x2+=1
    elif (ses_mod['emotion'] == 'neu'):
        x3+=1
    else:
        x4+=1
        
print(x1)
print(x2)
print(x3)
print(x4)


1090
1041
1704
1077


In [80]:
def read_iemocap_mocap_rot(params=Constants()):
    data = []
    ids = {}
    for session in params.sessions:
        path_to_wav = params.path_to_data + session + '/dialog/wav/'
        path_to_emotions = params.path_to_data + session + '/dialog/EmoEvaluation/'
        path_to_transcriptions = params.path_to_data + session + '/dialog/transcriptions/'
        path_to_mocap_rot = params.path_to_data + session + '/dialog/MOCAP_rotated/'

        files2 = os.listdir(path_to_wav)
        #print (files2.sort())
        files = []
        for f in files2:
            if f.endswith(".wav"):
                if f[0] == '.':
                    files.append(f[2:-4])
                else:
                    files.append(f[:-4])
                    
        #files = os.listdir(path_to_wav)
        #files = [f[:-4] for f in files if f.endswith(".wav")]
        for f in files:       
            print(f)
            if (f== 'Ses05M_script01_1b'):
                continue
            transcriptions = get_transcriptions(path_to_transcriptions, f + '.txt')
            emotions = get_emotions(path_to_emotions, f + '.txt')

            for ie, e in enumerate(emotions):
                e.pop("left", None)
                e.pop("right", None)
                e['transcription'] = transcriptions[e['id']]
                e['mocap_rot'] = get_mocap_rot(path_to_mocap_rot, f + '.txt', e['start'], e['end'])
                if e['emotion'] in params.available_emotions:
                    if e['id'] not in ids:
                        data.append(e)
                        ids[e['id']] = 1

                        
    sort_key = get_field(data, "id")
    return np.array(data)[np.argsort(sort_key)]

In [None]:
data = read_iemocap_mocap_rot(params=params)

Ses01F_impro02


  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Ses01F_script03_2
Ses01M_script01_2
Ses01F_impro03
Ses01F_impro05
Ses01M_script02_2
Ses01M_impro06
Ses01F_script03_2
Ses01F_script01_3
Ses01M_impro01
Ses01M_impro03


In [5]:
import pickle
with open(params.path_to_data + '/../'+'rotate.pickle', 'rb') as handle:
    data2 = pickle.load(handle)

In [11]:
data2[0]

{'a': 2.5,
 'd': 2.5,
 'emo_evo': [['neu'], ['neu'], ['neu'], ['neu']],
 'emotion': 'neu',
 'end': 8.2357,
 'id': 'Ses01F_impro01_F000',
 'mocap_rot': array([[ -28.166955,   33.83828 ,  -54.437235, ...,  -45.552485,
           52.78455 ,  129.792845],
        [ -28.16234 ,   33.891575,  -54.37245 , ...,  -45.564395,
           52.978755,  129.856305],
        [ -28.19076 ,   33.97728 ,  -54.39008 , ...,  -45.58896 ,
           53.056005,  129.88101 ],
        ..., 
        [ -28.12114 ,   34.79247 ,  -56.04058 , ...,  -45.36128 ,
           52.5758  ,  129.79436 ],
        [ -28.11692 ,   34.77052 ,  -56.0277  , ...,  -45.34806 ,
           52.53871 ,  129.78537 ],
        [ -28.0848  ,   34.75438 ,  -56.03502 , ...,  -45.33584 ,
           52.48602 ,  129.75684 ]]),
 'start': 6.2901,
 'transcription': 'Excuse me.',
 'v': 2.5}

In [28]:
x_train2 = []
from sklearn.preprocessing import normalize

for ses_mod in data2:
    x = ses_mod['mocap_rot']
    x[np.isnan(x)]=0
    #x_normed = (x - x.min(0)) / x.ptp(0)
    #x_normed = x_normed - 0.5
    #x_normed[np.isnan(x)]=0
    x_train2.append( x )
    
x_train2 = np.array(x_train2)
x_train2.shape


(4912, 200, 165)

In [30]:
x_train2[1]

array([[ -28.32791,   34.75257,  -56.31461, ...,  -45.34444,   52.31502,
         129.84094],
       [ -28.33381,   34.77408,  -56.32875, ...,  -45.33795,   52.43037,
         129.80971],
       [ -28.32203,   34.74413,  -56.33002, ...,  -45.35648,   52.33263,
         129.83345],
       ..., 
       [   0.     ,    0.     ,    0.     , ...,    0.     ,    0.     ,
           0.     ],
       [   0.     ,    0.     ,    0.     , ...,    0.     ,    0.     ,
           0.     ],
       [   0.     ,    0.     ,    0.     , ...,    0.     ,    0.     ,
           0.     ]])

In [31]:
x_train2 = x_train2.reshape(-1,200,165,1)

In [32]:
Y=[]
for ses_mod in data2:
    Y.append(ses_mod['emotion'])
    
Y = to_categorical(Y)

Y.shape

(4912, 4)

In [17]:
from os import listdir
import random
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Conv2D, Dense, Dropout, Reshape, Merge, BatchNormalization, TimeDistributed, Lambda, Activation, LSTM, Flatten, Convolution2D, GRU, MaxPooling1D
from keras.regularizers import l2
from keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
#from keras import initializers
from keras import backend as K
from keras.optimizers import SGD
from keras.optimizers import Adadelta
from keras.utils import np_utils
from keras.preprocessing import sequence
from keras import optimizers
import numpy as np

In [33]:
def build_simple_conv(nb_feat, nb_class, optimizer='Adam'):
    model = Sequential()
    model.add(Conv2D(32, 3, strides=(2, 2), border_mode='same', input_shape=(200, 165, 1)))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(64, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(64, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(128, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(128, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dropout(0.2))
    model.add(Dense(256))
    model.add(Activation('relu')) 
    model.add(Dropout(0.2))
    model.add(Dense(4))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [34]:
model = build_simple_conv(nb_feat, nb_class)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 100, 83, 32)       320       
_________________________________________________________________
dropout_15 (Dropout)         (None, 100, 83, 32)       0         
_________________________________________________________________
activation_15 (Activation)   (None, 100, 83, 32)       0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 50, 42, 64)        18496     
_________________________________________________________________
dropout_16 (Dropout)         (None, 50, 42, 64)        0         
_________________________________________________________________
activation_16 (Activation)   (None, 50, 42, 64)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 25, 21, 64)        36928     
__________

  This is separate from the ipykernel package so we can avoid doing imports until
  
  if __name__ == '__main__':
  if sys.path[0] == '':
  from ipykernel import kernelapp as app


In [35]:
hist = model.fit(x_train2, Y, 
                 batch_size=batch_size, nb_epoch=30, verbose=1, shuffle = True, 
                 validation_split=0.2)



Train on 3929 samples, validate on 983 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [39]:
def build_simple_conv(nb_feat, nb_class, optimizer='SGD'):
    model = Sequential()
    model.add(Conv2D(32, 3, strides=(2, 2), border_mode='same', input_shape=(200, 165, 1)))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(64, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(64, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))
    model.add(Conv2D(128, 3, strides=(2, 2), border_mode='same'))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu')) 
    model.add(Dropout(0.2))
    model.add(Dense(256))
    model.add(Activation('relu')) 
    model.add(Dropout(0.2))
    model.add(Dense(4))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [40]:
model = build_simple_conv(nb_feat, nb_class)
model.summary()

  This is separate from the ipykernel package so we can avoid doing imports until
  
  if __name__ == '__main__':
  if sys.path[0] == '':


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_20 (Conv2D)           (None, 100, 83, 32)       320       
_________________________________________________________________
dropout_28 (Dropout)         (None, 100, 83, 32)       0         
_________________________________________________________________
activation_28 (Activation)   (None, 100, 83, 32)       0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 50, 42, 64)        18496     
_________________________________________________________________
dropout_29 (Dropout)         (None, 50, 42, 64)        0         
_________________________________________________________________
activation_29 (Activation)   (None, 50, 42, 64)        0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 25, 21, 64)        36928     
__________

In [41]:
hist = model.fit(x_train2, Y, 
                 batch_size=batch_size, nb_epoch=30, verbose=1, shuffle = True, 
                 validation_split=0.2)



Train on 3929 samples, validate on 983 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [42]:
x_train2 = []
from sklearn.preprocessing import normalize

for ses_mod in data2:
    x = ses_mod['mocap_rot']
    x[np.isnan(x)]=0
    #x_normed = (x - x.min(0)) / x.ptp(0)
    #x_normed = x_normed - 0.5
    #x_normed[np.isnan(x)]=0
    x_train2.append( x )
    
x_train2 = np.array(x_train2)
x_train2.shape

(4912, 200, 165)

In [50]:
def build_simple_lstm(nb_feat, nb_class, optimizer='Adadelta'):
    model = Sequential()
    model.add(LSTM(512, return_sequences=True, input_shape=(200, 165)))
    model.add(Activation('tanh'))
    model.add(LSTM(256, return_sequences=False))
    model.add(Activation('tanh'))
    model.add(Dense(512))
    model.add(Activation('tanh'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [51]:
model = build_simple_lstm(nb_feat, nb_class)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 200, 512)          1388544   
_________________________________________________________________
activation_48 (Activation)   (None, 200, 512)          0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 256)               787456    
_________________________________________________________________
activation_49 (Activation)   (None, 256)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 512)               131584    
_________________________________________________________________
activation_50 (Activation)   (None, 512)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 4)                 2052      
__________

In [52]:
hist = model.fit(x_train2, Y, 
                 batch_size=batch_size, nb_epoch=30, verbose=1, shuffle = True, 
                 validation_split=0.2)



Train on 3929 samples, validate on 983 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [54]:
import pickle
with open(params.path_to_data + '/../'+'hear.pickle', 'rb') as handle:
    data2 = pickle.load(handle)

In [64]:
x_train2 = []
from sklearn.preprocessing import normalize

for ses_mod in data2:
    x = ses_mod['mocap_head']
    x[np.isnan(x)]=0
    #x_normed = (x - x.min(0)) / x.ptp(0)
    #x_normed = x_normed - 0.5
    #x_normed[np.isnan(x)]=0
    x_train2.append( x )
    
x_train2 = np.array(x_train2)
x_train2.shape

(4912, 200, 6)

In [65]:
Y=[]
for ses_mod in data2:
    Y.append(ses_mod['emotion'])
    
Y = to_categorical(Y)

Y.shape

(4912, 4)

In [66]:
def build_simple_lstm(nb_feat, nb_class, optimizer='Adadelta'):
    model = Sequential()
    model.add(LSTM(256, return_sequences=False, input_shape=(200, 6)))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [67]:
model = build_simple_lstm(nb_feat, nb_class)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 256)               269312    
_________________________________________________________________
dense_24 (Dense)             (None, 128)               32896     
_________________________________________________________________
activation_54 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 4)                 516       
_________________________________________________________________
activation_55 (Activation)   (None, 4)                 0         
Total params: 302,724
Trainable params: 302,724
Non-trainable params: 0
_________________________________________________________________


In [68]:
hist = model.fit(x_train2, Y, 
                 batch_size=batch_size, nb_epoch=30, verbose=1, shuffle = True, 
                 validation_split=0.2)



Train on 3929 samples, validate on 983 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
 384/3929 [=>............................] - ETA: 15s - loss: 0.9119 - acc: 0.6250

KeyboardInterrupt: 

In [70]:
path_to_avi = params.path_to_data + 'Session1' + '/dialog/MOCAP_hand/Ses01F_impro01.txt'

In [81]:
def get_mocap_hand(path_to_mocap_hand, filename, start,end, params=Constants()):
    f = open(path_to_mocap_hand + filename, 'r').read()
    f = np.array(f.split('\n'))
    mocap_hand = []
    mocap_hand_avg = []
    f = f[2:]
    counter = 0
    for data in f:
        counter+=1
        data2 = data.split(' ')
        if(len(data2)<2):
            continue
        if(float(data2[1])>start and float(data2[1])<end):
            mocap_hand_avg.append(np.array(data2[2:]).astype(np.float))
            
    mocap_hand_avg = np.array_split(np.array(mocap_hand_avg), 200)
    for spl in mocap_hand_avg:
        mocap_hand.append(np.mean(spl, axis=0))
    return np.array(mocap_hand)

In [82]:
def read_iemocap_mocap_hand(params=Constants()):
    data = []
    ids = {}
    for session in params.sessions:
        path_to_wav = params.path_to_data + session + '/dialog/wav/'
        path_to_emotions = params.path_to_data + session + '/dialog/EmoEvaluation/'
        path_to_transcriptions = params.path_to_data + session + '/dialog/transcriptions/'
        path_to_mocap_hand = params.path_to_data + session + '/dialog/MOCAP_hand/'

        files2 = os.listdir(path_to_wav)
        #print (files2.sort())
        files = []
        for f in files2:
            if f.endswith(".wav"):
                if f[0] == '.':
                    files.append(f[2:-4])
                else:
                    files.append(f[:-4])
                    
        #files = os.listdir(path_to_wav)
        #files = [f[:-4] for f in files if f.endswith(".wav")]
        for f in files:       
            print(f)
            mocap_f = f
            if (f== 'Ses05M_script01_1b'):
                mocap_f = 'Ses05M_script01_1' 
            transcriptions = get_transcriptions(path_to_transcriptions, f + '.txt')
            emotions = get_emotions(path_to_emotions, f + '.txt')

            for ie, e in enumerate(emotions):
                e.pop("left", None)
                e.pop("right", None)
                e['transcription'] = transcriptions[e['id']]
                e['mocap_hand'] = get_mocap_hand(path_to_mocap_hand, mocap_f + '.txt', e['start'], e['end'])
                if e['emotion'] in params.available_emotions:
                    if e['id'] not in ids:
                        data.append(e)
                        ids[e['id']] = 1

                        
    sort_key = get_field(data, "id")
    return np.array(data)[np.argsort(sort_key)]

In [84]:
data = read_iemocap_mocap_hand(params=params)

Ses01F_impro02


  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Ses01F_script03_2
Ses01M_script01_2
Ses01F_impro03
Ses01F_impro05
Ses01M_script02_2
Ses01M_impro06
Ses01F_script03_2
Ses01F_script01_3
Ses01M_impro01
Ses01M_impro03
Ses01M_impro04
Ses01F_script01_2
Ses01F_script03_1
Ses01F_impro07
Ses01M_impro02
Ses01F_script02_2
Ses01F_script01_2
Ses01F_script02_2
Ses01M_script02_2
Ses01M_script01_3
Ses01M_script03_2
Ses01F_impro01
Ses01M_script02_1
Ses01F_impro07
Ses01M_script02_1
Ses01M_impro04
Ses01M_script01_2
Ses01F_impro01
Ses01M_script03_1
Ses01M_script01_3
Ses01F_impro02
Ses01M_impro06
Ses01M_impro05
Ses01M_script03_2
Ses01F_script02_1
Ses01M_impro05
Ses01F_impro04
Ses01F_script01_1
Ses01F_impro04
Ses01F_impro03
Ses01M_script01_1
Ses01F_script01_1
Ses01M_script01_1
Ses01M_impro07
Ses01F_script02_1
Ses01M_impro02
Ses01F_impro05
Ses01M_impro03
Ses01F_impro06
Ses01M_impro07
Ses01M_impro01
Ses01F_impro06
Ses01F_script01_3
Ses01F_script03_1
Ses01M_script03_1
Ses02M_impro07
Ses02F_impro07
Ses02F_impro04
Ses02F_impro02
Ses02F_script02_1
Ses02M_impro0

  ret = ret.dtype.type(ret / rcount)


Ses05M_impro01
Ses05F_impro02
Ses05F_impro01
Ses05F_impro08
Ses05F_script01_3
Ses05M_impro04
Ses05F_impro06
Ses05M_script01_1b
Ses05F_impro05
Ses05M_script01_1
Ses05F_impro03
Ses05F_script02_2
Ses05M_script01_1
Ses05F_script01_1
Ses05M_impro06
Ses05F_impro05
Ses05M_impro04
Ses05F_script03_1
Ses05M_impro08
Ses05M_impro02
Ses05M_script01_3
Ses05F_script02_1
Ses05F_script01_2
Ses05M_script01_2
Ses05M_script02_1
Ses05F_impro07
Ses05M_impro07
Ses05F_script03_1
Ses05F_script01_2
Ses05M_script02_1
Ses05F_script02_2
Ses05M_script02_2
Ses05F_impro01
Ses05M_script02_2
Ses05M_impro01
Ses05F_impro06
Ses05F_impro02
Ses05M_script03_2
Ses05F_script01_3
Ses05M_impro05
Ses05M_impro05
Ses05M_script01_1b


In [85]:
import pickle
with open(params.path_to_data + '/../'+'hand.pickle', 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [123]:
x_train2 = []
from sklearn.preprocessing import normalize
counter = 0
for ses_mod in data:
    x = ses_mod['mocap_head']
    if(x.shape != (200,18)):
        x = np.zeros((200,18))
        
    x[np.isnan(x)]=0
    #x_normed = (x - x.min(0)) / x.ptp(0)
    #x_normed = x_normed - 0.5
    #x_normed[np.isnan(x)]=0
    x_train2.append( x )
    
x_train2 = np.array(x_train2)

In [126]:
x_train2.shape

(4936, 200, 18)

In [131]:
Y=[]
for ses_mod in data:
    Y.append(ses_mod['emotion'])
    
Y = to_categorical(Y)

Y.shape

(4936, 4)

In [138]:
def build_simple_lstm(nb_feat, nb_class, optimizer='SGD'):
    model = Sequential()
    model.add(LSTM(256, return_sequences=False, input_shape=(200, 18)))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [139]:
model = build_simple_lstm(nb_feat, nb_class)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_13 (LSTM)               (None, 256)               281600    
_________________________________________________________________
activation_62 (Activation)   (None, 256)               0         
_________________________________________________________________
dense_32 (Dense)             (None, 128)               32896     
_________________________________________________________________
activation_63 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_33 (Dense)             (None, 4)                 516       
_________________________________________________________________
activation_64 (Activation)   (None, 4)                 0         
Total params: 315,012
Trainable params: 315,012
Non-trainable params: 0
_________________________________________________________________


In [140]:
hist = model.fit(x_train2, Y, 
                 batch_size=batch_size, nb_epoch=10, verbose=1, shuffle = True, 
                 validation_split=0.2)



Train on 3948 samples, validate on 988 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
