In [12]:
import os

import librosa
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tqdm import tqdm

from data_loader import data_loader

In [13]:
# Emotion vocabularies; a label's integer id is its position in the list.
# CLASSES7 extends CLASSES6 with one extra class, so ids of the shared
# emotions are the same in both lists.
CLASSES6 = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'neutral',
    'sad',
]
CLASSES7 = [*CLASSES6, 'surprise']

In [14]:
# Load the VGGish module from TensorFlow Hub (fetched by handle on first use).
VGGISH_HANDLE = 'https://tfhub.dev/google/vggish/1'
model = hub.load(VGGISH_HANDLE)

In [15]:
# Dataset folders, keyed by the keyword argument data_loader takes for each.
dataset_paths = {
    'Crema_path': '../Datasets/Crema/',
    'Ravdess_path': '../Datasets/Ravdess/',
    'Savee_path': '../Datasets/Savee/',
    'Tess_path': '../Datasets/Tess/',
}
dl = data_loader(**dataset_paths)

In [16]:
def get_embeddings(filename, label):
    """Compute embeddings for one audio file with the global ``model``.

    The file is read as mono at its native sampling rate and resampled to
    16 kHz only when the native rate differs, then passed to ``model``.

    Args:
        filename: Path of the audio file to read.
        label: Label to attach to every embedding produced for this file.

    Returns:
        A pair ``(embeddings, labels)``: the model output for the waveform and
        ``label`` repeated once per entry along the first axis of that output.
    """
    target_sr = 16000

    waveform, native_sr = librosa.load(
        filename, sr=None, mono=True, res_type='kaiser_best'
    )
    if native_sr != target_sr:
        waveform = librosa.resample(
            y=waveform,
            orig_sr=native_sr,
            target_sr=target_sr,
            res_type='kaiser_best',
        )

    file_embeddings = model(waveform)
    # One copy of the label per embedding so both outputs stay aligned.
    repeated_labels = tf.repeat(label, tf.shape(file_embeddings)[0])
    return file_embeddings, repeated_labels

In [17]:
# Fetch one dataframe per dataset plus a combined one from the loader.
# Presumably each has one row per audio file; save_embeddings reads the
# 'File_Path' and 'Emotion' columns from them -- TODO confirm against data_loader.
tess = dl.get_tess_df()
savee = dl.get_savee_df()
ravdess = dl.get_ravdess_df()
crema = dl.get_crema_df()
all_en = dl.get_all_df()

In [18]:
def save_embeddings(df, filename):
    """Extract embeddings for every file in ``df`` and save them as ``.npy`` files.

    Each row's audio file is passed to ``get_embeddings``; the resulting
    embeddings are stacked into one feature array and paired with one integer
    class id per embedding. The arrays are written to ``filename + 'X.npy'``
    and ``filename + 'y.npy'``.

    Args:
        df: DataFrame with a ``'File_Path'`` column (audio file path) and an
            ``'Emotion'`` column (class name).
        filename: Path prefix for the output files, e.g. a directory path
            ending in ``'/'``. Missing parent directories are created.

    Raises:
        ValueError: If an emotion is not present in the selected class list.
    """
    # Frames with exactly six distinct emotions use the six-class vocabulary;
    # everything else uses the seven-class one.
    classes = CLASSES6 if df['Emotion'].nunique() == 6 else CLASSES7

    # Create the destination before the (slow) extraction loop so a missing
    # directory cannot make np.save fail after all the work has been done.
    out_dir = os.path.dirname(filename + 'X.npy')
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    feature_chunks = []
    label_chunks = []

    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        embs, label = get_embeddings(row['File_Path'], row['Emotion'])
        assert len(embs) == len(label)

        # Convert once per file instead of appending tensor rows one by one.
        embs = np.asarray(embs)
        num_embs = embs.shape[0]
        if num_embs == 0:
            # Nothing was produced for this file; it contributes no rows.
            continue

        # Every embedding of a file carries the same label, so resolve the
        # class id once from the row instead of once per embedding.
        class_id = classes.index(row['Emotion'])
        feature_chunks.append(embs)
        label_chunks.append(np.full(num_embs, class_id))

    if feature_chunks:
        features = np.concatenate(feature_chunks, axis=0)
        labels = np.concatenate(label_chunks)
    else:
        # Keep the historical result for an empty input: two empty arrays.
        features = np.array([])
        labels = np.array([])

    np.save(filename + 'X.npy', features)
    np.save(filename + 'y.npy', labels)

In [19]:
# Writes X.npy / y.npy for the `tess` dataframe under the given prefix.
save_embeddings(df=tess, filename='./features/vggish_emb/tess/')

100%|██████████| 2800/2800 [06:02<00:00,  7.71it/s]


In [20]:
# Writes X.npy / y.npy for the `savee` dataframe under the given prefix.
save_embeddings(df=savee, filename='./features/vggish_emb/savee/')

100%|██████████| 480/480 [01:42<00:00,  4.67it/s]


In [21]:
# Writes X.npy / y.npy for the `ravdess` dataframe under the given prefix.
save_embeddings(df=ravdess, filename='./features/vggish_emb/ravdess/')

100%|██████████| 1440/1440 [04:12<00:00,  5.71it/s]


In [22]:
# Writes X.npy / y.npy for the `crema` dataframe under the given prefix.
save_embeddings(df=crema, filename='./features/vggish_emb/crema/')

100%|██████████| 7442/7442 [07:26<00:00, 16.67it/s]


In [23]:
# Writes X.npy / y.npy for the combined `all_en` dataframe under the given prefix.
save_embeddings(df=all_en, filename='./features/vggish_emb/all/')

100%|██████████| 12162/12162 [18:35<00:00, 10.90it/s]


# Testing

In [24]:
# Reload the features and labels that save_embeddings wrote for `savee`.
# The files hold plain numeric arrays, so pickle support stays disabled
# (NumPy's default): allow_pickle=True would let a tampered .npy file
# execute arbitrary code on load.
X = np.load('./features/vggish_emb/savee/X.npy')
y = np.load('./features/vggish_emb/savee/y.npy')

In [25]:
# The first dimensions should match: save_embeddings stores one label per embedding.
X.shape, y.shape

((1672, 128), (1672,))

In [26]:
# Spot-check a single stored embedding row.
X[200]

array([ 0.6197338 , -0.29157487, -0.09512645, -0.87194926, -0.07587396,
        0.15066339, -0.6169818 ,  0.12419615, -1.0076988 ,  0.13313988,
       -1.181723  , -0.4617458 , -0.82125384, -0.5083156 , -0.3155771 ,
       -0.03422102, -0.33547163,  0.17200038,  0.05936712,  0.05832529,
        0.37256032, -0.14059591, -0.31094685,  0.25461078,  0.16009496,
        0.34995678,  0.32597387,  0.34991467, -0.3154465 , -0.30754793,
       -0.15141848, -0.13154104,  0.10227065,  0.09376562,  0.11562926,
       -0.0591009 , -0.9396451 , -0.24463545, -0.6627543 , -0.23763686,
        0.32678533, -0.4947036 ,  0.19601218, -0.50604415, -0.04912162,
       -0.06531596,  0.2793016 ,  0.2311399 ,  0.12276937,  0.56661755,
        0.20908324, -0.6384721 ,  0.4149729 , -1.0539478 ,  0.37674472,
       -0.25910258,  0.15177324, -1.0351121 ,  0.25636894,  0.2925262 ,
        0.35122687, -0.18634926, -0.42906755, -0.4676947 , -0.3306368 ,
       -0.15869156, -0.21102369,  0.8933415 , -0.3373026 ,  0.40

In [27]:
# Class id stored for the same row (an index into the class list used by save_embeddings).
y[200]

3