In [29]:
# Mount Google Drive at /content/drive; the working directory chosen in the
# next cell lives underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
import os
# Switch into the features directory so the bare file names used by later
# cells (the feature pickle, scene_labels.csv, the output pickle) resolve here.
os.chdir('/content/drive/My Drive/multimodal_sarcasm_dataset/data/features/')

In [31]:
import tensorflow as tf
import tensorflow_hub as hub
import os 
import pickle
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import numpy as np

In [32]:
# Read the pre-computed feature dictionary from the working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('feat_dict_context_librosa_lld_trill16k.pickle', 'rb') as feature_file:
    librosa_audio_features = pickle.load(feature_file, encoding='latin1')

In [33]:
# Label table; get_model_data reads its SCENE, Sarcasm, Sarcasm_Type and
# SPEAKER columns.
labels = pd.read_csv("scene_labels.csv")

In [36]:
def get_model_data(audio_features, labels_df=None):
    """Join the label table with the per-scene audio features.

    Parameters
    ----------
    audio_features : mapping
        Maps ``"<SCENE>_c.wav"`` keys to the feature object for that scene.
    labels_df : pandas.DataFrame, optional
        Table with ``SCENE``, ``Sarcasm``, ``Sarcasm_Type`` and ``SPEAKER``
        columns. Defaults to the notebook-level ``labels`` frame.

    Returns
    -------
    pandas.DataFrame
        One row per label row, indexed 0..n-1, with the columns
        ``audio_feature``, ``sarcasm``, ``sarcasm_type``, ``speaker`` and
        ``audio_key`` (in that order).

    Raises
    ------
    KeyError
        If a scene's ``"<SCENE>_c.wav"`` key is missing from ``audio_features``.
    """
    if labels_df is None:
        labels_df = labels

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every iteration.
    records = []
    for _, row in labels_df.iterrows():
        audio_key = row["SCENE"] + "_c.wav"
        records.append({
            'audio_feature': audio_features[audio_key],
            'sarcasm': row["Sarcasm"],
            'sarcasm_type': row["Sarcasm_Type"],
            'speaker': row["SPEAKER"],
            'audio_key': audio_key,
        })

    # Explicit column list keeps the column order stable, including for an
    # empty label table.
    return pd.DataFrame(
        records,
        columns=['audio_feature', 'sarcasm', 'sarcasm_type', 'speaker', 'audio_key'],
    )

In [37]:
# Build the per-scene table (features + labels + audio key) and display it.
data = get_model_data(librosa_audio_features)
data

Unnamed: 0,audio_feature,sarcasm,sarcasm_type,speaker,audio_key
0,"[[-618.9833984375, -502.91964935302735, -525.1...",0.0,NONE,SHELDON,1_10004_c.wav
1,"[[-593.0794067382812, -393.8955678632182, -359...",0.0,NONE,PENNY,1_10009_c.wav
2,"[[-489.5411376953125, -384.8366248759818, -209...",0.0,NONE,RAJ,1_1001_c.wav
3,"[[-311.09417724609375, -219.8680011995377, -32...",1.0,PRO,HOWARD,1_1003_c.wav
4,"[[-295.9714050292969, -413.38807861328127, -39...",0.0,NONE,SHELDON,1_10190_c.wav
...,...,...,...,...,...
1197,"[[-675.8568115234375, -532.7791871886322, -525...",0.0,NONE,OTHER,3_S06E02_398_c.wav
1198,"[[-574.171142578125, -402.7659506661551, -394....",1.0,PRO,RICHARD,3_S06E03_366_c.wav
1199,"[[-514.8732299804688, -368.3621577947912, -435...",1.0,PRO,OTHER,3_S06E05_355_c.wav
1200,"[[-578.3153076171875, -459.73959455818965, -43...",1.0,ILL,GILFOYLE,3_S06E06_143_c.wav


In [38]:
# Load the TF Hub module published at the trillsson3 URL below.
model = hub.load('https://tfhub.dev/google/trillsson3/1')

In [39]:
# Reduce every scene's feature matrix to one vector by averaging along axis 1
# and keep the result in its own column next to the original features.
data['averaged_audio_feature'] = data['audio_feature'].map(
    lambda feature_matrix: np.squeeze(np.mean(feature_matrix, axis=1))
)

In [40]:
# Plain Python list of the averaged vectors, one entry per row of `data`.
values = data['averaged_audio_feature'].to_list()

In [41]:
# Pack the list of averaged vectors into a single TensorFlow tensor and
# print its Python type.
tensors = tf.constant(values)
print(type(tensors))

<class 'tensorflow.python.framework.ops.EagerTensor'>


In [42]:
# Convert the tensor to float32 before passing it to the model.
tensors = tf.cast(tensors, tf.dtypes.float32)

In [43]:
# Sanity check: show the shape of the model input.
tensors.shape

TensorShape([1202, 690])

In [44]:
# Run the loaded model on the whole batch in one call; the result is indexed
# by the "embedding" key in the cells below.
# NOTE(review): the input here is the batch of axis-1-averaged feature
# vectors built above, not raw waveform samples — confirm this is the input
# the loaded model expects.
embeddings = model(tensors)

In [45]:
# Sanity check: show the shape of the returned embedding tensor.
embeddings["embedding"].shape

TensorShape([1202, 1024])

In [46]:
# Peek at the embedding of the first row.
embeddings["embedding"][0]

<tf.Tensor: shape=(1024,), dtype=float32, numpy=
array([-1.3758289 , -0.44799396, -1.1103612 , ...,  3.6759279 ,
        0.3203821 , -2.668886  ], dtype=float32)>

In [47]:
# Number of rows in `data`, for comparison with the embedding batch size.
len(data)

1202

In [48]:
import pickle

# Convert the embedding tensor to NumPy once instead of slicing and converting
# it row by row.
embedding_matrix = embeddings["embedding"].numpy()
audio_keys = data['audio_key'].tolist()

# Keys and embeddings are paired purely by position, so a length mismatch
# would silently misalign or drop rows; fail loudly instead.
if len(audio_keys) != len(embedding_matrix):
    raise ValueError(
        f"row count mismatch: {len(audio_keys)} audio keys vs "
        f"{len(embedding_matrix)} embeddings"
    )

# audio key -> embedding vector for that row of `data`.
trillfeat = dict(zip(audio_keys, embedding_matrix))

# Persist the mapping in the current working directory.
with open('trill_context_features.pickle', 'wb') as handle:
    pickle.dump(trillfeat, handle, protocol=pickle.HIGHEST_PROTOCOL)