In [2]:
#Import Dependencies

import audeer
import audonnx
import numpy as np
import pandas as pd
import audiofile

# Working folders used by the cells below: `cache_root` receives the downloaded
# archive and `model_root` the extracted model files that audonnx loads.
# NOTE(review): relies on audeer.mkdir returning the folder path — confirm in the audeer docs.
cache_root = audeer.mkdir('cache')
model_root = audeer.mkdir('model')

In [None]:
# Download the pretrained model archive into `cache_root` and unpack it into `model_root`.
# NOTE(review): this cell has no explicit "already present" check; whether the
# download/extraction is skipped on re-runs depends on audeer's behavior — confirm.
url = 'https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip'

archive_path = audeer.download_url(url, cache_root, verbose=True)
audeer.extract_archive(archive_path, model_root)

In [3]:
# Load the model from `model_root` and smoke-test it on synthetic noise.
model = audonnx.load(model_root)
sampling_rate = 16000
# `sampling_rate` samples at `sampling_rate` Hz, i.e. one second of audio.
# A seeded generator keeps the displayed result reproducible after
# Restart & Run All (the global np.random state is left untouched).
signal = np.random.default_rng(0).normal(size=sampling_rate).astype(np.float32)
model(signal, sampling_rate)

{'hidden_states': array([[-0.00715537,  0.00622466, -0.00822073, ...,  0.00659769,
          0.00971537,  0.00293756]], dtype=float32),
 'logits': array([[0.67691374, 0.6504316 , 0.4971407 ]], dtype=float32)}

In [4]:
# Helper functions that use the model to extract arousal, valence and dominance from audio files

def predict_avd(path):
    """Run the emotion model on one audio file and return its logits row.

    Args:
        path: Location of the audio file; handed unchanged to ``audiofile.read``.

    Returns:
        The first row of the model's ``"logits"`` output.

    The file's own sampling rate is forwarded to the model as-is; no
    resampling happens in this function.

    NOTE(review): the published description of this model lists the logits in
    the order arousal, dominance, valence — confirm (e.g. with
    ``model.labels('logits')``) before relying on positions.
    """
    # always_2d=True: presumably yields a 2-D array even for mono files — confirm in audiofile docs.
    waveform, rate = audiofile.read(path, always_2d=True)
    outputs = model(waveform, rate)
    first_row = outputs["logits"][0]
    return first_row

def extract_avd(input_file,output_file):
    """Predict arousal, valence and dominance for every file listed in a CSV.

    Args:
        input_file: CSV to read; each row must provide the columns ``path``,
            ``emotion`` and ``ActorID``.
        output_file: Destination CSV. It is written without an index column and
            with the columns ``ActorID, emotion, path, arousal, valence,
            dominance`` (in that order).

    Each ``path`` is passed to ``predict_avd`` and the returned values are
    stored next to the row's metadata.

    The model's logits are ordered arousal, dominance, valence (per the
    published model description; ``model.labels('logits')`` can be used to
    double-check), so index 1 is dominance and index 2 is valence.
    """
    df = pd.read_csv(input_file)
    # For a quick trial run, subsample first, e.g. df = df.sample(n=3)
    data = {"ActorID":[],"emotion":[],"path":[],"arousal":[],"valence":[],"dominance":[]}
    for _, row in df.iterrows():
        logits = predict_avd(row["path"])
        data["arousal"].append(logits[0])
        # Index 1 is dominance and index 2 is valence; reading them the other
        # way round would swap the two output columns.
        data["dominance"].append(logits[1])
        data["valence"].append(logits[2])
        data["path"].append(row['path'])
        data["emotion"].append(row['emotion'])
        data["ActorID"].append(row['ActorID'])
    resdf = pd.DataFrame.from_dict(data)
    resdf.to_csv(output_file,index=False)


In [None]:
# Run the extraction: reads data/crema_df.csv and writes the predictions to data/crema_avd.csv.
extract_avd("data/crema_df.csv","data/crema_avd.csv")