In [1]:
from pydub import AudioSegment
import numpy as np
import pandas as pd
import random



In [2]:
file_locs = pd.read_csv('local_audio_files.csv')
file_locs.head()

Unnamed: 0,Actor,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetion,Gender,Audio_file
0,Actor_01,3,1,1,1,1,1,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
1,Actor_01,3,1,1,1,1,2,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
2,Actor_01,3,1,1,1,2,1,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
3,Actor_01,3,1,1,1,2,2,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
4,Actor_01,3,1,2,1,1,1,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...


In [3]:
actors = set(file_locs['Actor'].unique())
print(actors)

{'Actor_20', 'Actor_10', 'Actor_18', 'Actor_13', 'Actor_07', 'Actor_15', 'Actor_17', 'Actor_01', 'Actor_12', 'Actor_19', 'Actor_16', 'Actor_22', 'Actor_24', 'Actor_06', 'Actor_09', 'Actor_03', 'Actor_14', 'Actor_02', 'Actor_08', 'Actor_21', 'Actor_04', 'Actor_11', 'Actor_23', 'Actor_05'}


In [4]:
def get_random_mixed_clip(data=file_locs, actors=actors):
    clip_num = random.randint(2, 4)  # 2 to 4 clips combined into a single clip
    speaker_num = random.randint(2, clip_num)  # at least 2 speakers chosen
    speaker_choices = random.sample(actors, speaker_num)
    
    # given n speakers and m clips, generating a random distribution of clips per speaker such that each speaker is at least once
    arr = [1 for _ in range(speaker_num)]
    for _ in range(clip_num-speaker_num):  # number of slots left to be filled
        arr[random.randint(0, speaker_num-1)] += 1
    
    idxs = data.Emotion[data.Actor.eq(speaker_choices[0])].sample(arr[0]).index
    for i in range(1, len(arr)):
        idxs = idxs.union(data.Emotion[data.Actor.eq(speaker_choices[i])].sample(arr[i]).index)
    clip_df = data.loc[idxs].reset_index(drop=True)
    return clip_df

In [5]:
get_random_mixed_clip()

Unnamed: 0,Actor,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetion,Gender,Audio_file
0,Actor_02,3,1,5,2,2,2,1,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
1,Actor_08,3,1,6,1,2,1,1,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
2,Actor_12,3,1,8,2,2,2,1,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...


In [7]:
def combine_clips(clip_df):
    clips = []
    for path in clip_df['Audio_file']:
        clips.append(AudioSegment.from_wav(path))
    final_clip = clips[0]
    for clip in clips[1:]:
        final_clip = final_clip + clip
    
    return final_clip

In [8]:
mixed_clip_df = get_random_mixed_clip()
mixed_clip = combine_clips(mixed_clip_df)
mixed_clip_df  # showing output in ipynb terminal

Unnamed: 0,Actor,Modality,Vocal_channel,Emotion,Emotional_intensity,Statement,Repetion,Gender,Audio_file
0,Actor_11,3,1,2,2,1,2,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
1,Actor_11,3,1,3,2,2,1,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...
2,Actor_13,3,1,7,2,2,2,0,/Users/iyeng/Desktop/NTU/NTU Sem 5/CZ4042/Grou...


In [9]:
def generate_clip_name(clip_df):
    emotion_dict = {1:"neutral", 2:"calm", 3:"happy", 4:"sad", 5:'angry', 6:'fearful', 7:'disgust', 8:'surprised'}
    name = 'mixed_clip'
    for i in range(len(clip_df)):
        name+= '_'+clip_df['Actor'][i][-2:]+'_'+emotion_dict[clip_df['Emotion'][i]]
    return name+'.wav'

In [10]:
generate_clip_name(get_random_mixed_clip())

'mixed_clip_10_sad_21_angry_24_angry_24_fearful.wav'

In [11]:
for _ in range(10):  # generating 10 clips
    mixed_clip_df = get_random_mixed_clip()
    mixed_clip = combine_clips(mixed_clip_df)
    clip_name = generate_clip_name(mixed_clip_df)
    mixed_clip.export("mixed_data/"+clip_name, format="wav")