<div align="center">
    
<br>

## FINE-GRAINED EMOTIONAL CONTROL OF TEXT-TO-SPEECH

#### LEARNING TO RANK INTER- AND INTRA-CLASS EMOTION INTENSITIES

Shijun Wang, Jón Guðnason, Damian Borth

**ICASSP 2023**

<br>

---

<br>

</div>

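**1. Preprocessing**

Build the corpus for the Montreal Forced Aligner from EmoV-DB: read the CMU ARCTIC transcripts, resample every recording to 16 kHz, and write each utterance as a `.wav` file with a matching `.lab` transcript.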



In [11]:
import os
from text import _clean_text

# Root of the EmoV-DB download; `cmuarctic.data` is read from this directory.
DATA_PATH = '/workspace/data/EmoV-DB'
# Directory that receives the generated corpus.
TARGET_DATA_PATH = '/workspace/montreal_forced_aligner/corpus'
# Token placed before and after every cleaned transcript.
NOISE_SYMBOL = ' [noise] '

# Maps the last four characters of an utterance id (e.g. '0001') to its
# cleaned transcript wrapped in noise tokens.
audio_id_to_transcript = {}

# Expected line format (CMU ARCTIC prompt file):
#   ( arctic_a0001 "Some sentence." )
# The id is the text before the first double quote, the transcript is the
# text between the first and second double quote.
with open(os.path.join(DATA_PATH, 'cmuarctic.data'), encoding='utf-8') as f:
    # Iterate lazily instead of materialising the whole file with readlines().
    for line in f:
        fields = line.strip().split('"')

        # Blank or quote-less lines cannot be parsed; skip them instead of
        # failing on tuple unpacking.
        if len(fields) < 2:
            continue

        audio_id = fields[0].strip().lstrip('(').strip()
        transcript = fields[1].strip()

        # Only the 'arctic_a' prompts are kept.
        if not audio_id or audio_id.startswith('arctic_b'):
            continue

        # Keep only the trailing 4-character utterance number as the key.
        audio_id = audio_id[-4:]
        transcript = NOISE_SYMBOL + _clean_text(transcript, ['english_cleaners']) + NOISE_SYMBOL

        # NOISE_SYMBOL carries padding spaces on both sides; drop the outer ones.
        audio_id_to_transcript[audio_id] = transcript.strip()

In [12]:
import glob

import librosa
import scipy
import scipy.io.wavfile
import tqdm

speakers = ['bea', 'jenie', 'josh', 'sam']
emotions = ['neutral', 'amused', 'angry', 'disgusted', 'sleepy']

# Sample rate (Hz) every recording is resampled to before being written out.
TARGET_SAMPLE_RATE = 16000

# For every (speaker, emotion) folder under DATA_PATH, resample each .wav file
# and write it to TARGET_DATA_PATH/<speaker>/<emotion>_<id>.wav together with a
# .lab file holding the transcript looked up in `audio_id_to_transcript`.
for speaker in tqdm.tqdm(speakers):
    speaker_out_dir = os.path.join(TARGET_DATA_PATH, speaker)

    for emotion in emotions:

        # check the path existence: josh has only three emotions
        spk_emo_path = os.path.join(DATA_PATH, speaker, emotion)
        if not os.path.exists(spk_emo_path):
            continue

        # Sorted so that processing order is reproducible across runs.
        wav_paths = sorted(glob.glob(os.path.join(spk_emo_path, '*.wav')))
        if not wav_paths:
            continue

        # Create the speaker folder once per emotion rather than once per file.
        os.makedirs(speaker_out_dir, exist_ok=True)

        # resample and create .lab file
        for wav_path in wav_paths:

            y, sr = librosa.load(wav_path, sr=TARGET_SAMPLE_RATE)

            # The utterance id is the last four characters of the file stem.
            audio_id = os.path.splitext(os.path.basename(wav_path))[0][-4:]
            try:
                transcript = audio_id_to_transcript[audio_id]
            except KeyError:
                # Same exception type as a plain lookup, but names the file.
                raise KeyError(
                    f'no transcript for utterance id {audio_id!r} (file: {wav_path})'
                ) from None

            tgt_path = os.path.join(speaker_out_dir, f'{emotion}_{audio_id}')
            # NOTE(review): `y` is written with whatever dtype librosa returned
            # (float32 by default, i.e. a 32-bit float WAV) - confirm that the
            # aligner accepts this format.
            scipy.io.wavfile.write(tgt_path + '.wav', sr, y)

            with open(tgt_path + '.lab', 'w', encoding='utf-8') as f:
                f.write(transcript + '\n')


100%|██████████| 4/4 [00:27<00:00,  6.77s/it]
