In [43]:
import os
from pathlib import Path
import re

import pandas as pd
from shutil import move, rmtree

In [3]:
review_path = Path("data/esd_done/")

df = None
for file in review_path.rglob("*.xlsx"):
    if df is None:
        df = pd.read_excel(file, engine="openpyxl")
    else:
        df = pd.concat([df, pd.read_excel(file, engine="openpyxl")], ignore_index=True)

In [4]:
df['speaker'] = df['filename'].str.split("_").apply(lambda x: x[0][1:])

In [5]:
df.head()

Unnamed: 0,filename,text,modified text,accept,reject,comment,doubt,speaker
0,F0016_angry_0016_000351,"The nine the eggs , I keep .",,accepted,,,,16
1,F0016_angry_0016_000352,"I did go , and made many prisoners .",,accepted,,,,16
2,F0016_angry_0016_000353,That I owe my thanks to you .,,accepted,,,,16
3,F0016_angry_0016_000354,They went up to the dark mass job had pointed ...,,accepted,,,,16
4,F0016_angry_0016_000355,Clear than clear water !,,accepted,,,,16


In [6]:
df[~df["reject"].isnull()].shape

(10, 8)

In [7]:
df[df["accept"] == "modified"]

Unnamed: 0,filename,text,modified text,accept,reject,comment,doubt,speaker
34,F0016_angry_0016_000385,The name really is the agedaged man .,The name really is the adaged man .,modified,,,,16
85,F0016_angry_0016_000436,And tick a tack too .,And a tick a tack too .,modified,,,,16
381,F0016_surprise_0016_001435,The name really is the agedaged man .,The name really is the adaged man .,modified,,,,16
427,F0016_surprise_0016_001486,And tick a tack too .,And a tick a tack too .,modified,,,,16
1438,F0015_sad_0015_001313,She said in subdued voice .,She said in a subdued voice .,modified,,,,15
1456,F0015_sad_0015_001348,She come back to the valley .,She came back to the valley .,modified,,,,15
1521,F0016_neutral_0016_000035,The name really is the agedaged man .,The name really is the adaged man .,modified,,,,16
1582,F0016_neutral_0016_000096,I don’t painted tiger .,I don’t painted a tiger .,modified,,,,16
1634,F0016_neutral_0016_000148,"And on the top of them , came Winnie , the cut...","And on top of them , came Winnie , the cute be...",modified,,,,16
1980,F0017_neutral_0017_000144,We all see panda on tv or in the zoo .,We all see a panda on tv or in the zoo .,modified,,,,17


In [9]:
folders = ["train", "evaluation", "test"]

datapath = Path("data/esd")

for speaker in os.listdir(datapath):
    wav_dir = datapath / speaker / "wav"
    wav_dir.mkdir(exist_ok=True, parents=True)
    
    for emo in ["Angry", "Happy", "Neutral", "Sad", "Surprise"]:
        if not (datapath / speaker / emo ).is_dir():
            continue
        for folder in folders:
            for file in os.listdir(datapath / speaker / emo / folder):
                os.replace(datapath / speaker / emo / folder / file, wav_dir / f"{emo}_{file}")
            if (datapath / speaker / emo / folder).is_dir():
                os.removedirs(datapath / speaker / emo / folder)
        if (datapath / speaker / emo ).is_dir():
            os.removedirs(datapath / speaker / emo)

In [10]:
for speaker in os.listdir(datapath):
    wav_dir = datapath / speaker / "wav"
    for file in os.listdir(wav_dir):
        os.replace(wav_dir / file, wav_dir / file.split("_", 1)[1])


In [11]:
encodings = {
    "0011": "ascii",
    "0012": "utf16",
    "0013": "utf16",
    "0014": "utf16",
    "0015": "ascii",
    "0016": "iso8859",
    "0017": "iso8859",
    "0018": "utf16",
    "0020": "ascii",
}

In [30]:
for speaker, encoding in encodings.items():
    text_folder = Path(f"data/esd/{speaker}/text")
    text_folder.mkdir(parents=True, exist_ok=True)
    with open(f"data/esd/{speaker}/{speaker}.txt", encoding=encoding) as f:
        for row in f:
            if re.search("\w", row):
                name, text = row.split(maxsplit=1)
                with open(text_folder / f"{name}.txt", "w") as fw:
                    fw.write(text + "\n")
    os.remove(f"data/esd/{speaker}/{speaker}.txt")

In [36]:
wavs = list(Path("data/esd").rglob("*.wav"))

In [37]:
texts = list(Path("data/esd").rglob("*.txt"))

In [41]:
audio_dir = Path("data/esd/audio")
text_dir = Path("data/esd/text")
audio_dir.mkdir(exist_ok=True, parents=True)
text_dir.mkdir(exist_ok=True, parents=True)
for file in wavs:
    speaker = file.parent.parent.name
    speaker_audio_dir = audio_dir / speaker
    speaker_audio_dir.mkdir(exist_ok=True, parents=True)
    move(file, speaker_audio_dir / file.name)
    

In [42]:
for file in texts:
    speaker = file.parent.parent.name
    speaker_text_dir = text_dir / speaker
    speaker_text_dir.mkdir(exist_ok=True, parents=True)
    move(file, speaker_text_dir / file.name)

In [44]:
for speaker in encodings.keys():
    rmtree(f"data/esd/{speaker}")

In [47]:
OUTPUT_DIR = "data/esd"

In [49]:
!python src/preprocessing/resampling.py --input-dir $OUTPUT_DIR/audio --output-dir $OUTPUT_DIR/resampled --resample-rate 22050 --audio-ext wav

Number of audio files found: 15750
Resampling audio from 16000 Hz to 22050 Hz...
100%|████████████████████████████████████| 15750/15750 [01:31<00:00, 171.52it/s]
Finished successfully.
Processed files are located at data/esd/resampled


In [50]:
!python src/preprocessing/wav_to_mel.py --input-dir $OUTPUT_DIR/resampled --output-dir $OUTPUT_DIR/mels --audio-ext wav

Number of audio files found: 15750
Transforming audio to mel...
  mel = librosa_mel(sample_rate, n_fft, num_mels, fmin, fmax)
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]
100%|████████████████████████████████████| 15750/15750 [02:14<00:00, 117.22it/s]
Finished successfully.
Processed files are located at data/esd/mels
[0m

In [51]:
!python src/preprocessing/text_normalization.py --input-dir $OUTPUT_DIR/text --output-dir $OUTPUT_DIR/mfa_inputs


Number of text files found: 15749
Normalizing texts...
100%|███████████████████████████████████| 15749/15749 [00:07<00:00, 2238.27it/s]
Finished successfully.
Processed files are located at data/esd/mfa_inputs


In [52]:
!mkdir -p models && wget -q --show-progress https://github.com/MontrealCorpusTools/mfa-models/raw/main/acoustic/english.zip -P models



In [53]:
!wget -q --show-progress http://www.openslr.org/resources/11/librispeech-lexicon.txt -P models



In [55]:
!python src/preprocessing/mfa_preprocessing.py --input-dir $OUTPUT_DIR/resampled --output-dir $OUTPUT_DIR/mfa_inputs

9it [00:29,  3.26s/it]
15750 files were copied to data/esd/mfa_inputs


In [63]:
!mfa align -t ./temp --clean -j 4 data/esd/mfa_inputs models/librispeech-lexicon.txt models/english.zip data/esd/mfa_outputs

Cleaning old directory!
[32mINFO[0m - Setting up corpus information...
[32mINFO[0m - Number of speakers in corpus: 9, average number of utterances per speaker: 1750.0
[32mINFO[0m - Parsing dictionary "librispeech-lexicon" without pronunciation probabilities without silence
                probabilities
[32mINFO[0m - Creating dictionary information...
[32mINFO[0m - Setting up training data...
[32mINFO[0m - Generating base features (mfcc)...
[32mINFO[0m - Calculating CMVN...
[32mINFO[0m - Setting up training data...
[32mINFO[0m - Setting up training data...
[32mINFO[0m - Done with setup!
[32mINFO[0m - Performing first-pass alignment...
[32mINFO[0m - Calculating fMLLR for speaker adaptation...
[32mINFO[0m - Performing second-pass alignment...
[32mINFO[0m - Generating CTMs from alignment...
[32mINFO[0m - Finished generating CTMs!
[32mINFO[0m - Exporting TextGrids from CTMs...
[32mINFO[0m - Finished exporting TextGrids!
[32mINFO[0m - All done!


In [64]:
!rm -rf temp

In [65]:
!python src/preprocessing/mfa_postprocessing.py --input-dir data/esd/mels

15750it [00:00, 16460.46it/s]
Finished successfully.


In [None]:
rejected = df[df["accept"].isnull()]

In [None]:
for filename, speaker in zip(rejected["filename"], rejected["speaker"]):
    if os.path.isfile(f"data/esd/{speaker}/{filename}.wav"):
        os.remove(f"data/esd/{speaker}/{filename}.wav")
    if os.path.isfile(f"data/esd/{speaker}/{filename}.txt"):
        os.remove(f"data/esd/{speaker}/{filename}.txt")

In [12]:
import tgt

In [13]:
for speaker in os.listdir(datapath / "resampled"):
    for file in os.listdir(f"data/esd_vctk/mfa_outputs/{speaker}/"):
        if os.path.isdir(f"data/esd_vctk/mfa_outputs/{speaker}/{file}"):
            continue
        text_grid = tgt.read_textgrid(f"data/esd_vctk/mfa_outputs/{speaker}/{file}")
        words = text_grid.get_tier_by_name("words")
        text = " ".join([x.text for x in words.intervals])
        with open(datapath / speaker / f"{file.replace('TextGrid', 'txt')}", "w") as f:
            f.write(text + "\n")

FileNotFoundError: [Errno 2] No such file or directory: 'data/esd/resampled'

In [251]:
modified = df[df["accept"] == "modified"]

In [252]:
modified.head()

Unnamed: 0,filename,text,modified text,accept,reject,comment,doubt,speaker
34,0016_000385,The name really is the agedaged man .,The name really is the adaged man .,modified,,,,16
85,0016_000436,And tick a tack too .,And a tick a tack too .,modified,,,,16
381,0016_001435,The name really is the agedaged man .,The name really is the adaged man .,modified,,,,16
427,0016_001486,And tick a tack too .,And a tick a tack too .,modified,,,,16
1423,0015_001024,she caught at the brooch .,And she caught at the brooch .,modified,,,,15


In [254]:
for filename, speaker, orig, modif in zip(modified["filename"], modified["speaker"], modified["text"], modified["modified text"]):
    path = datapath / speaker / f"{filename}.txt"
    with open(path, "w") as f:
        f.write(modif + "\n")

In [5]:
df["speaker"].unique()

array(['0016', '0018', '0015', '0017'], dtype=object)

In [5]:
copy_speakers = list(df["speaker"].unique()) + [s for s in os.listdir("data/esd_vctk/resampled/") if not s.startswith("00")]

In [6]:
exclude_speakers = [s for s in os.listdir("data/esd_vctk/resampled/") if s not in copy_speakers]

In [7]:
exclude_speakers

['0013', '0012', '0011', '0014', '0020']

In [8]:
percentages = [7, 3, 2, 1, 0.5]

In [9]:
from shutil import copy

In [10]:
import json

In [11]:
with open("speaker_emo.json") as f:
    speaker_emo_dict = json.load(f)

In [12]:
def get_files(p, speaker):
    emo_dict = speaker_emo_dict[speaker]
    files = []
    for val in emo_dict.values():
        to_get = int(p * len(val))
        files.extend(val[:to_get])
    return files

In [14]:
ext_dict = {
    "mels": "pkl",
    "mfa_outputs": "TextGrid",
    "resampled": "wav"
}

In [16]:
root_path = Path("data/esd_vctk/")

for p in percentages:
    new_path = Path(f"data/esd_vctk_{p}/")
    for folder in os.listdir(root_path):
        for speaker in copy_speakers:
            speaker_path = new_path / folder / speaker
            speaker_path.mkdir(exist_ok=True, parents=True)
            if (root_path / folder / speaker).is_dir():
                for file in os.listdir(root_path / folder / speaker):
                    copy(root_path / folder / speaker / file, speaker_path / file)
        for speaker in exclude_speakers:
            speaker_path = new_path / folder / speaker
            speaker_path.mkdir(exist_ok=True, parents=True)
            files_copy = get_files(p / 15, speaker)
            for file in files_copy:
                file = f"{file}.{ext_dict[folder]}"
                if (root_path / folder / speaker / file).is_file():
                    copy(root_path / folder / speaker / file, speaker_path / file)