-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_dataset.py
77 lines (50 loc) · 2.07 KB
/
prepare_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import shutil
import csv
from pathlib import Path
from commons import CONFIGS_DIR
from configuration import load_config
# Name of the configuration file that main() loads from CONFIGS_DIR.
CONFIG_FILENAME = 'prepare_config.json'
# Name of the CSV file that main() writes into the configured 'csv_path' directory.
CSV_FILENAME = 'speakers.csv'
class SpeakerData:
    """Plain record describing one speaker and the files collected for them.

    The constructor arguments are stored unchanged as the attributes
    ``id``, ``sex``, ``dialect`` and ``files``.
    """

    def __init__(self, id: str, sex: str, dialect: str, files: list):
        # ``id`` shadows the builtin, but it is part of the constructor's
        # keyword interface, so the name is kept.
        self.id, self.sex = id, sex
        self.dialect, self.files = dialect, files
def get_directories(parent_dir: Path) -> list:
    """Return the immediate subdirectories of ``parent_dir`` in sorted order.

    Args:
        parent_dir: Directory whose direct children are inspected.

    Returns:
        A list of ``Path`` objects, one per child entry that is a directory.
        Non-directory entries are skipped.

    Raises:
        OSError: Propagated from ``Path.iterdir`` (for example
            ``FileNotFoundError`` when ``parent_dir`` does not exist).
    """
    # iterdir() yields entries in arbitrary, filesystem-dependent order;
    # sorting makes the result reproducible from run to run.
    return sorted(entry for entry in parent_dir.iterdir() if entry.is_dir())
def get_speakers_data(dataset_dir: str) -> list:
    """Build a ``SpeakerData`` record for every speaker directory found.

    The dataset is walked exactly three directory levels deep below
    ``dataset_dir``: a top-level ("mode") directory, then a dialect
    directory, then a speaker directory. For each speaker directory the
    first character of its name is used as the speaker's sex and the
    remainder as the speaker's ID; the dialect is the name of the
    enclosing dialect directory.

    Args:
        dataset_dir: Dataset root. It is joined onto the current working
            directory, so an absolute path is used as-is.

    Returns:
        A list of ``SpeakerData`` objects. Each one's ``files`` attribute is
        a sorted list of the ``Path`` entries in the speaker directory whose
        suffix is exactly ``'.WAV'`` (the match is case-sensitive).

    Raises:
        OSError: Propagated from directory listing, e.g. when the dataset
            root does not exist.
    """
    dataset_path = Path.cwd().joinpath(dataset_dir)
    speakers = []
    for mode_dir in get_directories(dataset_path):
        for dialect_dir in get_directories(mode_dir):
            for speaker_dir in get_directories(dialect_dir):
                dir_name = speaker_dir.name
                # iterdir() yields entries in arbitrary order; sort so the
                # per-speaker file list is reproducible between runs.
                speaker_files = sorted(
                    entry for entry in speaker_dir.iterdir()
                    if entry.suffix == '.WAV'
                )
                speakers.append(
                    SpeakerData(
                        dir_name[1:],      # ID: name without its first character
                        dir_name[0],       # sex: first character of the name
                        dialect_dir.name,
                        speaker_files,
                    )
                )
    return speakers
def write_csv_file(csv_filename: "str | Path", speakers: list):
    """Write one CSV row per speaker file to ``csv_filename``.

    The file starts with the header ``ID, Sex, Dialect, File``; every file
    of every speaker then contributes one row holding the speaker's ``id``,
    ``sex`` and ``dialect`` followed by that file. Speakers with no files
    contribute no rows. An existing file is overwritten.

    Args:
        csv_filename: Destination path (string or ``Path``). It is joined
            onto the current working directory, so an absolute path is used
            as-is.
        speakers: Iterable of objects exposing ``id``, ``sex``, ``dialect``
            and ``files`` attributes.

    Raises:
        OSError: If the destination directory cannot be created or the file
            cannot be written.
    """
    # Collect every row before touching the filesystem so that a malformed
    # speaker object cannot leave a truncated file behind.
    rows = [["ID", "Sex", "Dialect", "File"]]
    rows.extend(
        [speaker.id, speaker.sex, speaker.dialect, speaker_file]
        for speaker in speakers
        for speaker_file in speaker.files
    )

    csv_path = Path.cwd().joinpath(csv_filename)
    # The destination directory may not exist yet; create it rather than
    # failing with FileNotFoundError on open().
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    # newline='' is required by the csv module; the explicit encoding keeps
    # the output independent of the platform's locale default.
    with open(csv_path, 'w', newline='', encoding='utf-8') as file:
        csv.writer(file).writerows(rows)
def main():
    """Load the preparation config, scan the dataset and write the CSV.

    The config file ``CONFIG_FILENAME`` is looked up in ``CONFIGS_DIR``
    under the current working directory. Its ``'dataset_dir'`` entry is
    the dataset to scan, and its ``'csv_path'`` entry is the directory
    that receives ``CSV_FILENAME``. Progress is reported on stdout.
    """
    settings = load_config(Path.cwd().joinpath(CONFIGS_DIR, CONFIG_FILENAME))

    dataset_dir = settings['dataset_dir']
    print(f'Getting speakers data from {dataset_dir}')
    speaker_records = get_speakers_data(dataset_dir)

    # Despite its name, the 'csv_path' setting is used as the directory
    # in which the CSV file is placed.
    csv_path = Path(settings['csv_path']) / CSV_FILENAME
    print(f'Writing speakers data to {csv_path}')
    write_csv_file(csv_path, speaker_records)

    print('Done')
# Run the dataset preparation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()