-
Notifications
You must be signed in to change notification settings - Fork 4
/
generate_mels.py
93 lines (80 loc) · 3.46 KB
/
generate_mels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
import time
import argparse
import math
import numpy as np
import torch
import os.path
import layers
from torch.utils.data import DataLoader
from data_utils import TextMelLoader, TextMelCollate
from hparams import create_hparams
from tqdm import tqdm
from scipy.io.wavfile import read
from utils import load_wav_to_torch, load_filepaths_and_text
from multiprocessing import Pool
def chunks(lst, n):
    """Yield successive n-sized chunks from lst.

    Example: list(chunks([0,1,2,3,4,5,6,7,8,9], 2))
             -> [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
    """
    start = 0
    while start < len(lst):
        yield lst[start:start + n]
        start += n
def even_split(a, n):
    """Split a into n separate chunks of roughly even length.

    The first (len(a) % n) chunks receive one extra element, so chunk
    lengths differ by at most one. Returns a generator of slices of a.
    """
    if not a:
        # Guard: with an empty input, n becomes 0 below and
        # divmod(len(a), 0) would raise ZeroDivisionError.
        return iter(())
    n = min(n, len(a))  # if fewer elements than chunks requested, one chunk per element
    k, m = divmod(len(a), n)  # k = base chunk size, m = chunks that get one extra
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))
def multiprocess_arr(function, file_paths, threads=16):
    """Run *function* over *file_paths* split across a pool of worker processes.

    file_paths is divided into `threads` roughly-even slices (via even_split)
    and each worker processes one slice. Any truthy value a worker returns
    (e.g. a status/error string) is printed afterwards.
    """
    # Context-manager form guarantees the pool is terminated and joined even
    # if p.map raises — the original constructed Pool and never closed it.
    with Pool(threads) as p:
        split_file_paths = list(even_split(file_paths, threads))
        result = p.map(function, split_file_paths)
    for output in result:
        if output: print(output)
def multiprocess_gen_mels(audiopaths_internal):
    """Worker: compute a mel spectrogram for each audio path and save it as .npy.

    audiopaths_internal: one slice of paths (".npy"- or ".wav"-suffixed)
    handed to this process by multiprocess_arr. The .npy file is written
    next to the source .wav. Returns a status string for the parent to print.

    NOTE(review): reads the module-global `hparams` set in the __main__
    block — this only works when workers inherit it (fork start method);
    TODO confirm on spawn-based platforms.
    """
    import layers
    # One STFT instance per worker process, configured from global hparams.
    stft = layers.TacotronSTFT(
        hparams.filter_length, hparams.hop_length, hparams.win_length,
        hparams.n_mel_channels, hparams.sampling_rate, hparams.mel_fmin,
        hparams.mel_fmax)
    return_string = ""  # accumulates per-file error text (only filled by the disabled except below)
    total = len(audiopaths_internal)
    for index, path in enumerate(audiopaths_internal):
        if index < 0: continue  # no-op guard (enumerate never yields index < 0); presumably a manual-resume hook
        #try:
        # Operate on the .wav twin of the listed path.
        file = path.replace(".npy",".wav")
        audio, sampling_rate, max_wav_value = load_wav_to_torch(file)
        # Widen the nominal full-scale value if the waveform exceeds it,
        # so audio_norm below stays within [-1, 1].
        max_wav_value = max(max_wav_value, audio.max(), -audio.min()) # expect the impossible
        if sampling_rate != stft.sampling_rate:
            raise ValueError("{} {} SR doesn't match target {} SR".format(file,
                sampling_rate, stft.sampling_rate))
        audio_norm = audio / max_wav_value
        audio_norm = audio_norm.unsqueeze(0)  # add batch dimension for the STFT
        audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False)
        melspec = stft.mel_spectrogram(audio_norm)
        melspec = torch.squeeze(melspec, 0).cpu().numpy()
        # np.save appends ".npy" itself, so strip ".wav" to write alongside the audio.
        np.save(file.replace('.wav', ''), melspec)
        if not index % 1000:
            print(total-index)  # coarse progress: files remaining in this slice
        #except Exception as ex:
        #    return_string+=(path+" failed to process\nException: "+str(ex)+"\n")
    if not return_string:
        return_string = "No Errors on this process."
    return return_string
def create_mels(training_filelist, validation_filelist, threads):
    """Generate mel spectrograms for every audio path in both filelists.

    Collects the audio paths from the training and validation filelists and
    fans the mel generation out over `threads` worker processes.
    """
    # (removed unused `import glob` from the original)
    audiopaths = []
    audiopaths.extend(
        list(set([x[0] for x in load_filepaths_and_text(training_filelist) ]))
    ) # add all unique audio paths for training data
    audiopaths.extend(
        list(set([x[0] for x in load_filepaths_and_text(validation_filelist) ]))
    ) # add all unique audio paths for validation data
    # NOTE(review): de-dup is per-filelist only; a path present in both
    # filelists is processed twice (matches original behavior).
    print(str(len(audiopaths))+" files being converted to mels")
    multiprocess_arr(multiprocess_gen_mels, audiopaths, threads=threads)
if __name__ == '__main__':
    # `hparams` is deliberately a module-level global: the worker function
    # multiprocess_gen_mels reads it directly. Presumably workers inherit it
    # via fork — TODO confirm on spawn-based platforms (Windows/macOS).
    hparams = create_hparams()
    torch.backends.cudnn.enabled = hparams.cudnn_enabled
    torch.backends.cudnn.benchmark = hparams.cudnn_benchmark
    threads = 1 # Uses about 46GB of RAM each
    print("Generating Mels")
    create_mels(hparams.training_files, hparams.validation_files, threads)
    print("Finished Generating Mels")