-
Notifications
You must be signed in to change notification settings - Fork 1
/
voiceTranslationAnalysis.py
executable file
·99 lines (79 loc) · 2.87 KB
/
voiceTranslationAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""
import os
import pathlib
import co
import numpy as np
import tensorflow as tf
def main() -> co.Serial:
path = "/conducto/data/pipeline"
root = co.Serial(image=get_image())
# Get data from keras for testing and training
root["Get Data"] = co.Exec(run_whole_thing, f"{path}/raw")
return root
def run_whole_thing(out_dir):
os.makedirs(out_dir, exist_ok=True)
# Set seed for experiment reproducibility
seed = 55
tf.random.set_seed(seed)
np.random.seed(seed)
data_dir = pathlib.Path("data/mini_speech_commands")
if not data_dir.exists():
# Get the files from external source and put them in an accessible directory
tf.keras.utils.get_file(
'mini_speech_commands.zip',
origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
extract=True
)
# Convert the binary audio file to a tensor
def decode_audio(audio_binary):
audio, _ = tf.audio.decode_wav(audio_binary)
return tf.squeeze(audio, axis=-1)
def get_label(file_path):
parts = tf.strings.split(file_path, os.path.sep)
return parts[-2]
# Create a tuple that has the labeled audio files
def get_waveform_and_label(file_path):
label = get_label(file_path)
audio_binary = tf.io.read_file(file_path)
waveform = decode_audio(audio_binary)
return waveform, label
# Convert audio files to images
def get_spectrogram(waveform):
# Padding for files with less than 16000 samples
zero_padding = tf.zeros([16000] - tf.shape(waveform), dtype=tf.float32)
# Concatenate audio with padding so that all audio clips will be of the same length
waveform = tf.cast(waveform, tf.float32)
equal_length = tf.concat([waveform, zero_padding], 0)
spectrogram = tf.signal.stft(
equal_length, frame_length=255, frame_step=128
)
spectrogram = tf.abs(spectrogram)
return spectrogram
# Label the images created from the audio files and return a tuple
def get_spectrogram_and_label_id(audio, label):
spectrogram = get_spectrogram(audio)
spectrogram = tf.expand_dims(spectrogram, -1)
label_id = tf.argmax(label == commands)
return spectrogram, label_id
# Preprocess any audio files
def preprocess_dataset(files, autotune, commands):
# Creates the dataset
files_ds = tf.data.Dataset.from_tensor_slices(files)
# Matches audio files with correct labels
output_ds = files_ds.map(
get_waveform_and_label,
num_parallel_calls=autotune
)
# Matches audio file images to the correct labels
output_ds = output_ds.map(
get_spectrogram_and_label_id, num_parallel_calls=autotune)
return output_ds
def get_image():
return co.Image(
"python:3.8-slim",
copy_dir=".",
reqs_py=["conducto", "tensorflow", "keras"],
)
if __name__ == "__main__":
co.main(default=main)
"""