-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
226 lines (161 loc) · 7.87 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import re
import logging
from uuid import uuid4
from typing import Any
import gradio as gr
from transformers import pipeline
import numpy as np
from gtts import gTTS
from utils.constants import FrenchStyle, UnitsFrenchForm
from french_converter import FrenchConverter
from parsers import *
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Speech-to-text pipeline, built once when the module is imported, using the
# `openai/whisper-base.en` checkpoint. `recognize_speech_and_tts` calls it.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
def _parse_whole_number(token: str):
    """Return `token` as a non-negative int, or `None` when it is not a plain run of decimal digits."""
    # `str.isdecimal` only accepts characters that `int()` can parse, unlike
    # `str.isnumeric`, which also accepts e.g. "½" or "²" and would make `int()` raise.
    if not token.isdecimal():
        return None
    try:
        return int(token)
    except ValueError:
        # Extremely long digit strings can exceed the interpreter's int-conversion limit.
        return None


def convert_to_french(nums_str: str, french_style: str, output_type: str) -> Any:
    """
    Convert each number in a comma-separated string to its French form.

    Parameters
    ----------
    nums_str: str
        A comma-separated string of the numbers passed through the Gradio input field.
        Square brackets and single/double quotes are removed before splitting on commas.
    french_style: str
        The type of French to use for conversion; forwarded to `FrenchConverter`.
        See Enum `FrenchStyle` in `constants.py` for more details.
    output_type: str
        The type of output desired by the user after conversion.
        See Enum `OutputType` for more details.

    Returns
    -------
    When `output_type` equals `OutputType.JSON.value`, a dictionary mapping every
    (whitespace-stripped) input value to its conversion; otherwise a list of the
    conversions in input order.
    A value that is not a non-negative whole number, or that has more than 12 digits,
    is converted to `None`.

    Limitations:
    1. Currently supports only the numerical part conversion.
    2. Decimal part and Fractional numbers conversion will be added in the future.
    """
    print(f"Given Input, french-style and output-type: {nums_str}, {french_style}, {output_type}")

    logger.info("Extracting numbers from string, replacing rudimentary symbols and splitting")
    # One pass removes the brackets and quotes a user may have typed around the list.
    cleaned = nums_str.translate(str.maketrans("", "", "[]\"'"))
    # Strip each entry so the JSON keys do not carry stray spaces (e.g. " 10").
    original_values = [value.strip() for value in cleaned.split(",")]

    # Numbers longer than this many digits are reported as `None`.
    # NOTE(review): presumably the upper bound supported by FrenchConverter - confirm.
    max_digits = 12

    french_converted_list = []
    french_converted_json = {}

    logger.info("Loop through the extracted numbers and convert them to french form!")
    for original_value in original_values:
        num = _parse_whole_number(original_value)
        if num is None or len(str(num)) > max_digits:
            converted_val = None
        elif num == 0:
            # Zero is handled here instead of going through FrenchConverter.
            converted_val = UnitsFrenchForm.ZERO.value
        else:
            converted_val = FrenchConverter(num, french_style).convert()

        french_converted_list.append(converted_val)
        french_converted_json[original_value] = converted_val

    print(f"Generated Output: {french_converted_json}\n")

    if output_type == OutputType.JSON.value:
        return french_converted_json
    return french_converted_list
def _save_speech(text: str, lang: str, file_name: str) -> str:
    """Synthesize `text` with gTTS in language `lang`, save it to `file_name` and return that path."""
    import os

    # gTTS does not create missing folders; make sure the target directory exists.
    directory = os.path.dirname(file_name)
    if directory:
        os.makedirs(directory, exist_ok=True)

    generated_speech = gTTS(text=text,
                            lang=lang,
                            slow=True,
                            tld=SpeechLangs.FR.value)
    generated_speech.save(file_name)
    return file_name


def recognize_speech_and_tts(audio: gr.Audio, french_style: str) -> str:
    """
    Transcribe the spoken audio, extract the numbers in it, convert them to their
    French form and generate French speech for the result.

    Parameters
    ----------
    audio: gr.Audio
        Speech spoken through a Microphone by the user. It is unpacked as a
        `(sampling_rate, samples)` pair; `None` (nothing recorded) is tolerated.
    french_style: str
        The type of french to use for conversion. See Enum `FrenchStyle` in `constants.py` file for more details.

    Returns
    -------
    A string specifying the location of the generated audio file. When no usable
    number is recognized (no audio, silence, no digits, or only numbers that
    `convert_to_french` could not convert), the file contains the sorry message.
    """
    # Last 12 hex characters of a random UUID keep concurrent requests from colliding.
    file_random_uuid = uuid4().hex[-12:]
    # gTTS writes MP3 data, so the file is named accordingly.
    audio_file_name = f"generated_audio/audio-{file_random_uuid}.mp3"

    speech_output_txt = ""
    if audio is not None:
        logger.info("Extracting sampling rate and raw speech from the spoken audio!")
        sampling_rate, raw = audio
        raw = raw.astype(np.float32)
        if raw.ndim > 1:
            # Multi-channel recording: average the channels down to mono.
            raw = raw.mean(axis=1)

        peak = np.max(np.abs(raw)) if raw.size else 0.0
        if peak > 0:
            # Normalize to [-1, 1]; skipped for silent/empty input to avoid dividing by zero.
            raw /= peak
            speech_output = transcriber(
                {
                    "sampling_rate": sampling_rate,
                    "raw": raw
                }
            )
            speech_output_txt = speech_output["text"]

    logger.info("Converted Audio and now Extracting Numbers")
    recognized_nums = re.findall(r'\b\d+\b', speech_output_txt)

    converted_nums = []
    if recognized_nums:
        print("Calling `convert_to_french` from Speech function!")
        converted_nums = convert_to_french(nums_str=", ".join(recognized_nums),
                                           french_style=french_style,
                                           output_type=OutputType.LIST.value)

    # `convert_to_french` yields None for values it cannot convert; those cannot be spoken.
    speakable_nums = [converted.replace("-", " ") for converted in converted_nums if converted is not None]

    if not speakable_nums:
        print("No Numbers found, returning back with sorry message!")
        return _save_speech(SpeechOutputs.SORRY_MSG.value, SpeechLangs.EN.value, audio_file_name)

    converted_nums_str = SpeechOutputs.AND_CONNECTOR.value.join(speakable_nums)
    speech_prompt = SpeechOutputs.SUCCESS_MSG.value + converted_nums_str

    print(f"Generating the speech of the constructed prompt: {speech_prompt}\n\n")
    return _save_speech(speech_prompt, SpeechLangs.FR.value, audio_file_name)
def main():
    """Assemble the two-tab Gradio interface (text input and speech input) and launch it."""

    def build_french_style_dropdown():
        # Both tabs offer the same French-style selector.
        return gr.Dropdown(
            choices=[style.value for style in FrenchStyle],
            label="Select a French Style",
            value=FrenchStyle.FRANCE_FRENCH.value,
        )

    with gr.Blocks(title="Kata: Number to French Converter") as gradio_app:
        gr.Markdown("\n# Kata: Number to French Converter\n")

        # Tab 1: typed list of numbers -> JSON/list of French forms.
        with gr.Tab("Using Text"):
            numbers_box = gr.Text(
                label="Input List of Numbers (Comma Separated, as shown in placeholder)",
                placeholder="[0, 10]",
            )
            style_picker_text = build_french_style_dropdown()
            output_type_picker = gr.Dropdown(
                choices=[kind.value for kind in OutputType],
                label="Desired Output Type",
                value=OutputType.JSON.value,
            )
            convert_button = gr.Button(
                "Convert to French Form",
                variant="primary",
                scale=0,
            )
            json_view = gr.Json(label="French Form of the input number list")

            convert_button.click(
                fn=convert_to_french,
                inputs=[numbers_box, style_picker_text, output_type_picker],
                outputs=json_view,
            )

        # Tab 2: microphone recording -> generated speech of the French forms.
        with gr.Tab("Using Speech"):
            microphone = gr.Audio(
                sources=["microphone"],
                label="Click on `Record` to speak through your Microphone and press `Stop` when done",
                show_download_button=True,
            )
            style_picker_speech = build_french_style_dropdown()
            speak_button = gr.Button(
                "Recognize Numbers and Generate French Form Audio",
                variant="primary",
                scale=0,
            )
            speech_player = gr.Audio(
                autoplay=True,
                label="Generated Speech",
                show_download_button=True,
            )

            speak_button.click(
                fn=recognize_speech_and_tts,
                inputs=[microphone, style_picker_speech],
                outputs=speech_player,
            )

    logger.info("Launching Gradio App Interface for accepting inputs!")
    gradio_app.launch()
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    main()