/
flow.py
121 lines (106 loc) · 4.27 KB
/
flow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
@author: Amlaan Bhoi, Debojit Kaushik
@date: October 19 2018
Module to encapsulate speech-to-text, text-to-code, and text-to-speech requests to the Google Speech-to-Text API, the Google Text-to-Speech API, and the Microsoft LUIS API.
"""
import os
import json
import requests
import speech_recognition as sr
from google.cloud import speech
from google.cloud import texttospeech
from google.cloud.speech import enums
from google.cloud.speech import types
from CONST import CREDENTIAL_FILE_NAME, LUIS_ENDPOINT, HEADERS, PARAMS
# Build the credentials path by appending CREDENTIAL_FILE_NAME directly to the
# current working directory, then export it through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
# NOTE(review): plain concatenation means CREDENTIAL_FILE_NAME presumably starts
# with a path separator -- confirm against CONST.
PATH = "".join((os.getcwd(), CREDENTIAL_FILE_NAME))
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = PATH
class Speech2TextRequest:
    """Record speech from the microphone and transcribe it with Google Speech-to-Text."""

    @staticmethod
    def _create_to_text_request():
        """
        Function to record audio from microphone and transcribe it
        Args:
            None
        Returns:
            text: [string] speech to text response string, or None when
                the service returned no results
        """
        r = sr.Recognizer()
        mic = sr.Microphone(device_index=0)
        print("Say something...")
        with mic as source:
            # Listen to one second of background noise before recording.
            r.adjust_for_ambient_noise(source, duration=1.0)
            audio = r.listen(source)
        print("Audio recorded!")
        return Speech2TextRequest._transcribe_file(audio)

    @staticmethod
    def _transcribe_file(audio):
        """
        Function to call Google Speech-to-Text API to convert speech to text
        Args:
            audio: [speech_recognition.AudioData] recorded audio to transcribe
        Returns:
            transcript: [string] top alternative of the first result, or
                None when the response contains no results
        """
        client = speech.SpeechClient()
        wav_bytes = audio.get_wav_data()
        recognition_audio = types.RecognitionAudio(content=wav_bytes)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            # Use the rate the audio was actually captured at instead of a
            # hard-coded 44100: the WAV header carries the real rate and the
            # request is rejected when the two disagree.
            sample_rate_hertz=audio.sample_rate,
            language_code='en-US')
        response = client.recognize(config, recognition_audio)

        # Only the first result is used; make the "nothing recognized" case
        # an explicit None instead of falling off the end of a loop.
        first_result = next(iter(response.results), None)
        if first_result is None:
            return None
        transcript = first_result.alternatives[0].transcript
        print(u'Transcript: {}'.format(transcript))
        return transcript
class Text2CodeRequest:
    """Send a text query to the Microsoft LUIS endpoint and return its JSON reply."""

    @staticmethod
    def _create_to_code_request(query, headers, params):
        """
        Function to create a text to code request to Microsoft Luis
        Args:
            query: [string] text containing query
            headers: [dict] necessary headers for API request
            params: [dict] parameters for customizing API request (not modified)
        Returns:
            json_response: [dict] a dictionary holding response from Luis including
                intent classifications, or None if the request or decoding failed
        """
        try:
            # Work on a copy so the caller's dict (typically the shared PARAMS
            # constant) does not keep the query of the previous call.
            request_params = dict(params, q=query)
            r = requests.get(LUIS_ENDPOINT, headers=headers, params=request_params)
            json_response = r.json()
            print(r.status_code)
            print(json_response)
            return json_response
        except Exception as e:
            # Best-effort boundary: report the failure and return None.
            # Only OSError-style exceptions carry errno/strerror; reading them
            # unconditionally would raise AttributeError inside this handler.
            errno = getattr(e, "errno", None)
            if errno is not None:
                print("[Errno {0}] {1}".format(errno, getattr(e, "strerror", None)))
            else:
                print("[Error] {0}: {1}".format(type(e).__name__, e))
            return None
class Text2SpeechRequest:
    """Synthesize speech with Google Text-to-Speech and save it as an MP3 file."""

    # Directory the original implementation always wrote to; kept as the
    # default so existing callers behave the same.
    DEFAULT_OUTPUT_DIR = "/Users/amlaanbhoi/Desktop/s2c_mp3/"

    @staticmethod
    def _create_to_speech_request(query, filename, output_dir=DEFAULT_OUTPUT_DIR):
        """
        Function to create a text to speech request to Google Text-Speech-API
        Args:
            query: [string] text containing query to convert to speech
            filename: [string] filename (without extension) to store the audio file
            output_dir: [string] directory the '.mp3' file is written to
        Returns:
            None
        """
        client = texttospeech.TextToSpeechClient()
        synthesis_input = texttospeech.types.SynthesisInput(text=query)
        voice = texttospeech.types.VoiceSelectionParams(
            language_code='en-US',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE
        )
        audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3)
        response = client.synthesize_speech(synthesis_input, voice, audio_config)

        # Use the `filename` argument: the previous body read an undefined
        # name `file_name` and only worked when a global of that name existed.
        out_path = os.path.join(output_dir, filename + '.mp3')
        with open(out_path, 'wb') as out:
            out.write(response.audio_content)
# Tests
if __name__ == "__main__":
    # Manual smoke test: synthesize a fixed phrase to an MP3 file.
    # The microphone -> LUIS part of the pipeline is left disabled:
    # r = Speech2TextRequest._create_to_text_request()
    # t = Text2CodeRequest._create_to_code_request(r, HEADERS, PARAMS)
    # if r is None:
    #     r = "Something went wrong..."

    # NOTE(review): `file_name` is kept as a module-level name on purpose;
    # code elsewhere in this file may read that global -- confirm before renaming.
    r, file_name = "U!", "undo_changes"
    t = Text2SpeechRequest._create_to_speech_request(r, file_name)