In [1]:
from google.cloud import speech_v1p1beta1 as speech
from google.oauth2 import service_account
import io
import re
import os
from unidecode import unidecode

In [2]:
# Path to the service-account JSON credentials file; replace with your own path.
credenciais = "credentials.json"

# Load the service-account credentials from that file.
credentials = service_account.Credentials.from_service_account_file(credenciais)

# Instantiate the Speech-to-Text client; `transcribe` uses this module-level
# `client` to send recognition requests.
client = speech.SpeechClient(credentials=credentials)

In [3]:
def transcribe(content, config=None):
    """Transcribe raw audio bytes with the module-level Speech-to-Text client.

    Args:
        content: Raw bytes of the audio to recognize.
        config: Optional ``speech.RecognitionConfig``. When omitted, a default
            is built with MP3 encoding, a 48000 Hz sample rate and the
            ``pt-BR`` language code.

    Returns:
        The first alternative's transcript of every recognition result,
        joined by single spaces. An empty string when there are no results.
    """
    if config is None:
        # This config used to be commented out while `config` was still passed
        # to `client.recognize`, so every call failed with NameError.
        # NOTE(review): these defaults describe MP3 input; confirm they suit
        # the audio callers send (get_move reads a file named audio.wav).
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.MP3,
            sample_rate_hertz=48000,
            language_code="pt-BR",
        )

    audio = speech.RecognitionAudio(content=content)

    # Request the transcription.
    response = client.recognize(config=config, audio=audio)

    # Keep the first alternative of each result; a result without any
    # alternative is skipped instead of raising IndexError.
    transcriptions = [
        result.alternatives[0].transcript
        for result in response.results
        if result.alternatives
    ]

    return " ".join(transcriptions)

In [4]:
def parse_response(transcription):
    """Extract a chess move (origin and destination square) from a transcript.

    The transcript is first passed through ``unidecode``. Squares are then
    located in two stages:

    1. Standalone squares: a file letter a-h not preceded by another letter,
       optional whitespace, and a rank digit 1-8 not followed by another
       digit (e.g. "e2", "E 2"). The first two such squares are used. This
       keeps letters inside ordinary words ("cavalo", "para") from being
       taken as files.
    2. If fewer than two standalone squares exist, the original loose pattern
       (letter ... digit ... letter ... digit) is applied as a fallback.

    Args:
        transcription: The transcribed text to parse.

    Returns:
        A dict with the keys ``transcription`` (the input text), ``move``
        (e.g. ``"E2 E4"``), ``org_letter``, ``org_num``, ``dst_letter`` and
        ``dst_num``. Letters are upper-cased; digits are returned as strings.

    Raises:
        ValueError: If no origin/destination pair can be found in the text.
    """
    text = unidecode(transcription)

    # Stage 1: well-formed, standalone squares.
    squares = re.findall(r'(?<![A-Za-z])([a-hA-H])\s*([1-8])(?!\d)', text)
    if len(squares) >= 2:
        (org_letter, org_num), (dst_letter, dst_num) = squares[:2]
    else:
        # Stage 2: legacy loose pattern, kept so transcripts that parsed
        # before still parse the same way.
        match = re.search(r'([a-hA-H]).*(\d).*([a-hA-H]).*(\d)', text)
        if match is None:
            # Previously this case crashed with AttributeError on None.
            raise ValueError(
                "Could not find a chess move in transcription: %r" % (transcription,)
            )
        org_letter, org_num, dst_letter, dst_num = match.groups()

    org_letter = org_letter.upper()
    dst_letter = dst_letter.upper()

    response = {
        "transcription": transcription,
        "move": org_letter + org_num + ' ' + dst_letter + dst_num,
        "org_letter": org_letter,
        "org_num": org_num,
        "dst_letter": dst_letter,
        "dst_num": dst_num
    }

    return response
    

In [5]:
def get_move():
    """Transcribe and parse the recorded move, then delete the recording.

    Reads ``audio-plays/audio.wav`` under the current working directory,
    sends its bytes to ``transcribe``, parses the resulting text with
    ``parse_response``, removes the audio file and returns the parsed move.

    Returns:
        The dict produced by ``parse_response``.
    """
    recording_path = os.path.join(os.getcwd(), 'audio-plays', "audio.wav")

    # Read the whole recording into memory.
    with open(recording_path, "rb") as recording:
        audio_bytes = recording.read()

    # Transcribe the audio, then parse the transcribed move.
    move = parse_response(transcribe(audio_bytes))

    # The recording is removed only after transcription and parsing succeed.
    os.remove(recording_path)

    return move

In [6]:
from flask import Flask, request, jsonify
from google.cloud import speech_v1p1beta1 as speech
from google.oauth2 import service_account
import io

# Flask application that exposes the move-recognition endpoints.
app = Flask(__name__)

# Path to the service-account JSON credentials file; replace with your own path.
credenciais = "credentials.json"

# Load the service-account credentials from that file.
credentials = service_account.Credentials.from_service_account_file(credenciais)

# Instantiate the Speech-to-Text client. This rebinds the module-level
# `credentials` and `client` names that the earlier setup cell also assigns.
client = speech.SpeechClient(credentials=credentials)

@app.route('/get_move', methods=['GET'])
def get_move_from_audio():
    """Return the move parsed from the recorded audio file as JSON.

    Returns:
        ``{"response": <parsed move>}`` on success, or
        ``{"error": ...}`` with HTTP 404 when no recording is available.
    """
    try:
        response = get_move()
    except FileNotFoundError:
        # get_move deletes the recording after a successful read, so a missing
        # file is the normal state until a new recording is saved. Report it
        # as a JSON error instead of letting it surface as an HTTP 500.
        return jsonify({"error": "No audio file available"}), 404

    return jsonify({"response": response})


@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file and return the parsed move as JSON.

    Expects the audio under the multipart form field ``file``.

    Returns:
        ``{"response": <parsed move>}`` on success, or ``{"error": ...}``
        with HTTP 400 when the ``file`` field is missing or its filename is
        empty.
    """
    # Reject requests that carry no "file" field at all.
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    upload = request.files['file']

    # An empty filename means the form was submitted without choosing a file.
    if upload.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Transcribe the uploaded bytes, then parse the move out of the text.
    parsed_move = parse_response(transcribe(upload.read()))

    return jsonify({"response": parsed_move})

# Start Flask's built-in server when this code runs as the main module,
# listening on all addresses (0.0.0.0) on port 5000.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.16.129.2:5000
Press CTRL+C to quit
