<a href="https://colab.research.google.com/github/EnPaiva93/tume/blob/colab/tume.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hackathon Latin America 2024

## Instalación

In [86]:
%%capture
!pip install groq streamlit ffmpeg-python

In [25]:
%%capture
!npm install localtunnel

## Utils

In [125]:
def bytesio_to_ogg(
    input_bytesio: "io.BytesIO",
    output_file: str = "temp.ogg",
    input_format: str = "wav",
):
    """
    Convert in-memory audio to a mono Opus-in-OGG file using ffmpeg-python.

    The buffer is streamed to an ffmpeg subprocess through stdin and the
    encoded result is written to ``output_file`` (overwriting any existing
    file of that name).

    Args:
        input_bytesio (BytesIO): The input audio data as a BytesIO object.
        output_file (str): The path to save the converted OGG file.
        input_format (str): Container format ffmpeg should assume for the
            piped input. Defaults to ``"wav"``.

    Raises:
        RuntimeError: If the ffmpeg process exits with a non-zero return code;
            the message carries ffmpeg's stderr output.
    """
    # NOTE: the type annotation above is a string on purpose. This cell is
    # defined before the notebook's import cell, and an unquoted `io.BytesIO`
    # would be evaluated at definition time and fail on a fresh kernel.

    # Rewind so the whole buffer is sent, regardless of earlier reads
    input_bytesio.seek(0)

    # Build the ffmpeg command: read from stdin, write an OGG file
    process = (
        ffmpeg
        .input('pipe:0', format=input_format)
        .output(
            output_file,
            format='ogg',
            acodec='libopus',  # Opus codec inside the OGG container
            ac=1,              # Mono audio
            b='12k',           # Target bitrate
            application='voip' # Libopus-specific option
        )
        .overwrite_output()
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
    )

    # Feed the audio to ffmpeg and wait for it to finish; stdout is not used
    _, stderr = process.communicate(input=input_bytesio.read())

    if process.returncode != 0:
        # errors='replace': stderr may contain bytes that are not valid UTF-8,
        # and a decode failure here would hide the real ffmpeg error.
        raise RuntimeError(f"ffmpeg failed: {stderr.decode('utf-8', errors='replace')}")

    print(f"Converted to OGG: {output_file}")


## Input

In [126]:
# Librería
import uuid
import os, io, subprocess, ffmpeg
from groq import Groq
from google.colab import userdata

from pydantic import BaseModel, ConfigDict, Field

In [20]:
# Load the Groq API key through google.colab's `userdata` helper so the secret
# is never written into the notebook source.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

In [148]:
class Audio(BaseModel):
    """
    In-memory audio clip that is written to disk as an OGG file on creation.

    Constructing an instance converts ``audio_bytes`` with ``bytesio_to_ogg``
    into a file named ``<id><format>`` (a relative path) and stores that name
    in ``audio_path``.

    Attributes:
        id: Unique identifier of the clip; a fresh UUID4 string per instance.
        audio_bytes: Source audio buffer handed to ``bytesio_to_ogg``.
        audio_path: File name of the converted audio; set by ``__init__``.
        format: File extension appended to ``id`` (default ``".ogg"``).
    """

    # io.BytesIO is not a type pydantic can validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # default_factory is required here: `default=str(uuid.uuid4())` is evaluated
    # once when the class is defined, so every instance would share the same id
    # and overwrite the same output file.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    audio_bytes: io.BytesIO
    audio_path: str = Field(default=None)  # populated in __init__ after conversion
    format: str = Field(default=".ogg")

    def __init__(self, **data):
        """Validate the fields, convert the audio and record the output path."""
        super().__init__(**data)
        self.convert_to_ogg()
        self.audio_path = self.id + self.format

    def convert_to_ogg(self):
        """
        Write the audio buffer to ``<id><format>`` as a temporary OGG file.

        Raises:
            RuntimeError: Propagated from ``bytesio_to_ogg`` when ffmpeg fails.
        """
        output_file = self.id + self.format
        self.audio_bytes.seek(0)
        bytesio_to_ogg(self.audio_bytes, output_file)

class STT(BaseModel):
    """
    Speech-to-text helper that transcribes Spanish audio through the Groq API.

    Attributes:
        client: Groq SDK client used for the transcription requests. When it
            is not supplied, one is created in ``__init__``.
    """

    # Groq is not a type pydantic can validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    client: Groq = None

    def __init__(self, **data):
        """
        Build the model and make sure a Groq client is available.

        Args:
            **data: Model fields. An optional ``api_key`` entry is used to
                create the client; without it the module-level
                ``GROQ_API_KEY`` is used.
        """
        # `api_key` is not a model field; previously it was accepted by callers
        # but silently ignored in favour of the global key.
        api_key = data.pop("api_key", None)
        super().__init__(**data)
        # Keep a client the caller injected instead of overwriting it.
        if self.client is None:
            self.client = Groq(api_key=api_key or GROQ_API_KEY)

    def obtener_texto(self, audio: Audio):
        """
        Transcribe the audio file referenced by ``audio`` and return the text.

        Args:
            audio: Clip whose ``audio_path`` points at the file to transcribe.

        Returns:
            str: The transcription text, or a user-facing error message when
            the file is missing or the request fails.
        """
        import logging

        # Check if the file exists
        if not os.path.exists(audio.audio_path):
            return "Error: The audio file could not be found. Please try again."

        try:
            # Open the OGG file and send it for transcription
            with open(audio.audio_path, "rb") as audio_file:
                transcription = self.client.audio.transcriptions.create(
                    file=audio_file,
                    model="whisper-large-v3-turbo",
                    language="es",
                    temperature=0.0
                )
                return transcription.text
        except Exception:
            # Still return a generic message for the UI, but record the real
            # cause so failures can be diagnosed.
            logging.getLogger(__name__).exception(
                "Transcription failed for %s", audio.audio_path
            )
            return "An error occurred while processing the audio. Please try again."

In [147]:
# Smoke test: run a local WAV file through conversion and transcription.
stt = STT(api_key=GROQ_API_KEY)

# Load the recording into an in-memory buffer positioned at its start
with open("temp.wav", "rb") as wav_file:
    audio_data = io.BytesIO(wav_file.read())

audio1 = Audio(audio_bytes=audio_data)
result = stt.obtener_texto(audio1)

print(result)

Converted to OGG: d9549224-b91b-4477-a3d1-db3b7380a1e3.ogg
 Hola, ¿qué tal? Hola, ¿qué tal?


## Process

## Output

In [None]:
!pip install gTTS

In [8]:
text = """La noción más moderna en Occidente de enciclopedia de propósito general y de amplia distribución data de la época de Ephraim Chambers y su Cyclopaedia (1728). Luego le sucederían Denis Diderot y los enciclopedistas del siglo XVIII que se inspiraron en la Cyclopaedia de Chambers, creando el mayor hito del periodo de la Ilustración del siglo XVIII: la Encyclopédie (1751-1772), compilada por un grupo de escritores y hombres de ciencia franceses. De manera plenamente consciente, estos hombres estaban dando la espalda tanto a la religión como a la metafísica como fuentes de conocimiento, viendo en la ciencia pura y su aplicación, la tecnología, una nueva fuerza motriz intelectual según la ideología materialista y laicista propia de la clase media o burguesía entonces en auge. Reunieron en una vasta obra todos los conocimientos científicos de la época, no como un mero registro alfabético, sino como un relato del nuevo modo científico de interpretar el mundo. """

In [18]:
from gtts import gTTS
# Imported under an alias: a bare `Audio` import would rebind the name of the
# pydantic `Audio` model defined earlier in this notebook.
from IPython.display import Audio as AudioPlayer

# gTTS writes MP3 data, so the file gets a matching extension (a `.wav` name
# would mislabel the content for players that trust the extension).
sound_file = '1.mp3'

tts = gTTS(text, lang='es', tld='com.mx')
tts.save(sound_file)  # save the synthesized speech to disk
AudioPlayer(sound_file, autoplay=True)

## UI

In [51]:
# Export the key as an environment variable: app.py (written below) reads it
# with os.getenv('GROQ_API_KEY') instead of receiving it directly.
os.environ['GROQ_API_KEY'] = GROQ_API_KEY

In [60]:
# Confirm the key reached the environment WITHOUT displaying it: echoing the
# value would store the secret in the notebook's saved output.
groq_key_is_set = bool(os.getenv('GROQ_API_KEY'))
groq_key_is_set

'<REDACTED: Groq API key removed from the saved output; the exposed key must be revoked and rotated>'

In [112]:
%%writefile app.py
import os
import streamlit as st
from groq import Groq
from google.colab import userdata

import ffmpeg
from io import BytesIO

# Browser-tab title and icon for the app
st.set_page_config(page_title='Groq Translator', page_icon='🎤')

# Heading displayed at the top of the page
st.title('Groq Translator')

# The API key comes from the environment of the process running this script
GROQ_API_KEY = os.getenv('GROQ_API_KEY')

# Voice-recording widget; its value is checked for truthiness at the bottom of
# the script before any transcription is attempted
audio_value = st.audio_input("Record a voice message")

# Groq client used by obtener_texto() below
client = Groq(api_key=GROQ_API_KEY)

def bytesio_to_ogg(
    input_bytesio: BytesIO,
    output_file: str = "temp.ogg",
    input_format: str = "wav",
):
    """
    Convert in-memory audio to a mono Opus-in-OGG file using ffmpeg-python.

    The buffer is streamed to an ffmpeg subprocess through stdin and the
    encoded result is written to ``output_file`` (overwriting any existing
    file of that name).

    Args:
        input_bytesio (BytesIO): The input audio data as a BytesIO object.
        output_file (str): The path to save the converted OGG file.
        input_format (str): Container format ffmpeg should assume for the
            piped input. Defaults to ``"wav"``.

    Raises:
        RuntimeError: If the ffmpeg process exits with a non-zero return code;
            the message carries ffmpeg's stderr output.
    """
    # Rewind so the whole buffer is sent, regardless of earlier reads
    input_bytesio.seek(0)

    # Build the ffmpeg command: read from stdin, write an OGG file
    process = (
        ffmpeg
        .input('pipe:0', format=input_format)
        .output(
            output_file,
            format='ogg',
            acodec='libopus',  # Opus codec inside the OGG container
            ac=1,              # Mono audio
            b='12k',           # Target bitrate
            application='voip' # Libopus-specific option
        )
        .overwrite_output()
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
    )

    # Feed the audio to ffmpeg and wait for it to finish; stdout is not used
    _, stderr = process.communicate(input=input_bytesio.read())

    if process.returncode != 0:
        # errors='replace': stderr may contain bytes that are not valid UTF-8,
        # and a decode failure here would hide the real ffmpeg error.
        raise RuntimeError(f"ffmpeg failed: {stderr.decode('utf-8', errors='replace')}")

    print(f"Converted to OGG: {output_file}")


def obtener_texto(input):
    """
    Convert a recorded audio buffer to OGG and return its transcription.

    Args:
        input: Audio buffer accepted by ``bytesio_to_ogg`` (in this script,
            the value returned by ``st.audio_input``).

    Returns:
        str: The ``text`` attribute of the Groq transcription response.
    """
    ogg_path = "temp.ogg"
    bytesio_to_ogg(input, ogg_path)

    # Context manager so the file handle is closed even if the request fails;
    # the previous bare open() never closed it.
    with open(ogg_path, "rb") as audio_file:
        return client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-large-v3-turbo",
            language="es",
            temperature=0.0
        ).text

# Once a recording is available, transcribe it and render the text on the page
if audio_value:
    st.write(obtener_texto(audio_value))

Overwriting app.py


In [113]:
!streamlit run app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl https://ipv4.icanhazip.com/

34.81.133.21
your url is: https://fast-loops-sell.loca.lt
