In [None]:
# Create the Streamlit script file
app_code = """
import os
# SECURITY: never hardcode credentials in source. GOOGLE_API_KEY must be provided
# through the process environment or a .env file (read by load_dotenv() below).
# The key that was previously committed on this line is exposed and should be revoked.
import streamlit as st
import webrtcvad
import numpy as np
from faster_whisper import WhisperModel
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks import get_openai_callback
from dotenv import load_dotenv
import nltk
from nltk.tokenize import sent_tokenize
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import tempfile
import soundfile as sf
import pyaudio
import wave
import librosa

# Load environment variables (e.g. GOOGLE_API_KEY) from a local .env file, if present
load_dotenv()

# Download the tokenizer data used by sent_tokenize for sentence splitting.
# Recent NLTK releases (3.9+) load 'punkt_tab' instead of the legacy 'punkt'
# package, so fetch both to stay compatible across NLTK versions.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource, quiet=True)

# Voice Activity Detection and Speech Recognition
class VAD:
    '''Speech-to-text front end holding a WebRTC VAD instance and a faster-whisper model.

    Only the Whisper model is used by process_audio(); self.vad is created in
    __init__ but is not consulted anywhere in this class.
    '''

    def __init__(self, mode=3, sample_rate=16000, frame_duration_ms=30):
        '''Create the VAD object and load the "small" Whisper model.

        Args:
            mode: value passed to webrtcvad.Vad.
            sample_rate: sample rate (Hz) used when writing audio for transcription.
            frame_duration_ms: frame length in milliseconds, used to derive frame_size.
        '''
        self.vad = webrtcvad.Vad(mode)
        self.sample_rate = sample_rate
        self.frame_duration_ms = frame_duration_ms
        # Number of samples contained in one frame
        self.frame_size = int(sample_rate * frame_duration_ms / 1000)
        self.model = WhisperModel("small", device="cuda" if torch.cuda.is_available() else "cpu", compute_type="float32")

    def process_audio(self, audio_data):
        '''Transcribe audio samples and return the recognized text.

        The samples are written to a temporary WAV file at self.sample_rate,
        transcribed, and the temporary file is always removed afterwards.

        Args:
            audio_data: audio samples accepted by soundfile.write.

        Returns:
            The segment texts joined by single spaces.
        '''
        temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        temp_path = temp_file.name
        # Only the path is needed; close our handle so the file can be rewritten by name.
        temp_file.close()
        try:
            sf.write(temp_path, audio_data, self.sample_rate)
            segments, _ = self.model.transcribe(temp_path, beam_size=5)
            # Consume the segments before the file is deleted, since they may be
            # produced lazily. Strip each text so joining does not double spaces.
            return " ".join(segment.text.strip() for segment in segments)
        finally:
            # Previously the delete=False temp file was never cleaned up.
            if os.path.exists(temp_path):
                os.remove(temp_path)

# Language Model
def setup_llm():
    '''Build the LLMChain that answers a transcribed question with Gemini.

    Returns:
        An LLMChain combining the "gemini-pro" chat model (temperature 0.7)
        with a prompt that takes a single {question} variable.

    Raises:
        RuntimeError: if the GOOGLE_API_KEY environment variable is not set.
    '''
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        # Fail early with an actionable message instead of passing None to the client.
        raise RuntimeError(
            "GOOGLE_API_KEY is not set. Export it or add it to a .env file before starting the app."
        )

    llm = ChatGoogleGenerativeAI(model="gemini-pro",
                                 google_api_key=api_key,
                                 temperature=0.7)
    prompt = ChatPromptTemplate.from_template(
        "You are a helpful assistant. Provide a concise response to the following: {question}"
    )
    return LLMChain(llm=llm, prompt=prompt)

def generate_response(chain, question, max_sentences=2):
    '''Ask the chain a question and return at most max_sentences sentences.

    Args:
        chain: object exposing run(question=...) that returns the answer text.
        question: the user's question (typically the transcription).
        max_sentences: maximum number of leading sentences to keep.

    Returns:
        The truncated answer, a short "please repeat" message when the question
        is empty, or an apology message if the chain call fails (the error is
        also shown through st.error).
    '''
    # Nothing was said or recognized: do not send an empty prompt to the LLM.
    if not question or not question.strip():
        return "I'm sorry, I didn't catch that. Could you please repeat it?"

    try:
        # The former get_openai_callback() wrapper was dropped: it tracks OpenAI
        # usage only and its result was never read.
        response = chain.run(question=question)
        sentences = sent_tokenize(response)
        return ' '.join(sentences[:max_sentences])
    except Exception as e:
        st.error(f"LLM Error: {e}")
        return "I apologize, but I encountered an error while processing your request."

# Text-to-Speech
class TTS:
    '''Text-to-speech wrapper around the parler-tts-mini-v1 model.'''

    def __init__(self):
        '''Load the Parler-TTS model and tokenizer on GPU when available, else CPU.'''
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")
        # Use the model's own output rate. A hardcoded 16000 makes the saved
        # audio play back at the wrong speed and pitch when the rates differ.
        self.sampling_rate = self.model.config.sampling_rate

    def text_to_speech(self, text, description, speed=1.2):
        '''Synthesize speech for text using the voice described by description.

        Args:
            text: the sentence(s) to speak (tokenized as the generation prompt).
            description: natural-language description of the desired voice.
            speed: time-stretch factor applied to the result; values above 1
                shorten the audio. Defaults to 1.2, the previously fixed value.

        Returns:
            The time-stretched waveform as a numpy array, to be played at
            self.sampling_rate.
        '''
        inputs = self.tokenizer(description, return_tensors="pt").to(self.device)
        prompt = self.tokenizer(text, return_tensors="pt").to(self.device)

        # Inference only: no gradients are needed
        with torch.no_grad():
            audio = self.model.generate(
                input_ids=inputs.input_ids,
                prompt_input_ids=prompt.input_ids,
                attention_mask=inputs.attention_mask,
                prompt_attention_mask=prompt.attention_mask,
            )

        audio = audio.cpu().numpy().squeeze()
        # Change the playback speed of the audio
        return librosa.effects.time_stretch(audio, rate=speed)

def record_audio(duration=5, sample_rate=16000):
    '''Record mono float32 audio from an input device opened through PyAudio.

    Args:
        duration: recording length in seconds.
        sample_rate: capture rate in Hz.

    Returns:
        A 1-D float32 numpy array with the recorded samples, or None when the
        audio device cannot be used (an error and a warning are shown in the UI).
    '''
    chunk_size = 1024
    audio_interface = None
    stream = None
    try:
        audio_interface = pyaudio.PyAudio()
        stream = audio_interface.open(format=pyaudio.paFloat32,
                                      channels=1,
                                      rate=sample_rate,
                                      input=True,
                                      frames_per_buffer=chunk_size)

        st.info(f"Recording for {duration} seconds...")
        frames = []
        for _ in range(int(sample_rate / chunk_size * duration)):
            frames.append(np.frombuffer(stream.read(chunk_size), dtype=np.float32))
        st.info("Recording finished.")

        # np.concatenate rejects an empty list (e.g. duration=0)
        if not frames:
            return np.zeros(0, dtype=np.float32)
        return np.concatenate(frames)
    except OSError as e:
        st.error(f"Error accessing audio device: {e}")
        st.warning("Microphone input is not available. Please use file upload instead.")
        return None
    finally:
        # Always release the device, including when opening or reading failed midway.
        if stream is not None:
            try:
                stream.stop_stream()
            except OSError:
                # Best effort: the stream may already be stopped or the device gone.
                pass
            stream.close()
        if audio_interface is not None:
            audio_interface.terminate()

# Streamlit app
@st.cache_resource
def _load_pipeline():
    '''Create the speech-recognition, LLM and TTS components once and reuse them across reruns.'''
    return VAD(), setup_llm(), TTS()


def _to_mono_at_rate(samples, source_rate, target_rate):
    '''Down-mix multi-channel samples to mono and resample them to target_rate.'''
    if samples.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files
        samples = samples.mean(axis=1)
    if source_rate != target_rate:
        samples = librosa.resample(samples, orig_sr=source_rate, target_sr=target_rate)
    return samples


def main():
    '''Streamlit entry point: capture or upload audio, transcribe it, answer it, speak the answer.'''
    import io

    st.title("Speech-to-Speech Pipeline")

    # Streamlit re-executes this script on every interaction; loading the
    # models through the cached helper avoids reloading them on each click.
    vad, llm_chain, tts = _load_pipeline()

    if 'audio_data' not in st.session_state:
        st.session_state.audio_data = None

    input_method = st.radio("Choose input method:", ("Microphone", "File Upload"))

    if input_method == "Microphone":
        if st.button("Record Audio"):
            audio_data = record_audio()
            if audio_data is not None:
                st.session_state.audio_data = audio_data
                st.success("Audio recorded successfully!")
            else:
                st.error("Failed to record audio. Please try file upload instead.")
    else:
        uploaded_file = st.file_uploader("Upload an audio file", type=["wav"])
        if uploaded_file is not None:
            # Rewind in case the same buffer was already read on an earlier rerun
            uploaded_file.seek(0)
            samples, file_rate = sf.read(uploaded_file)
            # The transcriber writes audio at vad.sample_rate, so convert the
            # upload to that rate (and to mono) instead of discarding its rate.
            st.session_state.audio_data = _to_mono_at_rate(samples, file_rate, vad.sample_rate)
            st.success("File uploaded successfully!")

    if st.session_state.audio_data is not None and st.button("Process"):
        # Speech to Text
        with st.spinner("Transcribing..."):
            transcription = vad.process_audio(st.session_state.audio_data)
        st.write("Transcription:", transcription)

        if not transcription.strip():
            st.warning("No speech was detected in the audio. Please try again.")
            return

        # LLM Response
        with st.spinner("Generating response..."):
            response = generate_response(llm_chain, transcription)
        st.write("Response:", response)

        # Text to Speech
        with st.spinner("Converting text to speech..."):
            description = "Jon's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."
            audio = tts.text_to_speech(response, description)

        # Encode the audio in memory and play it (no temp file left on disk)
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, audio, tts.sampling_rate, format="WAV")
        st.audio(wav_buffer.getvalue(), format="audio/wav")

if __name__ == "__main__":
    main()
"""

# Persist the script so it can be launched with `streamlit run app.py`.
# An explicit encoding keeps the written file identical regardless of the platform default.
with open('app.py', 'w', encoding='utf-8') as script_file:
    script_file.write(app_code)


In [None]:
# Use the %pip magic so packages are installed into the running kernel's environment
%pip install -r "/content/requirements.txt"

Collecting git+https://github.com/huggingface/parler-tts.git (from -r /content/requirements.txt (line 24))
  Cloning https://github.com/huggingface/parler-tts.git to /tmp/pip-req-build-q80sq0wm
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/parler-tts.git /tmp/pip-req-build-q80sq0wm
  Resolved https://github.com/huggingface/parler-tts.git to commit 8e465f1b5fcd223478e07175cb40494d19ffbe17
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting webrtcvad-wheels (from -r /content/requirements.txt (line 2))
  Downloading webrtcvad_wheels-2.0.11.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting ctranslate2 (from -r /content/requirements.txt (line 10))
  Downloading ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting faster_whisper (

In [None]:
# PortAudio development files are needed to build PyAudio; -y keeps the install non-interactive
!apt-get install -y portaudio19-dev

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libportaudio2 libportaudiocpp0
Suggested packages:
  portaudio19-doc
The following NEW packages will be installed:
  libportaudio2 libportaudiocpp0 portaudio19-dev
0 upgraded, 3 newly installed, 0 to remove and 45 not upgraded.
Need to get 188 kB of archives.
After this operation, 927 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudio2 amd64 19.6.0-1.1 [65.3 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudiocpp0 amd64 19.6.0-1.1 [16.1 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 portaudio19-dev amd64 19.6.0-1.1 [106 kB]
Fetched 188 kB in 1s (317 kB/s)
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 123595 files and directories currently installed.)
Preparing to unpack .../libportaudio2_19.6.0-1.

In [None]:
# Use the %pip magic so PyAudio is installed into the running kernel's environment
%pip install pyaudio

Collecting pyaudio
  Downloading PyAudio-0.2.14.tar.gz (47 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/47.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyaudio
  Building wheel for pyaudio (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pyaudio: filename=PyAudio-0.2.14-cp310-cp310-linux_x86_64.whl size=63855 sha256=50d0d96ef571285d70cd4b2f2da164404d9af36b1180b80dbdce5920222f0ff3
  Stored in directory: /root/.cache/pip/wheels/d6/21/f4/0b51d41ba79e51b16295cbb096ec49f334792814d545b508c5
Successfully built pyaudio
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14


In [None]:
# Use the %pip magic so Streamlit is installed into the running kernel's environment
%pip install -q streamlit

In [None]:
# Print this machine's public IPv4 address.
# NOTE(review): presumably needed as the localtunnel access password - confirm.
!wget -q -O - ipv4.icanhazip.com

34.125.195.44


In [None]:
# Start the Streamlit app in the background and expose its port (8501) through localtunnel
! streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.195.44:8501[0m
[0m
your url is: https://curly-years-buy.loca.lt
2024-08-25 14:49:58.276716: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-25 14:49:58.449474: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-25 14:49:58.493271: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alrea

In [None]:
# requirements.txt
