In [None]:
"""
Description:
An open-source document-based Q&A assistant using LangChain, FAISS, HuggingFace Embeddings, and TinyLLaMA (1.1B) LLM.
Supports PDF and TXT files with chunking, embedding, and semantic retrieval.

Features:
- Load PDF/TXT documents
- Chunk with overlap
- Embed using all-MiniLM-L6-v2
- Store in FAISS vector store
- Query using TinyLLaMA model
"""


In [None]:
import os
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

# Never hardcode the OpenAI API key in the notebook: anything stored in the
# file is leaked as soon as the notebook is shared or committed. Read the key
# from the environment and, if it is missing, prompt for it without echoing it.
# NOTE(review): a literal key used to be embedded at this spot; treat that key
# as compromised and revoke/rotate it.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Initialize OpenAI Chat Model via LangChain
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.7)

def listen_to_voice():
    """Capture one utterance from ``sr.Microphone()`` and transcribe it.

    Returns:
        The transcript produced by ``recognize_google``, or ``None`` when the
        speech could not be understood or the recognition request failed.
    """
    stt = sr.Recognizer()
    with sr.Microphone() as mic:
        print("Say something...")
        captured = stt.listen(mic)

    print("Recognizing speech...")
    transcript = None
    try:
        transcript = stt.recognize_google(captured)
    except sr.UnknownValueError:
        # The recognizer could not make sense of the audio.
        print("Could not understand audio.")
    except sr.RequestError as err:
        # The recognition request itself failed.
        print(f"Could not request results; {err}")
    else:
        print(f"You said: {transcript}")
    return transcript

def speak(text):
    """Synthesize ``text`` with gTTS, play it, and clean up the temporary mp3.

    Args:
        text: The sentence to speak. ``None``, empty, or whitespace-only text
            is skipped, because gTTS refuses to synthesize empty input.

    Returns:
        None.
    """
    if not text or not text.strip():
        print("Nothing to say.")
        return

    print(f"Responding: {text}")
    audio_path = "response.mp3"
    try:
        gTTS(text=text, lang='en').save(audio_path)
        playsound(audio_path)
    finally:
        # Remove the temporary file even when synthesis or playback fails,
        # so a failed run does not leave a stale response.mp3 behind.
        if os.path.exists(audio_path):
            os.remove(audio_path)

def chat_with_ai(prompt):
    """Send ``prompt`` to the module-level ``chat`` model as one human message.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    messages = [HumanMessage(content=prompt)]
    reply = chat(messages)
    return reply.content

def main():
    """Run the microphone assistant loop until the user says "exit".

    Each recognized utterance is sent to ``chat_with_ai`` and the reply is
    spoken with ``speak``; unrecognized utterances are skipped.
    """
    print("Voice Assistant Ready (Say 'exit' to quit)")
    heard = listen_to_voice()
    # Keep going until a recognized utterance equals "exit" (case-insensitive).
    while heard is None or heard.lower() != "exit":
        if heard is not None:
            speak(chat_with_ai(heard))
        heard = listen_to_voice()

# Start the voice-assistant loop when this code runs as the main module.
if __name__ == "__main__":
    main()


In [None]:
import base64
from IPython.display import Javascript, display
from google.colab import output

# JavaScript run in the Colab front end: records about 4 seconds of microphone
# audio and sends it, base64-encoded, to the kernel callback registered under
# 'notebook.audio_callback'.
# The script must be plain JavaScript. Wrapping it in a JS template literal
# (const record_js = `...`;) would only define a string in the browser and the
# recording would never start.
# Note: when the browser falls back to its default container, the payload is
# not WAV data, whatever file name the receiving callback stores it under.
record_js = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time));

var record = async () => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

  // Request WAV only when the browser can record it; an unsupported mimeType
  // makes the MediaRecorder constructor throw. Otherwise use the default.
  const wanted = 'audio/wav';
  const options = MediaRecorder.isTypeSupported(wanted) ? { mimeType: wanted } : {};
  const recorder = new MediaRecorder(stream, options);
  let data = [];

  recorder.ondataavailable = event => data.push(event.data);
  // Attach the stop handler before stop() is called so the event cannot be missed.
  const stopped = new Promise(resolve => recorder.onstop = resolve);
  recorder.start();

  await sleep(4000);  // record for 4 seconds
  recorder.stop();
  await stopped;

  // Release the microphone once the recording is complete.
  stream.getTracks().forEach(track => track.stop());

  const blob = new Blob(data, { type: recorder.mimeType || wanted });

  // Encode through FileReader: spreading a large byte array into
  // String.fromCharCode can exceed the engine's argument limit.
  const base64 = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const url = reader.result;
      // Base64 never contains a comma, so the last comma ends the data-URL header.
      resolve(url.substring(url.lastIndexOf(',') + 1));
    };
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });

  google.colab.kernel.invokeFunction('notebook.audio_callback', [base64], {});
};

record();
"""

def save_audio(b64_data, output_path="recorded.wav"):
    """Decode a base64 audio payload and write the bytes to disk.

    Args:
        b64_data: Base64-encoded audio (str or bytes), as sent by the
            browser-side recorder.
        output_path: Destination file. Defaults to "recorded.wav", the name
            that was previously hard-coded.

    Raises:
        binascii.Error: If ``b64_data`` is not valid base64.
    """
    # Decode before opening the file: opening in "wb" mode truncates it, so a
    # malformed payload would otherwise wipe out the previous recording.
    audio_bytes = base64.b64decode(b64_data)
    with open(output_path, "wb") as f:
        f.write(audio_bytes)

# Register the Python handler first so it exists when the JavaScript invokes
# 'notebook.audio_callback', then inject the recording script into the page.
output.register_callback('notebook.audio_callback', save_audio)
display(Javascript(record_js))


In [None]:
import whisper
# Load the "base" Whisper model and transcribe recorded.wav, the file name
# written by save_audio.
# NOTE(review): recorded.wav presumably only exists after the browser callback
# has fired — run this cell once the recording has finished; confirm.
model = whisper.load_model("base")
result = model.transcribe("recorded.wav")
# The transcript is read from the "text" key of the result.
text = result["text"]
print("You said:", text)

In [None]:
from pydub import AudioSegment
import speech_recognition as sr
from IPython.display import Audio

def convert_to_pcm_wav(input_file, output_file="converted.wav"):
    """Re-encode ``input_file`` as a WAV file at ``output_file`` using pydub.

    A progress message is printed before and after the conversion.

    Returns:
        ``output_file``, so the call can be used inline.
    """
    print("Converting MP3 to WAV...")
    AudioSegment.from_file(input_file).export(output_file, format="wav")
    print("Conversion done.")
    return output_file

def listen_to_voice(file_path="recorded.mp3"):
    """Transcribe an audio file with ``recognize_google``.

    The file is first converted to WAV with ``convert_to_pcm_wav`` and an
    audio player for the converted file is shown in the cell output.

    Args:
        file_path: Path of the recording to transcribe.

    Returns:
        The transcript, or ``None`` when recognition fails.
    """
    # Imported locally so the function does not rely on another cell's imports.
    from IPython.display import display

    wav_path = convert_to_pcm_wav(file_path)
    # A bare Audio(...) expression inside a function is discarded; it must be
    # passed to display() for the player to appear.
    display(Audio(wav_path))  # Show a player to confirm the sound
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        print("Processing audio...")
        audio = recognizer.record(source)  # or duration=5

    try:
        print("Recognizing...")
        text = recognizer.recognize_google(audio)
        print(f"You said: {text}")
        return text
    except sr.UnknownValueError:
        # This exception carries no message, so printing it would show only "Error:".
        print("Error: could not understand audio")
        return None
    except Exception as e:
        # Best-effort: report any other failure and signal it with None.
        print("Error:", e)
        return None
# Transcribe "recorded.mp3" and print the result (None when recognition failed).
# NOTE(review): no cell shown here writes recorded.mp3 (the recorder cell saves
# recorded.wav) — presumably the file is uploaded manually; confirm.
text = listen_to_voice("recorded.mp3")
print("Final Transcription:", text)


In [None]:
from transformers import pipeline
from gtts import gTTS
from pydub import AudioSegment
import speech_recognition as sr
from IPython.display import Audio as ColabAudio

# Load a lightweight LLM (FLAN-T5 small) through the "text2text-generation" pipeline.
# Bigger options when a GPU is available: 'tiiuae/falcon-7b-instruct' or
# 'mistralai/Mistral-7B-Instruct-v0.1'.
# NOTE(review): those two are decoder-only (causal) models, so the task would
# presumably need to be "text-generation" instead of "text2text-generation" —
# confirm before swapping the model name.
generator = pipeline("text2text-generation", model="google/flan-t5-small")

def convert_to_pcm_wav(input_file, output_file="converted.wav"):
    """Load ``input_file`` with pydub and export it as WAV to ``output_file``.

    Returns:
        ``output_file``, so the result can be passed straight to a reader.
    """
    AudioSegment.from_file(input_file).export(output_file, format="wav")
    return output_file

def listen_to_voice(file_path="recorded.mp3"):
    """Convert ``file_path`` to WAV and transcribe it with ``recognize_google``.

    Returns:
        The transcript, or ``None`` if any exception is raised while
        recognizing (the error is printed).
    """
    wav_file = convert_to_pcm_wav(file_path)
    stt = sr.Recognizer()
    with sr.AudioFile(wav_file) as clip:
        print("Processing audio...")
        samples = stt.record(clip)

    transcript = None
    try:
        print("Recognizing...")
        transcript = stt.recognize_google(samples)
        print(f"You said: {transcript}")
    except Exception as err:
        # Best-effort: report the failure and signal it with None.
        print("Error:", err)
        return None
    return transcript

def speak(text):
    """Synthesize ``text`` to "response.mp3" with gTTS and wrap it in a player.

    Returns:
        A ``ColabAudio`` object created with ``autoplay=True``. The function
        does not display it; the caller decides how to show it.
    """
    print(f"Responding: {text}")
    mp3_path = "response.mp3"
    gTTS(text=text, lang='en').save(mp3_path)
    return ColabAudio(mp3_path, autoplay=True)

def chat_with_llm(prompt):
    """Generate a reply to ``prompt`` with the module-level ``generator``.

    Returns:
        The ``generated_text`` of the first result, stripped of surrounding
        whitespace.
    """
    print("Generating response...")
    outputs = generator(prompt, max_length=100)
    first = outputs[0]
    return first['generated_text'].strip()

def main():
    """Run the file-based assistant loop until the user says or types "exit".

    Every iteration waits for the user to confirm that a fresh "recorded.mp3"
    has been uploaded, transcribes it, generates a reply with
    ``chat_with_llm`` and plays the reply.
    """
    # Imported locally so the function does not rely on another cell's imports.
    from IPython.display import display

    print("Voice Assistant Ready (Say 'exit' to quit)")
    while True:
        # Block between iterations. Without this pause the loop re-reads the
        # same file endlessly, leaving no chance to upload a new recording.
        command = input("Upload a new recorded.mp3, then press Enter (or type 'exit' to quit): ")
        if command.strip().lower() == "exit":
            break

        user_input = listen_to_voice("recorded.mp3")  # Upload this each time
        if not user_input:
            continue
        if user_input.strip().lower() == "exit":
            break

        reply = chat_with_llm(user_input)
        # speak() returns the player object; inside a loop nothing is rendered
        # automatically, so display it explicitly to hear the reply.
        player = speak(reply)
        if player is not None:
            display(player)


In [None]:
# One-shot run: transcribe the uploaded clip, generate a reply and play it.
from IPython.display import display

text = listen_to_voice("recorded.mp3")
if text:
    reply = chat_with_llm(text)
    # Inside an `if` the returned player is no longer the cell's last
    # expression, so it has to be displayed explicitly.
    display(speak(reply))
else:
    # listen_to_voice returns None on failure; do not pass that to the generator.
    print("No speech recognized; skipping response.")