# Import Libraries
---

In [1]:
import pyaudio  # For capturing audio input from the microphone
import wave  # For saving and reading WAV audio files
import whisper  # OpenAI's speech recognition model for transcribing audio
import ollama  # Interface for interacting with language models like Gemma or Mistral
from TTS.api import TTS  # Coqui TTS API for text-to-speech synthesis
import IPython.display as ipd  # For playing audio within Jupyter notebooks
import contextlib  # Provides utilities for working with context managers
import os  # For interacting with the operating system 
import sys  # Provides access to system-specific parameters and functions

# Context Manager:  `suppress_output` 
---
This context manager temporarily suppresses most of the TTS console output by redirecting `sys.stdout` and `sys.stderr` to `os.devnull`, silencing any messages written to those streams within the block.


In [2]:
@contextlib.contextmanager
def suppress_output():
    """Silence ``sys.stdout`` and ``sys.stderr`` while the ``with`` body runs.

    Both streams are pointed at a single handle opened on ``os.devnull`` and
    are put back to their previous objects when the block exits, whether it
    finishes normally or raises.
    """
    with open(os.devnull, "w") as sink:
        # The redirect helpers remember the current stream objects and
        # restore them on exit, including when an exception propagates.
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            yield

# LiveChatBot: A Voice-Enabled Time-Wasting Conversational Agent for Fraud Calls
---
The `LiveChatBot` class is designed to engage with scam callers, wasting their time and resources. By simulating a slightly confused but overly polite persona, it keeps fraudsters occupied, preventing them from targeting real victims.


In [3]:
class LiveChatBot:
    """Voice loop: record microphone audio, transcribe it, reply via an LLM, speak the reply.

    One call to :meth:`run` performs a single turn:

    1. :meth:`record_audio` captures ``record_seconds`` of audio to a WAV file.
    2. :meth:`transcribe_audio` runs the Whisper model on that file.
    3. :meth:`generate_nonsense_response` asks the Ollama chat model for a reply.
    4. :meth:`speak_text` synthesizes the reply and returns an ``ipd.Audio``.
    """

    DEFAULT_WHISPER_MODEL = "tiny"
    DEFAULT_TTS_MODEL = "tts_models/en/ljspeech/tacotron2-DDC"

    def __init__(self,
                 model_name=None,
                 tts_model=None,
                 wave_output_filename="temp_audio.wav",
                 record_seconds=5,
                 rate=16000,
                 chunk=1024,
                 channels=1,
                 llama_model='gemma3:1b'):
        """Store the recording settings and load the Whisper model.

        Args:
            model_name: Whisper model name; falls back to
                ``DEFAULT_WHISPER_MODEL`` when falsy.
            tts_model: TTS model name; falls back to ``DEFAULT_TTS_MODEL``
                when falsy.
            wave_output_filename: Path the recorded audio is written to.
            record_seconds: Length of each recording, in seconds.
            rate: Sample rate passed to the input stream and the WAV header.
            chunk: Number of frames read from the stream per call.
            channels: Number of input channels.
            llama_model: Model name passed to ``ollama.chat``.
        """
        # Use defaults if none provided
        self.model_name = model_name or self.DEFAULT_WHISPER_MODEL
        self.tts_model = tts_model or self.DEFAULT_TTS_MODEL

        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = channels
        self.RATE = rate
        self.CHUNK = chunk
        self.RECORD_SECONDS = record_seconds
        self.WAVE_OUTPUT_FILENAME = wave_output_filename

        # Load models once
        self.whisper_model = whisper.load_model(self.model_name)

        # The TTS model is created on first use in speak_text() and then
        # reused, so construction here stays as cheap as before.
        self.tts = None

        self.llama_model = llama_model

    def record_audio(self):
        """Record ``RECORD_SECONDS`` of microphone audio into ``WAVE_OUTPUT_FILENAME``.

        The stream and the PyAudio handle are released even if reading from
        the device fails part-way through.
        """
        audio = pyaudio.PyAudio()
        try:
            # Query the sample width while the PyAudio handle is still alive.
            sample_width = audio.get_sample_size(self.FORMAT)
            stream = audio.open(format=self.FORMAT,
                                channels=self.CHANNELS,
                                rate=self.RATE,
                                input=True,
                                frames_per_buffer=self.CHUNK)
            try:
                print(f"🎤 Listening for {self.RECORD_SECONDS} seconds")
                # Number of CHUNK-sized reads needed to cover the duration.
                num_chunks = int(self.RATE / self.CHUNK * self.RECORD_SECONDS)
                frames = [stream.read(self.CHUNK) for _ in range(num_chunks)]
                print("✅ Recording complete")
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

        with wave.open(self.WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(frames))

    def transcribe_audio(self):
        """Transcribe ``WAVE_OUTPUT_FILENAME`` with the Whisper model.

        Returns:
            The ``'text'`` field of the Whisper transcription result.
        """
        print("📝 Transcribing...\n")
        result = self.whisper_model.transcribe(self.WAVE_OUTPUT_FILENAME, fp16=False)
        return result['text']

    def generate_nonsense_response(self, user_text):
        """Ask the chat model for a short in-character reply to ``user_text``.

        Args:
            user_text: What the other party said (the transcription).

        Returns:
            The reply text, i.e. ``response['message']['content']`` from
            ``ollama.chat``.
        """
        prompt = f"""
                You are an Indian pretending to be a slightly confused but overly polite and curious person on a phone call. 
                Sound like Indian. The other person just said:

        \"{user_text}\"

                Reply with 1–2 sentences. Stay in character. Avoid giving any real information.
                Limit to 1–2 sentences.
                """
        print("💬 Generating nonsense response...")
        response = ollama.chat(
            model=self.llama_model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response['message']['content']

    def speak_text(self, text, filename="response.wav"):
        """Synthesize ``text`` to speech and return it as a playable notebook widget.

        The TTS model is created the first time this method runs and cached on
        ``self.tts`` so later calls do not reload it.

        Args:
            text: The text to speak.
            filename: Currently unused; kept so existing callers that pass it
                keep working.

        Returns:
            An ``ipd.Audio`` built from the synthesized samples at the
            synthesizer's output sample rate.
        """
        print(f"🤖 Speaking: {text}")
        with suppress_output():
            # getattr keeps this safe for instances whose __init__ did not
            # define ``tts``.
            if getattr(self, "tts", None) is None:
                self.tts = TTS(model_name=self.tts_model, progress_bar=False)
            audio_array = self.tts.tts(text=text)

        return ipd.Audio(audio_array, rate=self.tts.synthesizer.output_sample_rate)

    def run(self):
        """Run one record → transcribe → respond → speak turn.

        Returns:
            The ``ipd.Audio`` object produced by :meth:`speak_text`.
        """
        self.record_audio()
        user_text = self.transcribe_audio()
        print(f"🗣️ Transcribed Text: {user_text}")

        response = self.generate_nonsense_response(user_text)

        return self.speak_text(response)

# LiveChatBot: Engaging with Scam Callers
---
The following code demonstrates how to utilize the `LiveChatBot` class to interact with scam callers:

In [4]:
bot = LiveChatBot(record_seconds=10)
audio = bot.run()
audio 

🎤 Listening for 10 seconds
✅ Recording complete
📝 Transcribing...

🗣️ Transcribed Text:  Hello, this is the medical assistant. How can I help you?
💬 Generating nonsense response...
🤖 Speaking: Oh my goodness, it’s so lovely to hear from you!  I was just wondering if you’ve ever… well, perhaps encountered a particularly vibrant hue of sunset?
