In [64]:
'''
################################################################
# File: talkollama.py
# Purpose:
# - voice detection via microphone
# - Q/A to a local Large Language Model
# - Voice input
# - Voice output
# - no internet access necessary
#
# Licence: Apache 2.0
#
# Martin Hummel
# created on Thu Oct 24 09:09:50 2024
# v2.0
################################################################
'''

'\n################################################################\n# File: talkollama.py\n# Purpose:\n# - voice detection via microphone\n# - Q/A to a local Large Language Model\n# - Voice input\n# - Voice output\n# - no internet access necessary\n#\n# Licence: Apache 2.0\n#\n# Martin Hummel\n# created on Thu Oct 24 09:09:50 2024\n# v2.0\n################################################################\n'

In [65]:
import io
import json
import pyaudio
import vosk
import requests

from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.agents import initialize_agent, Tool, AgentType
from langchain_community.tools import DuckDuckGoSearchResults

from gtts import gTTS

from pydub import AudioSegment
from pydub.playback import play

# Language in which you want to convert
language = 'en'

In [66]:
# Initialize the Ollama LLM (LLaMA 3)
ollama_llm = OllamaLLM(model="llama3.2")

# Load the Vosk model
model = vosk.Model(lang="en-us")    
recognizer = vosk.KaldiRecognizer(model, 16000 )

LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=10 max-active=3000 lattice-beam=2
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:6:7:8:9:10
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from /home/martin/.cache/vosk/vosk-model-small-en-us-0.15/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:282) Loading HCL and G from /home/martin/.cache/vosk/vosk-model-small-en-us-0.15/graph/HCLr.fst /home/martin/.cache/vosk/vosk-model-small-en-us-0.15/graph/Gr.fst
LOG (VoskAPI:ReadDataFiles():model.cc:308) Loading winfo /home/martin/.cache/vosk/vosk-model-small-en-us-0.15/graph/phones/word_boundary.int


In [67]:
def duckduckgo_search(query: str) -> str:
            search_tool = DuckDuckGoSearchResults()
            try:
                # Disable SSL certificate verification
                print('###',query)
                requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
                results = search_tool.invoke(query)
                print('++++',results)
                return results
            except Exception as e:
                return f"Error during search: {str(e)}"

In [68]:
# Create the prompt template
prompt_template = """You are an AI assistant specialized in artificial intelligence, robotics, and automotive technology. 
Your job is to help generate creative ideas and answer questions on these topics. Use the latest research and trends in your responses.
Explain exactly three ideas and make your answer short. Stop after the answer.

Question: {question}

Assistant: """
prompt = PromptTemplate(template=prompt_template, input_variables=["question"])

# Create the LLM chain for creative question answering
creative_chain = LLMChain(llm=ollama_llm, prompt=prompt)

# Create tools for the agent
tools = [
    Tool(
        name="CreativeAssistant",
        func=creative_chain.run,
        description="Generate creative ideas and answers related to AI, robotics, and autonomous vehicles."
    ),
    Tool(
        name="DuckDuckGoSearch",
        func=duckduckgo_search,
        description="Search the web for the latest information."
    )
]

In [69]:
# Initialize the agent
agent = initialize_agent(
    tools=tools,
    llm=ollama_llm,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
    action_input_format="Action Input:"
)

In [70]:
def handle_query(query: str):
    # Run the agent with the query
    print('>>>>>',query)
    response = agent.run(query)
    print('<<<<<',response)

    # Creating the gTTS object
    speech = gTTS(text=response, lang='en', slow=False)
    audio_buffer = io.BytesIO()
    speech.write_to_fp(audio_buffer)

    # Convert to AudioSegment and play
    audio_buffer.seek(0)
    audio = AudioSegment.from_file(audio_buffer, format="mp3")
    play(audio)

    return response

In [71]:
def init_microphon():
    # Start audio stream with error handling for device selection
    # Open the microphone stream
    device = pyaudio.PyAudio()
    stream = device.open(format=pyaudio.paInt16,
                         channels=1,
                         rate=16000,
                         input=True,
                         frames_per_buffer=8192
                         )
        
    return device, stream

In [72]:
def start_listening(device, stream):
    
    print("Listening...")
    
    # Specify the path for the output text file
    output_file_path = "recognized_text.txt"
    
    def stop_listening(device, stream): 
        print("Voice Assistant Stopped.")    
        stream.stop_stream()
        stream.close()
        device.terminate()

    # Open a text file in write mode using a 'with' block
    #try:
    with open(output_file_path, "w") as output_file:
        print("Listening for speech. Say 'Terminate' to stop.")
        # Start streaming and recognize speech
        while True:
                data = stream.read(1024)#read in chunks of 4096 bytes
                if recognizer.AcceptWaveform(data):#accept waveform of input voice
                    
                    # Parse the JSON result and get the recognized text
                    result = json.loads(recognizer.Result())
                    recognized_text = result['text']
                    
                    # Write recognized text to the file
                    output_file.write(recognized_text + "\n")
                    print(recognized_text)
                    response = ""
                    response = handle_query(recognized_text)
                    #print(response)
                    
                    # Check for the termination keyword
                    if "terminate" in recognized_text.lower():
                        print("Termination keyword detected. Stopping...")
                        # Stop and close the stream
                        stop_listening(device, stream)
                        break
        
    #except Exception as e:
    #    print("KeyboardInterrupt: Stopping the Voice Assistant...")
    #    stop_listening(device, stream)

In [73]:
if __name__ == "__main__":
    device, stream = init_microphon()
    start_listening(device, stream)
    
    

Listening...
Listening for speech. Say 'Terminate' to stop.
what is the role of the we get in the modern society
>>>>> what is the role of the we get in the modern society


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: what is the role of the we get in the modern society

Thought: The concept of "we" gets in the modern society can be interpreted in various ways, but I'll consider it as a reference to collective consciousness, social norms, and shared values.

Action: CreativeAssistant
Action Input:[0m
Observation: [36;1m[1;3mI'd be happy to help with creative ideas related to AI, robotics, and automotive technology! Here are three potential concepts:

1. **Autonomous Vehicle Ecosystems**: Design a network of interconnected autonomous vehicles that work together to optimize traffic flow, reduce congestion, and improve air quality. These ecosystems could utilize AI-powered sensors and edge computing to enable real-time decision-making and adapt to changing traf

TypeError: duckduckgo_search() missing 1 required positional argument: 'query'