In [9]:

import speech_recognition as sr

# Speech-to-text in this notebook goes through Google Speech Recognition.

# Recorded questions to transcribe, one spoken question per file.
audio_file_path, audio_file_path_2 = (
    "assets/what_is_task_decomposition.wav",
    "assets/what_is_this_article_about.wav",
)
# Language code shared by the transcription calls and the text-to-speech step.
base_language = "en"


def transcribe_audio(audio_file_path: str, language: str = 'en') -> "str | None":
    """Transcribe a recorded audio file with Google Speech Recognition.

    Args:
        audio_file_path: Path of the audio file to read.
        language: Language code passed to the recognizer (for example "th"
            for Thai). One call transcribes in a single language, so a
            recording that mixes Thai and English is not handled.

    Returns:
        The recognized text, or None when the speech could not be understood
        or the recognition request failed. A message is printed in both
        failure cases, so callers must check for None before using the result.
    """
    recognizer = sr.Recognizer()

    # The file is only needed while its samples are read; close it before the
    # (potentially slow) network request below.
    with sr.AudioFile(audio_file_path) as source:
        audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        print("Sorry, I couldn't understand what you said.")
    except sr.RequestError as e:
        # Surface the underlying reason instead of discarding it.
        print(f"Sorry, an error occurred. Please check your internet connection. ({e})")
    return None




# Transcribe both recorded questions up front so later cells only handle text.
first_query = transcribe_audio(audio_file_path, base_language)
second_query = transcribe_audio(audio_file_path_2, base_language)

# transcribe_audio() prints a message and yields None when recognition fails.
# Stop here with a clear error instead of passing an empty question on to the
# retrieval chain and the text-to-speech step.
if not first_query:
    raise RuntimeError(f"Could not transcribe {audio_file_path!r}; see the message above.")
if not second_query:
    raise RuntimeError(f"Could not transcribe {audio_file_path_2!r}; see the message above.")




In [10]:
# Send the transcribed text to OpenAI and use it to generate a response
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
import bs4
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory

# Chat model used by both the query-rewriting step and the answering step.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# 1. Load, chunk and index the contents of the blog to create a retriever.
# Parsing is restricted to the post's content, title and header elements.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
docs = loader.load()

# Split the page into overlapping chunks, embed them with OpenAI embeddings,
# index them in Chroma and expose the index as a retriever.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()


# System instructions for the query-rewriting step: turn the latest question
# into one that can be understood without the chat history.
contextualize_q_system_prompt = " ".join(
    (
        "Given a chat history and the latest user question which might reference",
        "context in the chat history, formulate a standalone question which can",
        "be understood without the chat history. Do NOT answer the question, just",
        "reformulate it if needed and otherwise return it as is.",
    )
)

# Prompt for the query-rewriting step: instructions first, then the prior
# turns, then the newest user question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever wrapper built from the model, the base retriever and the prompt above.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


# 2. Incorporate the retriever into a question-answering chain.
# System instructions for the answering step; "{context}" is the template slot
# that follows the instructions after a blank line.
system_prompt = (
    "You are an assistant for question-answering tasks. Use the following "
    "pieces of retrieved context to answer the question. If you don't know "
    "the answer, say that you don't know. Use three sentences maximum and "
    "keep the answer concise."
    "\n\n{context}"
)

# Answering prompt: instructions plus retrieved context, then the prior turns,
# then the user's question. The chat history is included so that follow-up
# questions are answered with the earlier turns in view, not only retrieved
# with them. It is optional so the chain still accepts input without history.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history", optional=True),
        ("human", "{input}"),
    ]
)

# Chain that feeds the retrieved documents and the question to the model.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full pipeline: history-aware retrieval followed by answering.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

from langchain_core.messages import AIMessage, HumanMessage

# Running transcript passed to the chain on every call; it starts empty.
chat_history = []

# First turn: ask the first transcribed question.
question = first_query
ai_msg_1 = rag_chain.invoke(dict(input=question, chat_history=chat_history))

# Record the exchange so the second call receives it as history.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=ai_msg_1["answer"]),
]

print(ai_msg_1["answer"])

# Second turn: ask the second transcribed question with the first exchange as history.
second_question = second_query
ai_msg_2 = rag_chain.invoke(dict(input=second_question, chat_history=chat_history))

print(ai_msg_2["answer"])

### Statefully manage chat history ###
# Maps a session id to the message history kept for that session.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap the RAG chain so the history for each session is looked up through
# get_session_history instead of being passed in by hand.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",      # key of the user's question in the input dict
    history_messages_key="chat_history",  # key under which the history is supplied
    output_messages_key="answer",    # key of the reply in the chain's output
)

# Ask the first question through the history-managed chain. The session id
# "abc123" becomes a key in `store`. The subscripted answer is not assigned,
# and since this is not the cell's last expression it is not displayed either.
conversational_rag_chain.invoke(
    {"input": question},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

# Print the stored conversation, labelling each message by who sent it.
for message in store["abc123"].messages:
    prefix = "AI" if isinstance(message, AIMessage) else "User"
    print(f"{prefix}: {message.content}\n")




Task decomposition is a technique used to break down a complex task into smaller and more manageable subtasks or steps. This process helps in structuring the problem-solving approach and allows for better planning and organization of the overall task. Techniques like Chain of Thought (CoT) guide models to think step by step, transforming large tasks into simpler components for improved performance and understanding of the thinking process.




The article discusses the challenges and limitations associated with building LLM-centered agents. It highlights common issues that arise when developing agents based on large language models.
User: what is Task decomposition

AI: Task decomposition involves breaking down a complex task into smaller, more manageable steps or subtasks. It allows for a more systematic approach to problem-solving by dividing the overall task into simpler components. Task decomposition can be done using techniques like Chain of Thought (CoT), which prompts models to think step by step and decompose difficult tasks into easier parts for better understanding and execution.



In [11]:
# Convert the second answer to speech and save it as an MP3 file

from gtts import gTTS
import os  # NOTE(review): not referenced in this cell — confirm whether it is still needed


# Synthesize the answer text in the same language code used for transcription
gtts_obj = gTTS(text=ai_msg_2["answer"], lang=base_language, slow=False)

# Save the synthesized audio to an MP3 file named
# "answer.mp3"
gtts_obj.save("answer.mp3")

sh: start: command not found


32512

In [14]:
# Play back the saved answer audio

import pygame

# Initialize the mixer module
pygame.mixer.init()

# Load the MP3 file written by the text-to-speech cell
pygame.mixer.music.load("answer.mp3")

# Start playback of the loaded MP3 file
# NOTE(review): play() presumably returns before playback finishes — confirm
pygame.mixer.music.play()