In [2]:
#!pip install langchain chromadb transformers pytesseract torch torchvision


In [1]:
from langchain_core.prompts import PromptTemplate
from langchain import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import pytesseract
from PIL import Image


In [2]:
from huggingface_hub import login
# Login to Hugging Face Hub
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma(
    collection_name="database",
    embedding_function=embedding_model,
    persist_directory="database"
)
vectorstore._collection
num_chunks = len(vectorstore.get()["ids"])
print(f"Number of chunks loaded from 'database': {num_chunks}")
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")





  vectorstore = Chroma(


Number of chunks loaded from 'database': 1731


In [28]:
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")

llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=300)
hf_llm = HuggingFacePipeline(pipeline=llm_pipeline)

Device set to use cpu


In [29]:
prompt = PromptTemplate(
    input_variables=["context", "query"],
    template="""
You are a helpful assistant with deep chess knowledge. 
Use the following context to answer the user query:

Context:
{context}

Question: {query}

Answer:
""".strip()
)
llm_chain = LLMChain(llm=hf_llm, prompt=prompt)

In [45]:
from langchain_core.documents import Document

def rag_qa(query: str) -> str:
    docs: list[Document] = retriever.get_relevant_documents(query)
    print(f"\nQuery: {query}")
    print(f"Retrieved {len(docs)} relevant document chunks")

    context = "\n\n".join([doc.page_content for doc in docs])
    response = llm_chain.invoke({"context": context, "query": query})["text"]

    if context in response:
        response = response.replace(context, "").strip()
    if query in response:
        response = response.replace(query, "").strip()

    return response

In [31]:
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v2")
def transcribe_audio(path): return asr(path).get("text","")
def extract_text_from_image(path): return pytesseract.image_to_string(Image.open(path))

Device set to use cuda:0


In [None]:
def intelligent_answer_agent(query: str) -> str:
    return rag_qa(query)

In [None]:
def recommendation_agent(user_input: str) -> str:
    prompt = f"Please recommend something based on this request: {user_input}"
    return rag_qa(prompt)

In [33]:
def summarization_agent(text: str) -> str:
    prompt = f"Summarize the following chess-related content in 3 sentences:\n\n{text}"
    return rag_qa(prompt)

In [34]:
def default_agent(query: str) -> str:
    prompt = f"You are a ChessBot, Answer the following question in a concise and helpful manner:\n\n{query}"
    return rag_qa(prompt)

In [35]:
def multimodal_agent(input_type: str, data_path: str) -> str:
    try:
        # Process input based on type
        if input_type == "text":
            user_input = data_path.lower()
        elif input_type == "voice":
            user_input = transcribe_audio(data_path)
            if not user_input or user_input.strip() == "":
                return "Failed to process audio input. Please check the file and try again."
        elif input_type == "image":
            user_input = extract_text_from_image(data_path)
            if not user_input or user_input.strip() == "":
                return "Failed to process image input. Please check the file and try again."
        else:
            return "Invalid input type. Use 'text', 'voice', or 'image'."

        if any(x in user_input for x in ["what is", "explain", "how does", "who"]):
            return intelligent_answer_agent(user_input)
        elif "recommend" in user_input or "suggest" in user_input:
            return recommendation_agent(user_input)
        elif "summarize" in user_input:
            return summarization_agent(user_input)
        else:
            return default_agent(user_input)
    except Exception as e:
        return f"An error occurred while processing the input: {str(e)}"

In [47]:
print(multimodal_agent("text", "recommend a good chess book"))


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: Please recommend something based on this request: recommend a good chess book
Retrieved 3 relevant document chunks
You are a helpful assistant with deep chess knowledge. 
Use the following context to answer the user query:

Context:


Question: 

Answer: The Complete Guide to Chess, Master: Chess Tactics, Chess Openings and Chess Strategies by Jose R. Capablanca

You can read the book in the following link: http://www.amazon.com/Complete-Guide-Chess-Master-Chess/dp/1412620104/ref=sr_1_1?ie=UTF8&qid=1369285886&sr=8-1&keywords=the+complete+guide+to+chess

You can read the book in the following link: http://www.amazon.com/Complete-Guide-Chess-Master-Chess/dp/1412620104/ref=sr_1_1?ie=UTF8&qid=1369285886&sr=8-1&keywords=the+complete+guide+to+chess

You can read the book in the following link: http://www.amazon.com/Complete-Guide-Chess-Master-Chess/dp/1412620104/ref=sr_1_1?ie=UTF8&qid=1369285886&sr=8-1&keywords=the+complete+guide+to+chess

You can read the book in the following link:

In [48]:
print(multimodal_agent("text", "summarize alekhine's opening"))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: Summarize the following chess-related content in 3 sentences:

summarize alekhine's opening
Retrieved 3 relevant document chunks
You are a helpful assistant with deep chess knowledge. 
Use the following context to answer the user query:

Context:


Question: 

Answer: In the opening, Alekhine's Defense is a popular opening for black. 
It is named after the Russian grandmaster, Alexander Alekhine, who was a very talented
chess player. 
The opening is a mix of the Queen's Gambit and the King's Indian Defense. 
It is a popular choice for black because it allows the development of the dark-squared
bishop, which is a key piece in the opening. 
The opening also has a very strong middlegame, which is why it is so popular. 
The opening is a popular choice for black because it allows the development of the dark-squared
bishop, which is a key piece in the opening. 
The opening also has a very strong middlegame, which is why it is so popular.

Question: Summarize the following chess-relat

In [49]:
print(multimodal_agent("image", r"C:\PES\CSSEM-6\GenAI\ChessBot\SICILIANDEFENSE.png"))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: You are a ChessBot, Answer the following question in a concise and helpful manner:

WHAT IS SICILIAN DEFENSE?

Retrieved 3 relevant document chunks
You are a helpful assistant with deep chess knowledge. 
Use the following context to answer the user query:

Context:


Question: 

Answer: Sicilian Defense is a chess opening that can be played by either side. It is named after the Italian chess master Emanuel Lasker, who introduced it in 1900. It is considered to be one of the most powerful chess openings, and has been used by many great chess players throughout history. The Sicilian Defense is a chess opening that can be played by either side. It is named after the Italian chess master Emanuel Lasker, who introduced it in 1900. It is considered to be one of the most powerful chess openings, and has been used by many great chess players throughout history.


In [51]:
print(multimodal_agent("voice", r"C:\PES\CSSEM-6\GenAI\ChessBot\KINGSGAMBIT.mp3"))

Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Query: You are a ChessBot, Answer the following question in a concise and helpful manner:

 What is King's Gambit?
Retrieved 3 relevant document chunks
You are a helpful assistant with deep chess knowledge. 
Use the following context to answer the user query:

Context:


Question: 

Answer: King's Gambit is a chess opening in which White plays 1. P-Q4 against Black's King's Knight's Pawn, with the idea of attacking the Queen's Bishop with the King's Bishop. 

The main line is 1. P-Q4 2. Kt-B3 P-Q4 3. Kt-KB3 Kt-B3 4. B-Kt5 Kt-B3 5. P-K3 P-B4 6. Kt-KB3 P-K3 7. R-—Bh6 RxP 8. RxP P-K4 9. B-Kt5 P-Q4 10. Kt-KB3 P-K4 11. B-Kt5 P-Q4 12. Kt-KB3 P-K4 13. B-Kt5 P-Q4 14. Kt-KB3 P-K4 15. B-Kt5 P-Q4 16. Kt-KB3 P-K4 17. B-Kt5 P-Q4 18. Kt-KB3 P-K4 19. B-Kt5 P-Q4 20. Kt-KB3 P-K4 21. B-Kt5 P-Q4 22. Kt-KB3 P-K4 23. KR—Kr RxR 24. RXR R-—QKt3 25. RXP R-—
