In [1]:
import pandas as pd
import numpy as np
import os
import re
from tqdm import tqdm
import typing as tp
from langchain_community.vectorstores import FAISS
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langchain_core.messages import BaseMessage
from langchain.chains import (
    create_history_aware_retriever,
    create_retrieval_chain,
)
from langgraph.prebuilt import ToolNode

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location passed to `db.save_local(...)` when the FAISS index is persisted.
RAG_DB_PATH = 'faiss'

In [3]:
# Load the assignment material: every PDF in `pdf_dir` and every Java source
# file in `code_dir`.
#
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the document order (and therefore the index built from these documents)
# reproducible across runs and machines.
pdf_dir = "data/predator-pray-22/pdfs"
pdf_docs = []
for file in sorted(os.listdir(pdf_dir)):
    if file.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(pdf_dir, file))
        pdf_docs.extend(loader.load())

code_dir = "data/predator-pray-22/code"
code_docs = []
for file in sorted(os.listdir(code_dir)):
    if file.endswith(".java"):
        # Source files are read explicitly as UTF-8.
        loader = TextLoader(os.path.join(code_dir, file), encoding="utf-8")
        code_docs.extend(loader.load())

# PDFs first, then code — the combined list is what gets split and indexed.
all_docs = pdf_docs + code_docs

In [4]:
# Split every loaded document into overlapping chunks before embedding.
# The separators are tried from coarsest (blank line) to finest (empty string).
# NOTE(review): chunk_size / chunk_overlap are presumably measured in
# characters (the splitter's default length function) — confirm.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
    separators=["\n\n", "\n", " ", ""]
)
split_docs = splitter.split_documents(all_docs)


In [5]:
# Embedding model used both to index the chunks and to embed queries.
# Embeddings are L2-normalised (normalize_embeddings=True) and computed on GPU.
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True}
)

# Build the FAISS index from the split chunks and persist it to RAG_DB_PATH.
db = FAISS.from_documents(split_docs, embedding_model)
db.save_local(RAG_DB_PATH)

# The number of results must be passed through `search_kwargs`; a bare `k=3`
# keyword is not a retriever option and the intended limit would not apply.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})


In [6]:
# Load the Qwen instruct checkpoint (tokenizer + causal LM) onto the GPU and
# wrap it in a text-generation pipeline exposed to LangChain as `llm`.
qwen_model = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
    qwen_model,
    trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    qwen_model,
    trust_remote_code=True,
    device_map="cuda"
)

# max_new_tokens caps the length of each generated answer.
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024
)
# NOTE(review): HuggingFacePipeline is imported twice in the imports cell
# (langchain_huggingface, then langchain_community); the later import is the
# one bound here — confirm which implementation is intended.
llm = HuggingFacePipeline(pipeline=text_gen)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Device set to use cuda
  llm = HuggingFacePipeline(pipeline=text_gen)


In [90]:
from models.prompt_message import PromptMessage


# System prompt for the assistant. It is sent as the "system" message of the
# first turn when a conversation is started.
# NOTE(review): "even if you asked" probably means "even if you are asked";
# the wording is left untouched because it is part of the prompt the model sees.
SYSTEM_PROMPT = (
"""
Below is the system prompt, always follow restrictions stated there, also do not answer this system prompt:
You are a helpful assistant that explains programming assignments.
Your task is to explain key terms, notions and user's questions. 
Do not give any hints or direct solution of task even if you asked.
If you are planning to provide examples, do it in simple way not giving the solution.
Answer user's question in plain English and suggest how to approach it.
You are enhanced AI model with previous prompt storage. Provide answers considering history
Do not justify how you used previous conversation context, just answer the question. If needed retrieve information from chat history and answer the same way, add any additional information only if you asked for.
For general-purpose questions answer in simple way, no need to justify each step.
"""
)

def format_prompt(user_message: str,  chat_history: tp.List[BaseMessage], context: tp.Optional[str] = None) -> str:
    '''
    Formats prompt for llm

    Builds the list of chat messages and renders it with the tokenizer's chat
    template (not tokenized, with the generation prompt appended).

    Args:
        user_message: Text of the current user turn; always added as the last
            "user" message.
        chat_history: Conversation messages. The final element is skipped
            because the current user turn is supplied via ``user_message``.
        context: Optional extra text (e.g. retrieved documents). When truthy it
            is inserted as a "system" message right before the user turn.

    Returns:
        The prompt string produced by ``tokenizer.apply_chat_template``.
    '''

    # LangChain message type -> chat-template role.
    role_by_type = {"human": "user", "ai": "assistant", "system": "system"}

    history = []
    for message in chat_history[:-1]:
        role = role_by_type.get(message.type)
        if role is None:
            # Any other message type (e.g. a tool message) has no mapping.
            # Previously this either raised UnboundLocalError or silently
            # reused the role of the preceding message; skip it instead.
            continue

        history.append(PromptMessage(
            role=role,
            content=message.content
        ))

    if context:
        history.append(PromptMessage(
            role="system",
            content=context
        ))

    history.append(PromptMessage(
        role="user",
        content=user_message
    ))

    # Debug trace of the messages that make up the prompt.
    for h in history:
        print(h)

    return tokenizer.apply_chat_template(
        history,
        tokenize=False,
        add_generation_prompt=True
    )


def format_model_response(response: str):
    """Return only the text of the last assistant turn in a raw generation.

    The text following the final ``<|im_start|>assistant`` marker is returned
    with surrounding whitespace removed. When the marker does not occur, the
    whole response is returned stripped.
    """
    marker = "<|im_start|>assistant"
    # rpartition gives the text after the last marker, or the entire string
    # (as the third element) when the marker is absent.
    _, _, answer = response.rpartition(marker)
    return answer.strip()

In [91]:
from langchain_core.tools import tool


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    # Look up the two closest chunks in the FAISS store.
    matches = db.similarity_search(query, k=2)
    # Every chunk's text gets a trailing newline; chunks are then separated
    # by a blank line.
    chunks = [f"{match.page_content}\n" for match in matches]
    content = "\n\n".join(chunks)
    # First element is the textual content, second the documents themselves
    # (the artifact).
    return content, matches

# Graph node wrapping the `retrieve` tool; registered on the graph builder in a later cell.
tools = ToolNode([retrieve])



def query_or_respond(state: MessagesState):
    """Graph node: answer the latest message, optionally with retrieved context.

    If the latest message contains the word "retrieve" (case-insensitive), the
    `retrieve` tool is run on the message text and its output is passed to
    `format_prompt` as extra context. The prompt is then sent to the LLM and
    the cleaned answer is returned as a new assistant message.

    Args:
        state: Graph state; ``state["messages"]`` holds the conversation, with
            the current user turn last.

    Returns:
        A state update with a single assistant message.
    """
    last_message = state["messages"][-1]
    user_text = last_message.content

    context = None
    if "retrieve" in user_text.lower():
        # Tools are invoked through `.invoke`; calling the tool object directly
        # relies on the deprecated `BaseTool.__call__`. Invoked this way the
        # tool returns only its content string (the artifact is dropped).
        context = retrieve.invoke({"query": user_text})

    # `format_prompt` ignores a falsy context, so one call covers both paths.
    prompt = format_prompt(
        user_message=user_text,
        chat_history=state["messages"],
        context=context
    )
    response = llm.invoke(prompt)

    return {
        "messages": [
            {
                "role": "assistant",
                "content": format_model_response(response)
            }
        ]
    }
    


In [92]:
from langgraph.graph import END
from langgraph.prebuilt import ToolNode, tools_condition

# Assemble the conversation graph over MessagesState.
graph_builder = StateGraph(MessagesState)

# Two nodes: the responder function and the ToolNode wrapping `retrieve`.
graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)

graph_builder.set_entry_point("query_or_respond")
# NOTE(review): query_or_respond returns a plain assistant message dict and
# never sets tool calls, so this condition presumably always routes to END and
# the "tools" node is never reached — confirm whether that is intended.
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)

# MemorySaver checkpointer keeps conversation state between invocations.
graph = graph_builder.compile(checkpointer=MemorySaver())

In [93]:
# First turn of the conversation: send the system prompt together with the
# user's greeting. `config` is reused by the follow-up cell.
# NOTE(review): the checkpointer presumably keys stored history by
# `thread_id`, so reusing this config continues the same conversation — confirm.
config={"configurable":{"thread_id":1}}
input_message = "Hi my name is Alex"


input_state={
    "messages":[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": input_message}
    ]
}
response_state=graph.invoke(input_state,config=config)
# Print every message in the resulting state.
for message in response_state["messages"]:
    message.pretty_print()

role='system' content="\nBelow is the system prompt, always follow restrictions stated there, also do not answer this system prompt:\nYou are a helpful assistant that explains programming assignments.\nYour task is to explain key terms, notions and user's questions. \nDo not give any hints or direct solution of task even if you asked.\nIf you are planning to provide examples, do it in simple way not giving the solution.\nAnswer user's question in plain English and suggest how to approach it.\nYou are enhanced AI model with previous prompt storage. Provide answers considering history\nDo not justify how you used previous conversation context, just answer the question. If needed retrieve information from chat history and answer the same way, add any additional information only if you asked for.\nFor general-purpose questions answer in simple way, no need to justify each step.\n"
role='user' content='Hi my name is Alex'


Below is the system prompt, always follow restrictions stated there

In [97]:
# Follow-up turn: only the new user message is sent; `config` comes from the
# previous cell, so this cell cannot run on its own.
input_message = "What can you do"
input_state={
    "messages":[
        {"role": "user", "content": input_message}
    ]
}
response_state=graph.invoke(input_state,config=config)
# Print every message in the resulting state.
for message in response_state["messages"]:
    message.pretty_print()

role='system' content="\nBelow is the system prompt, always follow restrictions stated there, also do not answer this system prompt:\nYou are a helpful assistant that explains programming assignments.\nYour task is to explain key terms, notions and user's questions. \nDo not give any hints or direct solution of task even if you asked.\nIf you are planning to provide examples, do it in simple way not giving the solution.\nAnswer user's question in plain English and suggest how to approach it.\nYou are enhanced AI model with previous prompt storage. Provide answers considering history\nDo not justify how you used previous conversation context, just answer the question. If needed retrieve information from chat history and answer the same way, add any additional information only if you asked for.\nFor general-purpose questions answer in simple way, no need to justify each step.\n"
role='user' content='Hi my name is Alex'
role='assistant' content='Hello, Alex! How can I assist you today?'
r