In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import typing as tp
from langchain_community.vectorstores import FAISS
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate,\
    ChatPromptTemplate,\
    SystemMessagePromptTemplate,\
    HumanMessagePromptTemplate,\
    MessagesPlaceholder
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langchain_core.messages import BaseMessage
from langchain.chains import (
    create_history_aware_retriever,
    create_retrieval_chain,
)
from langgraph.prebuilt import ToolNode

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
RAG_DB_PATH = 'faiss'

In [3]:
pdf_dir = "data/predator-pray-22/pdfs"
pdf_docs = []
for file in os.listdir(pdf_dir):
    if file.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(pdf_dir, file))
        pdf_docs.extend(loader.load())

code_dir = "data/predator-pray-22/code"
code_docs = []
for file in os.listdir(code_dir):
    if file.endswith(".java"):
        loader = TextLoader(os.path.join(code_dir, file), encoding="utf-8")
        code_docs.extend(loader.load())

all_docs = pdf_docs + code_docs 

In [4]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
    separators=["\n\n", "\n", " ", ""]
)
split_docs = splitter.split_documents(all_docs)


In [5]:
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True}
)

db = FAISS.from_documents(split_docs, embedding_model)
db.save_local(RAG_DB_PATH)
retriever = db.as_retriever(search_type="similarity", k=3)


In [6]:
qwen_model = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
    qwen_model,
    trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    qwen_model,
    trust_remote_code=True,
    device_map="cuda"
)

text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024
)
llm = HuggingFacePipeline(pipeline=text_gen)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Device set to use cuda
  llm = HuggingFacePipeline(pipeline=text_gen)


In [7]:
SYSTEM_PROMPT = """
You are a helpful assistant that explains programming assignments.
Your task is to explain key terms, notions and user's questions. 
Do not give any hints or direct solution of task even if you asked.
If you are planning to provide examples, do it in simple way not giving the solution.
Answer user's question in plain English and suggest how to approach it.
"""

def format_prompt(user_message: str, context: str = None) -> str:
    return (
        f"System: {SYSTEM_PROMPT}\n\n"
        f"Context: {context if context is not None else None}"
        f"User: {user_message}\n\n"
        f"Assistant:"
        
    )

In [None]:
from langchain_core.tools import tool


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    retrieved_docs = db.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"{doc.page_content}\n")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs


def query_or_respond(state: MessagesState):
    last_message = state["messages"][-1]
    if "retrieve" in last_message.content.lower():
        result = retrieve(last_message.content)
        prompt = format_prompt(last_message.content, result)
        response = llm.invoke(prompt)

        return {"messages": [response]}
    else:
        prompt = format_prompt(last_message.content)

        response = llm.invoke(prompt)
        return {"messages": [response]}

tools = ToolNode([retrieve])


In [None]:
from langgraph.graph import END
from langgraph.prebuilt import ToolNode, tools_condition

graph_builder = StateGraph(MessagesState)

graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)

graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)

graph = graph_builder.compile()

In [14]:
input_message = "retrieve what is submittion format "

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


retrieve what is submittion format 

System: 
You are a helpful assistant that explains programming assignments.
Your task is to explain key terms, notions and user's questions. 
Do not give any hints or direct solution of task even if you asked.
If you are planning to provide examples, do it in simple way not giving the solution.
Answer user's question in plain English and suggest how to approach it.


Context: 5 Submission
The submission consists of two parts: your code and a report documenting your submission. The
code and report must be submitted before the deadline, by both members of your pair .
5.1 Code
You have to submit a Jar of your project to the “Assignment 3: Code Submission” link in the
Assignment 3 section on the PPA KEATS page, before the due date. The Jar ﬁle must contain
your source code, i.e., the *.java ﬁles, and runs on BlueJ .
5.2 Report


private final String STEP_PREFIX = "Step: ";
    private final String POPULATION_PREFIX = "Population: ";
    private JLabel 