In [30]:
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from uuid import uuid4

# Embedding model shared by the vector store (for documents) and by queries.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Chroma collection settings, kept together so they are easy to tune.
chroma_settings = {
    "collection_name": "chatbot",
    "embedding_function": embedding_function,
    "persist_directory": "./chroma_langchain_db",
}
vector_store = Chroma(**chroma_settings)

# Knowledge-base entries for the chatbot.
# Each document carries a stable *string* id (Document.id is a string field).
# The same ids are used as the vector-store ids below, so re-running this cell
# overwrites the existing records instead of appending duplicates.
document_1 = Document(
    page_content="The International University, established in December 2003, is the only multidisciplinary public university in Vietnam that teaches and conducts research entirely in English. The university offers undergraduate and postgraduate programs, focusing on economics, management, and engineering technology. With a model that meets international standards, the university collaborates with prestigious universities from the United States, Europe, and the Asia-Pacific region. The university's goal is to become a leading research university in Vietnam and the region, providing high-quality human resources to meet the demands of integration.",
    id="1",
)

document_2 = Document(
    page_content="The International University offers 23 training programs granted by the International University itself, and 20 training programs in affiliation with prestigious partners.",
    id="2",
)

document_3 = Document(
    page_content="The university offers a wide range of majors, including: Marketing, Statistics, Economics, Chemical Engineering, Chemistry (Biochemistry), Food Technology, Biotechnology, Accounting, Finance and Banking, Environmental Engineering, Aerospace Engineering, Financial Engineering and Risk Management (Applied Mathematics), Construction Management, Civil Engineering, Electronics and Telecommunications Engineering, Biomedical Engineering, Control and Automation Engineering, Industrial Systems Engineering, Logistics and Supply Chain Management, Data Science, Information Technology, Computer Science, English Language, and Business Administration.",
    id="3",
)
documents = [
    document_1,
    document_2,
    document_3,
]

# Deterministic ids (previously random uuid4 values). The collection is
# persisted on disk, so random ids made every re-run of this cell store another
# copy of each document. The variable keeps its original name `uuids` in case
# other cells reference it.
# NOTE(review): copies stored by earlier runs under random ids are not removed
# by this change; clear ./chroma_langchain_db once if it already has duplicates.
uuids = [doc.id for doc in documents]

vector_store.add_documents(documents=documents, ids=uuids)

# Sanity check: embed a sample question and fetch the single closest document.
sample_query = "How many programs do university offers"
sample_query_vector = embedding_function.embed_query(sample_query)

results = vector_store.similarity_search_by_vector(embedding=sample_query_vector, k=1)

# Print each hit as a bullet line.
for hit in results:
    print("* " + hit.page_content)




* The International University offers 23 training programs granted by the International University itself, and 20 training programs in affiliation with prestigious partners.


In [31]:
# Generate response
import getpass
import os

# Ask for the OpenAI key only when it is not already configured. This keeps
# "Restart & Run All" from blocking on a prompt (or overwriting a valid key)
# when the environment already provides OPENAI_API_KEY.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")


In [32]:
from langchain_openai import ChatOpenAI

# Chat model used to generate the final answers.
LLM_MODEL_NAME = "gpt-4o-mini"
llm = ChatOpenAI(model=LLM_MODEL_NAME)

In [33]:
from langchain import hub
# Expose the vector store through the retriever interface (default search settings).
retriever = vector_store.as_retriever()

# Pull the RAG prompt template from the LangChain hub.
RAG_PROMPT_ID = "rlm/rag-prompt"
prompt = hub.pull(RAG_PROMPT_ID)



In [34]:
# Render the prompt with placeholder values to inspect the exact messages
# that will be sent to the model.
filler_inputs = {"context": "filler context", "question": "filler question"}
rendered_prompt = prompt.invoke(filler_inputs)
example_messages = rendered_prompt.to_messages()
example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:", additional_kwargs={}, response_metadata={})]

In [35]:
# Show the text of the first rendered message.
first_message = example_messages[0]
print(first_message.content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


In [36]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Combine retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    contents = []
    for item in docs:
        contents.append(item.page_content)
    separator = "\n\n"
    return separator.join(contents)


# Inputs for the prompt: "context" is filled by retrieving documents for the
# incoming question and formatting them; "question" is passed through as-is.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> render prompt -> call the LLM -> plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [41]:
# Ask the chain a question; the answer string is displayed as the cell output.
user_question = "Show me all major"
rag_chain.invoke(user_question)

'The major programs offered by the university include Marketing, Statistics, Economics, Chemical Engineering, Chemistry (Biochemistry), Food Technology, Biotechnology, Accounting, Finance and Banking, Environmental Engineering, Aerospace Engineering, Financial Engineering and Risk Management, Construction Management, Civil Engineering, Electronics and Telecommunications Engineering, Biomedical Engineering, Control and Automation Engineering, Industrial Systems Engineering, Logistics and Supply Chain Management, Data Science, Information Technology, Computer Science, English Language, and Business Administration.'