# Dependencies

In [15]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [7]:
import bs4
from langchain import hub
from langchain.document_loaders import DirectoryLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Ingest into vectordb

### Embedding model

model=thenlper/gte-large
volume=/data/llm/hf-tei-data
docker run --gpus all --env HTTPS_PROXY=$https_proxy --env HTTP_PROXY=$http_proxy -p 8188:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:turing-0.6 --model-id $model

In [3]:
from langchain_community.embeddings import HuggingFaceHubEmbeddings

# Embedding client pointed at the local inference endpoint on port 8188;
# note that the "model" argument here is an endpoint URL, not a model id.
embeddings_model = HuggingFaceHubEmbeddings(
    model="http://localhost:8188",
)

### Load, chunk, store source data

In [8]:
from langchain_community.document_loaders import PyPDFLoader

# Read the source PDF into LangChain documents. The retrieval output further
# down shows each document carrying 'page' and 'source' metadata.
loader = PyPDFLoader(
    "documents/2024 Intel Flex Engineering Malaysia - NEO_20240228.pdf"
)
pages = loader.load()

In [15]:
# Split the loaded pages into chunks (chunk_size=2000, no overlap).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(pages)

# Embed the chunks and store them in a Chroma collection persisted under
# database/chromadb.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings_model,
    persist_directory="database/chromadb",
)

In [16]:

# Re-open the Chroma store persisted under database/chromadb. The embedding
# function must be the same model that was used when the store was built,
# otherwise query vectors would not be comparable to the stored ones.
vecstore = Chroma(
    persist_directory="database/chromadb",
    embedding_function=embeddings_model,
)

# Retrieval

In [6]:
# Build a similarity-search retriever over the vector store that returns the
# six closest chunks (k=6), then try it out on a sample question.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)
retrieved_docs = retriever.invoke("Who are the new hires?")
retrieved_docs

[Document(page_content='Intel Confidential Intel Flex\n 2Intel Flex Malaysia Engineering & TFM\nRecent New Hires\nEngineering TFM\nTan, An Nie\nMohamad, Siti Nurhanisah\n(Hanisah)\nTai, Andre\nWei Xiang\nEe, Elgene  \nDing RenNah, Wan Jun\n(Nicole )\nTay, Xue Hao\n(Adam )Ong, Frankie\nWei Quan', metadata={'page': 1, 'source': 'documents/2024 Intel Flex Engineering Malaysia - NEO_20240228.pdf'}),
 Document(page_content='Intel Confidential Intel Flex\n 13Experienced employee assigned to \nbefriend the new hire in order to help \nintegrating the new hire to Intel and \nIntel Flex.\nHello, buddy!\nQuarterly Group Buddy Meetup1:1 Meeting\n6 months\nBuddy Check List\n Buddy Lunch\nLearn More: Intel Flex Malaysia - New Hire Integration Program', metadata={'page': 12, 'source': 'documents/2024 Intel Flex Engineering Malaysia - NEO_20240228.pdf'}),
 Document(page_content='Intel Confidential Intel Flex\n 16\nEach new hire will receive multiple ramp -up list, incl. Focused List \nfrom Mini -TFA, 

In [7]:
# Sanity check: count the documents returned by the retriever.
len(retrieved_docs)

6

# Chat Model

In [8]:
from langchain_core.messages import HumanMessage
from langchain_openai import AzureChatOpenAI

# Azure-hosted chat model used for generation. The API version is read from
# the environment (presumably supplied by the .env file loaded at the top of
# the notebook -- confirm); a missing variable raises KeyError here.
llm = AzureChatOpenAI(
    azure_deployment="llm-rag-chatgpt35",
    openai_api_version=os.environ["OPENAI_API_VERSION"],
    temperature=0.7,
    max_tokens=2048,
)

In [9]:
# Create prompt template
# The template text contains two placeholders, {context} and {question};
# the RAG chain supplies a value for each when the prompt is formatted.
from langchain_core.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
# Turn the raw string into a PromptTemplate object usable in a chain.
custom_rag_prompt = PromptTemplate.from_template(template)

# RAG Chain

In [10]:
# Pipeline
def format_docs(docs):
    """Concatenate the text of every document into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    texts = [document.page_content for document in docs]
    return separator.join(texts)

# Compose the RAG pipeline with the `|` operator:
#   1. the input question is mapped to a dict: "context" is the retriever
#      output rendered to text by format_docs, "question" is the input
#      forwarded by RunnablePassthrough;
#   2. the dict fills the prompt template;
#   3. the prompt is sent to the chat model;
#   4. StrOutputParser turns the model reply into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)


In [11]:
# Run the full chain on a sample question; the answer string is shown as the
# cell output.
rag_chain.invoke("Who are the new hires")

'Tan, An Nie\nMohamad, Siti Nurhanisah (Hanisah)\nTai, Andre\nWei Xiang\nEe, Elgene\nDing RenNah, Wan Jun (Nicole)\nTay, Xue Hao (Adam)\nOng, Frankie\nWei Quan\n\nThanks for asking!'

# Modularized

In [1]:
from rag.generate import *
from rag.ingest import *
from rag.retrieve import *
import textwrap

In [2]:
# End-to-end run of the modularized pipeline (ingest -> retrieve -> generate)
# using the Azure-hosted chat model. The helper functions come from the
# rag.* star imports in the previous cell.
import time  # imported up front instead of mid-way through the cell

# Query
query = "Who are the new hires?"
chromadb_save_path = "database/chromadb/flex_neo"

# Ingest
print("Ingest")
embedding_model = get_embedding_model("http://localhost:8188")
documents = load_documents("documents/2024 Intel Flex Engineering Malaysia - NEO_20240228.pdf")
chunks = chunk_documents(doc=documents, chunk_size=2000, chunk_overlap=0)
save_chunks_to_chroma(chunks, embedding_model, chromadb_save_path)

# Retriever
print("\nRetrieval")
retriever = get_chroma_retriever(chromadb_save_path, embedding_model)
contexts = retrieve(retriever, query)

# Rag Chain
print("\nGeneration")
llm = get_llm_azure(deployment_name="llm-rag-chatgpt35", max_tokens=2048, temperature=0.7)
prompt_template = get_prompt_template()  # Use the default template by not giving input args
rag_chain = create_rag_chain(retriever, prompt_template, llm)

# Chat
# Time the generation with perf_counter(): unlike time.time() it is a
# monotonic, high-resolution clock, so the measured interval cannot be skewed
# by system clock adjustments.
t1 = time.perf_counter()
generated = rag_chain.invoke(query)
t2 = time.perf_counter() - t1  # elapsed seconds
word_count = len(generated.split())  # whitespace-separated words in the answer
print(f"Generation took {(t2)*1000:.4f} ms. {word_count} words. {word_count / t2:.4f} words per second.\n")

print("Results:")
print(f" Query:\n\t{query}")
print(f" Rag generation:\n\t{textwrap.fill(generated, width=100)}")

Ingest
load_documents took 130.0693 ms.
chunk_documents took 0.6421 ms.
save_chunks_to_chroma took 589.7884 ms.

Retrieval
retrieve took 29.3779 ms.

Generation
Generation took 2030.2331 ms. 34 words. 16.7468 words per second.

Results:
 Query:
	Who are the new hires?
 Rag generation:
	The new hires are Tan, An Nie; Mohamad, Siti Nurhanisah (Hanisah); Tai, Andre; Wei Xiang; Ee,
Elgene; Ding RenNah, Wan Jun (Nicole); Tay, Xue Hao (Adam); Ong, Frankie; and Wei Quan. Thanks for
asking!


In [15]:
# Ask the same question with and without retrieval to show the effect of RAG.
query = "What does intel flex engineering do?"
print(" Response without RAG:\n")
# llm.invoke returns a message object; its text is in .content.
print(textwrap.fill(llm.invoke(query).content, width=100))
print()
print(" Response with RAG:\n")
# The chain already returns a plain string. Wrap it at the same width as the
# non-RAG answer so the two responses are formatted alike for comparison.
print(textwrap.fill(rag_chain.invoke(query), width=100))

 Response without RAG:

Intel Flex Engineering is a program offered by Intel Corporation that allows employees to have a
flexible work schedule and location. It enables employees to work from different locations,
including their homes or other remote locations, as long as they meet their job requirements and
deliverables. The program aims to provide employees with a better work-life balance and increased
job satisfaction by allowing them to have more control over their work schedule and environment.

 Response with RAG:

Intel Flex Engineering is a cross-platform software engineering team
that focuses on innovative solutions and industry-leading
technologies. They aim to improve developer effectiveness through
automated CI/CD and analytics for faster deployments with better
quality. Thanks for asking!


# Mistral 7b

In [4]:
from rag.generate import *
from rag.ingest import *
from rag.retrieve import *
import textwrap

In [5]:
# End-to-end run of the modularized pipeline (ingest -> retrieve -> generate)
# using a local Mistral 7B GGUF model loaded through get_llm_llamacpp. The
# helper functions come from the rag.* star imports in the previous cell.
import time  # imported up front instead of mid-way through the cell

# Query
query = "Who are the new hires?"
chromadb_save_path = "database/chromadb/flex_neo"

# Ingest
print("Ingest")
embedding_model = get_embedding_model("http://localhost:8188")
documents = load_documents("documents/2024 Intel Flex Engineering Malaysia - NEO_20240228.pdf")
chunks = chunk_documents(doc=documents, chunk_size=2000, chunk_overlap=0)
save_chunks_to_chroma(chunks, embedding_model, chromadb_save_path)

# Retriever
print("\nRetrieval")
retriever = get_chroma_retriever(chromadb_save_path, embedding_model)
contexts = retrieve(retriever, query)

# Rag Chain
print("\nGeneration")
mistral_llm = get_llm_llamacpp("/data/llm/models/mistral-7b-instruct-v0.2.FP16.gguf")
prompt_template = get_prompt_template()  # Use the default template by not giving input args
rag_chain = create_rag_chain(retriever, prompt_template, mistral_llm)

# Chat
# Time the generation with perf_counter(): unlike time.time() it is a
# monotonic, high-resolution clock, so the measured interval cannot be skewed
# by system clock adjustments.
t1 = time.perf_counter()
generated = rag_chain.invoke(query)
t2 = time.perf_counter() - t1  # elapsed seconds
word_count = len(generated.split())  # whitespace-separated words in the answer
print(f"Generation took {(t2)*1000:.4f} ms. {word_count} words. {word_count / t2:.4f} words per second.\n")

print("Results:")
print(f" Query:\n\t{query}")
print(f" Rag generation:\n\t{textwrap.fill(generated, width=100)}")

Ingest
load_documents took 254.5052 ms.
chunk_documents took 0.5574 ms.
ChromaDB already exists at the specified path. No changes made.
save_chunks_to_chroma took 0.1526 ms.

Retrieval
retrieve took 33.6764 ms.

Generation
Generation took 75586.6714 ms. 30 words. 0.3969 words per second.

Results:
 Query:
	Who are the new hires?
 Rag generation:
	 The new hires are Tan An Nie, Mohamad Siti Nurhanisah (Hanisah), Tai Andre, Wei Xiang, Ee Elgene,
Ding Ren, Nicole Wan Jun, Tay Xue Hao, Adam Ong Frankie, Wei Quan.
