# Introduction to Orchestration Frameworks

In [8]:
import os
from dotenv import load_dotenv
import numpy as np

from sentence_transformers import SentenceTransformer
from openai import OpenAI


In [2]:
# Pull variables defined via python-dotenv into the process environment.
load_dotenv()

# Name of the environment variable that holds the key passed to the OpenAI client.
API_KEY_ENV_VAR = "paid_api2"
api = os.getenv(API_KEY_ENV_VAR)

# Report the lookup result without ever printing the key itself.
# The failure message names the variable that was actually looked up so the
# reader knows which setting to fix.
if not api:
    print(f"Environment variable {API_KEY_ENV_VAR!r} not found")
else:
    print("API key loaded successfully")

API key loaded successfully


In [3]:
# Sample documents for all examples.
# This small in-memory corpus is what the retrieval step embeds and searches.
documents = [
    "Python is a high-level programming language known for readability and simplicity.",
    "Machine learning is a subset of AI that enables systems to learn from data.",
    "RAG combines retrieval and generation to provide accurate, grounded responses."
]

## Custom Implementation

In [5]:
# Embed the whole corpus in one batch with a sentence-transformer model.
model = SentenceTransformer('all-MiniLM-L6-v2')
doc_embeddings = model.encode(documents)

# Embed the question; it is encoded as a one-item batch, so take row 0.
query = "What is RAG?"
query_embedding = model.encode([query])[0]

# Score each document against the query with a dot product and keep the
# highest-scoring one as the retrieved context.
similarities = doc_embeddings @ query_embedding
top_idx = similarities.argmax()
retrieved_doc = documents[top_idx]

In [6]:
# Display the document that scored highest for the query.
retrieved_doc

'RAG combines retrieval and generation to provide accurate, grounded responses.'

In [9]:
# Generate response: send the retrieved document plus the question to the chat model.
client = OpenAI(api_key=api)

# The prompt puts the retrieved context first, then the question.
prompt = f"""Context: {retrieved_doc}

Question: {query}
Answer based on the context:
"""

# temperature=0 keeps the grounded answer as repeatable as the API allows,
# so re-running the notebook gives comparable output.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
    temperature=0
)
# Last expression of the cell: show the raw completion object.
response

ChatCompletion(id='chatcmpl-CxAnbmt4XbLRxsDtZseQ7hSJ4xK86', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='RAG is a system that uses a combination of retrieval and generation techniques to produce accurate and well-informed responses.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1768218899, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier='default', system_fingerprint=None, usage=CompletionUsage(completion_tokens=23, prompt_tokens=35, total_tokens=58, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [10]:
# Print a heading line followed by the answer text of the first choice.
print(
    "Custom RAG Answer:",
    response.choices[0].message.content,
    sep="\n",
)

Custom RAG Answer:
RAG is a system that uses a combination of retrieval and generation techniques to produce accurate and well-informed responses.


## LangChain

In [1]:
import os
from dotenv import load_dotenv

In [3]:
# Sample documents used as the retrieval corpus in this section.
# These strings are embedded as-is, so spelling matters: "Machine learning"
# was previously misspelled as "Machin learning".
documents = [
    "Python is a high-level programming language known for readability and simplicity.",
    "Machine learning is a subset of AI that enables systems to learn from data.",
    "RAG combines retrieval and generation to provide accurate, grounded response."
]

### Custom Implementation

In [4]:
from sentence_transformers import SentenceTransformer
import numpy as np
from openai import OpenAI


  from .autonotebook import tqdm as notebook_tqdm


In [5]:

# Generate embeddings: load the sentence-transformer model, then encode
# every document in a single call.
model = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = model.encode(documents)

In [6]:
# Query and retrieve: embed the question with the same model as the documents.
query = "What is RAG"
# The query is encoded as a one-item list, so [0] picks out its single embedding.
query_embedding = model.encode([query])[0]

In [7]:
# Compare similarity: one dot-product score per document.
similarities = doc_embeddings @ query_embedding
# Index of the highest-scoring document.
top_idx = similarities.argmax()
# Look the document up by its index.
retrieved_doc = documents[top_idx]

In [8]:
# Display the document selected by the similarity search.
retrieved_doc

'RAG combines retrieval and generation to provide accurate, grounded response.'

In [9]:
# Ask the chat model to answer the question from the retrieved context.
client = OpenAI(api_key=api)

# Three-line prompt: context, question, instruction (no trailing newline).
prompt = (
    f"Context: {retrieved_doc}\n"
    f"Question: {query}\n"
    "Answer based on the context"
)

# Single user message, sent with temperature 0.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": prompt}],
    model='gpt-3.5-turbo',
    temperature=0,
)

In [10]:
# Show the full completion object returned by the chat completion call.
response

ChatCompletion(id='chatcmpl-ClX7TzGAVknGhBb3graL2bp8O74T6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='RAG is a system that combines retrieval and generation techniques to provide accurate and grounded responses.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1765444763, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier='default', system_fingerprint=None, usage=CompletionUsage(completion_tokens=18, prompt_tokens=34, total_tokens=52, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [11]:
# Print only the answer text carried by the first choice's message.
print(response.choices[0].message.content)

RAG is a system that combines retrieval and generation techniques to provide accurate and grounded responses.


## LangChain implementation with Chroma

## RAG with LangChain

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough


def format_docs(docs: list[Document]) -> str:
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt would receive the Python repr of a list of
    Document objects instead of their content.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Convert the raw strings into LangChain Document objects.
lc_docs = [Document(page_content=doc) for doc in documents]

# Stable ids, one per document position. The collection is persisted to disk,
# so without fixed ids every re-run of this cell would insert the same
# documents again under fresh ids and the retriever would return duplicates.
doc_ids = [f"doc-{i}" for i in range(len(lc_docs))]

embeddings = OpenAIEmbeddings(openai_api_key=api)
vectorstore = Chroma.from_documents(
    lc_docs,
    embeddings,
    ids=doc_ids,
    collection_name="my_rag_collection",
    persist_directory='./chroma_db'  # optional, this saves the DB locally
)

# Retrieve only the best match, mirroring the top-1 lookup of the custom
# implementation; the corpus holds just three documents.
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

# llm
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    openai_api_key=api
)

# prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert assistant. Use ONLY the retrieved content"),
    ("human", "{question}\n\nContext:\n{context}")
])

# Build RAG pipeline: the incoming question is sent to the retriever (whose
# results are flattened to text) and passed through unchanged as `question`.
rag_chain = (
    RunnableParallel(
        context=retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | prompt
    | llm
)

# Query
response = rag_chain.invoke("What is RAG?")
# The chain ends at the chat model, so print the message text rather than the
# repr of the whole message object.
print(response.content)