In [None]:
#pip install -qU langchain-groq
#pip install python-dotenv
#pip install -U langchain-huggingface

# Invoke LLM using RAG

# Imports

In [1]:
import os
from groq import Groq
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_groq import ChatGroq



from pathlib import Path
from langchain.document_loaders import DirectoryLoader, TextLoader

# --- use online prompt---
from langchain import hub

#--- custom prompt template ----------
from langchain_core.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain.schema.runnable import RunnableLambda

# Environment Setup

Note: In this notebook, Hugging Face provides access to the embedding model, while Groq provides fast hosted inference (on hardware optimized for it) for the LLM.

# Setup GROQ - for accessing LLM model

In [2]:
# Pull the variables defined in the local .env file into the process environment.
load_dotenv()

# None when GROQ_API_KEY is not set. The value is not passed to ChatGroq explicitly
# in this notebook; presumably the client reads the environment itself — confirm.
groq_api_key = os.environ.get("GROQ_API_KEY")



# Create LLM instance from GROQ

In [3]:
# Chat model served through Groq; a temperature of 0.9 allows fairly varied wording.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.9,
)

# Setup HuggingFace -- for accessing embedding model 

# We can use Hugging Face to get the embeddings in the cloud, or we can use a local embedding model to get the embeddings, e.g. using Ollama

N.B. https://python.langchain.com/docs/how_to/embed_text/

In [4]:
# None when HUGGINGFACEHUB_API_TOKEN is not set in the environment.
huggingface_api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN")



# Setup the embedding model


In [5]:
# Sentence-transformers embedding model used to vectorise text.
embeddings = HuggingFaceEmbeddings(
    # Normalised vectors, as intended by the original author for cosine similarity.
    encode_kwargs=dict(normalize_embeddings=True),
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Test embeddings

In [6]:
# Smoke test: embed one example sentence to confirm the embedding model works.
text = "Hugging Face provides great NLP tools."

# embed_query is called with a single string; the result is kept for inspection.
embedding_vector = embeddings.embed_query(text)

# Prints the whole vector, which produces a very long output line.
print(embedding_vector)

[-0.04714725911617279, 0.031214414164423943, 0.06171492859721184, 0.01215528417378664, 0.03618667274713516, 0.018930858001112938, -0.05314081907272339, -0.05064268410205841, 0.006911817472428083, -0.016089562326669693, 0.05560332164168358, -0.00655879732221365, -0.020847702398896217, 0.05155806988477707, 0.10991392284631729, 0.062482450157403946, -0.023179661482572556, 0.058812081813812256, 0.03772741183638573, 0.009840325452387333, -0.010646358132362366, 0.0930664911866188, 0.045571282505989075, -0.09587831050157547, 0.022258056327700615, 0.013583502732217312, -0.033910539001226425, -0.03657962754368782, 0.10744249820709229, -0.002959347330033779, -0.009816817939281464, -0.01652284525334835, -0.0014099582331255078, 0.06383546441793442, -0.04078644886612892, 0.10934137552976608, 0.02877010777592659, 0.09480557590723038, -0.08343128114938736, -0.008077527396380901, -0.110220767557621, -0.015364536084234715, 0.018267905339598656, 0.004388584289699793, 0.09433092921972275, 0.0369383245706

# Read a pdf/txt file

# 1) Define the directory path

In [7]:
# Directory that holds the story .txt files.
# Set the RAG_STORY_DIR environment variable to point at your own copy; when it is
# not set, the original author's local path is used so existing setups keep working.
fulldir = Path(os.getenv("RAG_STORY_DIR", "C:\\GenAI\\RAG\\llm_story_v1"))


# Use the absolute path

In [8]:
# Loader for every .txt file found (recursively) under `fulldir`.
dirloader = DirectoryLoader(
    fulldir.absolute(),
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},  # read each file explicitly as UTF-8
    glob="**/*.txt",
)

# Load the documents with the loader

In [9]:
# NOTE: despite the message, the loader object was already created in the previous
# cell; the load() call below is what produces the documents.
print("Instantiated loader")
dirdata = dirloader.load()

Instantiated loader


# Split / chunk the file content

In [10]:

print("Split / chunk the file content")

# chunk_size / chunk_overlap are presumably measured in characters (the splitter's
# default length function) — confirm against the splitter documentation.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=3000,
)
splits = text_splitter.split_documents(dirdata)


Split / chunk the file content


# Testing ------> check the content of splits object

In [11]:
# Dumps the repr of the whole list of chunks (Document objects with metadata and page_content).
print(splits)

[Document(metadata={'source': 'C:\\GenAI\\RAG\\llm_story_v1\\story1.txt'}, page_content='The Fox and the Grapes\n\nOnce upon a time, a fox was traveling through the countryside, when he spotted a beautiful bunch of ripe grapes hanging from a vine. The vine was bound along the branch of a tree, and the perfect grapes hung right above the fox’s head.\n\nThe fox jumped to take a bite, but couldn’t reach the branch. So he backed up to take a running leap, and again fell short. He tried over and over, then sat down.\n\nThen he shook his head. “What a fool I’ve been,” he thinks. “Why, just look at those grapes! They’re obviously sour. Why have I been wasting my time on a bunch of sour grapes?” And with great disdain he trotted off.\n\nAesop’s moral: “There are many who pretend to despise and belittle that which is beyond their reach.”\n\nThis is a very useful story. It surprises me how often I fall into this trap—of deciding that something’s not worth having, if I fear I can’t have it.\n\nIn

# Validate the string - should not contain any special characters

In [12]:
# Show every chunk under a 1-based "Split N:" heading, one chunk after another.
for i, split in enumerate(splits):
    print(f"Split {i+1}:", split, sep="\n")

Split 1:
page_content='The Fox and the Grapes

Once upon a time, a fox was traveling through the countryside, when he spotted a beautiful bunch of ripe grapes hanging from a vine. The vine was bound along the branch of a tree, and the perfect grapes hung right above the fox’s head.

The fox jumped to take a bite, but couldn’t reach the branch. So he backed up to take a running leap, and again fell short. He tried over and over, then sat down.

Then he shook his head. “What a fool I’ve been,” he thinks. “Why, just look at those grapes! They’re obviously sour. Why have I been wasting my time on a bunch of sour grapes?” And with great disdain he trotted off.

Aesop’s moral: “There are many who pretend to despise and belittle that which is beyond their reach.”

This is a very useful story. It surprises me how often I fall into this trap—of deciding that something’s not worth having, if I fear I can’t have it.

In the diary of writer Virginia Woolf, I was struck to see her reminding herself

In [None]:
#for split in splits:
   # print(split)

In [16]:

documents = splits # Alias only: splits holds Document objects (see the printed output above), not plain strings, so .page_content is still needed when extracting text

#print(documents)

# Testing ------>

In [17]:
#print(documents)


# Create the prompt - load the online prompt

In [18]:
# Fetch the shared "rlm/rag-prompt" template through the hub module
# (presumably requires network access — confirm). This is the prompt used by rag_chain.
prompt = hub.pull("rlm/rag-prompt")

# ------ Create Custom Prompt --------- Another way to create Custom prompt template

In [19]:
# Hand-written alternative to the hub prompt, with {question} and {context} placeholders.
# NOTE: the leading spaces on the continuation lines are inside the triple-quoted
# string, so they become part of the prompt text.
template = '''You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
            Question: {question} 
            Context: {context} 
            Answer:
            '''

# Only referenced by the disabled (string-quoted) chain further down.
custom_rag_prompt = PromptTemplate.from_template(template)


# ------- Format the document---- 

In [20]:
def format_docs(docs):
    """Join the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string
            (for example the chunks produced by the text splitter).

    Returns:
        The ``page_content`` values separated by blank lines, or an empty
        string when ``docs`` is empty.
    """
    # Iterate over the argument rather than the notebook-level `documents`,
    # so the function also works for any other list of documents.
    return "\n\n".join(doc.page_content for doc in docs)

    
# Build the context once: the text of all chunks joined into a single string.
formatted_docs = format_docs(documents) 


# Sanity check that the result is a plain string.
print(type(formatted_docs))

<class 'str'>


# Create LangChain chain - Build the RAG Pipeline
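Building a RAG pipeline normally involves creating a query engine that retrieves relevant information from your embeddings and uses it to augment the LLM’s responses. Note that the chain in the next cell does not query the embeddings yet: it passes the concatenated text of all chunks as the context for every question.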

In [21]:
# Compose the question-answering chain with LCEL pipes:
#   1. the dict builds the prompt inputs: "context" ignores the incoming value and
#      always returns the pre-built `formatted_docs` string, while "question"
#      forwards the chain input unchanged;
#   2. `prompt` is filled with those two inputs;
#   3. `llm` generates the answer;
#   4. StrOutputParser turns the model output into a plain string.
# NOTE(review): no retriever or embedding lookup takes part here — every question
# receives the same full-text context.
rag_chain = (
    {"context": RunnableLambda(lambda _: formatted_docs), "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# ------- use custom prompt - template ---------- optional

In [None]:
# Disabled variant kept as a string literal, so it is never executed.
# NOTE(review): `retriever` is not defined in any cell shown in this notebook;
# it must be created before this variant can be enabled.
'''
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)
'''

# Invoke the LLM with user question

In [22]:
# The input string is forwarded as the prompt's "question"; the cell displays the returned answer.
rag_chain.invoke("what is the moral of the story ?")

"The moral of the story is that many people pretend to despise or belittle something that is beyond their reach, in order to hide their disappointment or lack of achievement. It's a form of self-deception that can be destructive."

In [24]:
# Second question against the same chain (and therefore the same context string).
rag_chain.invoke("write the summary of the story in 3 lines.")

"Here is a summary of the story in three lines:\n\nA fox tries to reach a bunch of ripe grapes but fails, and then decides they're sour and not worth having. The fox's behavior is a metaphor for people who pretend to despise something they can't have. The story teaches us not to pretend something is not worth having just because we can't get it."