In [7]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

In [11]:
# Read variables from a local .env file (if present) into the process
# environment so that GROQ_API_KEY can be looked up below.
load_dotenv()

# Chat model client reused by the later cells of this notebook.
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Quick connectivity check (uncomment to run):
# response = llm.invoke("Who is the president of Sri-Lanka?")
# print(response.content)

In [None]:
#from langchain.document_loaders import UnstructuredURLLoader
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# pdf_url = "https://www.gptaiflow.com/assets/files/2025-01-18-pdf-1-TechAI-Goolge-whitepaper_Prompt%20Engineering_v4-af36dcc7a49bb7269a58b1c9b89a8ae1.pdf"

# loader = UnstructuredURLLoader(urls=[pdf_url])
# documents = loader.load()

# Load the local PDF into LangChain documents.
loader = PyMuPDFLoader("Docs/prompt_engineering.pdf")
documents = loader.load()

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

# Fail here with a clear message rather than later, deep inside the
# vector-store build, when the PDF produced no text at all.
if not docs:
    raise ValueError("No text chunks were produced from Docs/prompt_engineering.pdf")

# Sanity-check preview of one chunk. Chunk 100 is shown when it exists;
# shorter documents fall back to their last chunk instead of raising
# IndexError.
print(docs[min(100, len(docs) - 1)].page_content)

In [24]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Embedding model used to vectorise every chunk in `docs`.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed the chunks and build the FAISS index from them.
vector_db = FAISS.from_documents(documents=docs, embedding=embeddings)

# Optional: write the index to disk so it can be reloaded later.
# vector_db.save_local("faiss_index")

print("Embeddings stored in FAISS vector database.")

Embeddings stored in FAISS vector database.


In [32]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import Document

# Retriever view over the FAISS index, created with as_retriever()'s defaults.
retriever = vector_db.as_retriever()

# Prompt text with two placeholders: {context} receives the retrieved
# passages and {question} receives the user's query.
prompt_template = """
You are a helpful assistant. Use the following context to answer the question.

Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Chain that fills the prompt and sends it to the chat model.
llm_chain = LLMChain(prompt=prompt, llm=llm)

def parser(llm_output):
    """Turn raw LLM text into a small display-ready dictionary.

    Args:
        llm_output: The text returned by the model.

    Returns:
        A dict with two keys: "header", a fixed banner string, and
        "content", the non-blank lines of ``llm_output`` in their original
        order, each stripped of surrounding whitespace.
    """
    content = []
    for raw_line in llm_output.strip().split("\n"):
        cleaned = raw_line.strip()
        # Skip lines that are empty once whitespace is removed.
        if cleaned:
            content.append(cleaned)

    return {"header": "====== Response ======", "content": content}

def query_pipeline(user_query):
    """Answer ``user_query`` from retrieved context and return parsed output.

    The query is sent to the notebook-level ``retriever``; the page contents
    of the returned documents are joined with newlines into a single context
    string, which is passed together with the query to ``llm_chain``. The
    generated text is then run through ``parser``.

    Args:
        user_query: The question text. It is used both for retrieval and as
            the ``question`` value in the prompt.

    Returns:
        The dictionary produced by ``parser``: a "header" string and a
        "content" list of non-blank output lines.
    """
    # invoke() is the Runnable-style replacement for the deprecated
    # get_relevant_documents() and returns the same list of documents.
    retrieved_docs = retriever.invoke(user_query)

    # Combine the retrieved documents into a single context string.
    context = "\n".join(doc.page_content for doc in retrieved_docs)

    # invoke() replaces the deprecated run(). It returns a dict, and the
    # generated text sits under the chain's output key.
    chain_result = llm_chain.invoke({"context": context, "question": user_query})
    llm_output = chain_result[llm_chain.output_key]

    return parser(llm_output)

# Run one example question through the retrieval + generation pipeline.
user_query = "What is prompt engineering?"
response = query_pipeline(user_query)

# Print the banner followed by each answer line, one per output line.
print(response["header"], *response["content"], sep="\n")


**Prompt engineering** is the iterative process of crafting, refining, and optimizing the input (prompt) given to a large language model so that it predicts the desired output accurately. It involves selecting the right words, style, tone, structure, and context, as well as choosing the appropriate model and configurations. The goal is to design high‑quality prompts that guide the LLM to produce clear, accurate, and relevant responses.
