In [10]:
# Model calling and initial setup
import os
# from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import AzureChatOpenAI , AzureOpenAIEmbeddings
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser 
from langchain_core.runnables import RunnableConfig

import warnings
warnings.filterwarnings("ignore") 

# Read settings from .env; override=True is passed so values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# --- Provider credentials and Azure deployment settings ---
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
AZURE_BASE_URL = os.environ.get("AZURE_BASE_URL")
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
# NOTE: the "DEPLIOYMENT" spelling is the literal key looked up in the
# environment, so it has to match what the .env file uses.
AZURE_CHAT_DEPLIOYMENT_NAME = os.environ.get("AZURE_CHAT_DEPLIOYMENT_NAME")
AZURE_EMBEDDING_DEPLIOYMENT_NAME = os.environ.get("AZURE_EMBEDDING_DEPLIOYMENT_NAME")

# --- LangSmith settings (read here for reference; not passed to any call below) ---
LANGSMITH_ENDPOINT = os.environ.get("LANGSMITH_ENDPOINT")
LANGSMITH_TRACING = os.environ.get("LANGSMITH_TRACING")
LANGSMITH_PROJECT = os.environ.get("LANGSMITH_PROJECT")
LANGSMITH_API_KEY = os.environ.get("LANGSMITH_API_KEY")

# Connection arguments shared by the chat model and the embedding model.
_azure_connection = {
    "api_key": AZURE_OPENAI_API_KEY,
    "azure_endpoint": AZURE_BASE_URL,
    "api_version": "2024-02-01",
}

# Turns a chat model message into a plain string at the end of a chain.
parser = StrOutputParser()

# llm_gemini = ChatGoogleGenerativeAI(model="gemini-2.0-flash" , api_key= GOOGLE_API_KEY)

# Chat model used by every chain in this notebook.
llm_openai = AzureChatOpenAI(
    model="gpt-4o-mini",
    deployment_name=AZURE_CHAT_DEPLIOYMENT_NAME,  # deployment name in Azure
    temperature=0.75,
    **_azure_connection,
)

# Embedding model used when building the FAISS indexes further down.
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    deployment=AZURE_EMBEDDING_DEPLIOYMENT_NAME,
    **_azure_connection,
)
# result = llm_openai.invoke("What are your creater, also what type of LLM are you").content
# print(result)
# llm_gemini.invoke("who is father of india").content

### Why AI services need observability
- We can track latency drops
- We can log complex LLM workflows

### LangSmith
- Used as an observability and evaluation platform, where teams can debug and monitor app performance

### What LangSmith traces
 - Inputs and outputs
 - all intermediate steps
 - latency 
 - cost 
 - error 
 - tags 
 - metadata 
 - feedback 

## Core Concepts
1) Project 
- The whole application, which is executed multiple times

2) Trace 
- Each execution of the project is a trace

3) Run
- The execution of each trace has multiple steps; each of those steps is a single run

In [3]:
# Minimal chain: fixed prompt (no placeholders) -> Azure chat model -> plain string.
prompt = PromptTemplate(
    template="what is the name of india's first PM?",
)
chain = prompt | llm_openai | parser
# The template has no input variables, so an empty mapping is all it needs.
result = chain.invoke({})

In [4]:
# Show the chain's answer as this cell's output.
result

"India's first Prime Minister was Jawaharlal Nehru. He served from August 15, 1947, when India gained independence, until his death on May 27, 1964."

In [13]:
# from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

load_dotenv()

# Set the LangSmith project name through the environment.
os.environ['LANGSMITH_PROJECT'] = "Sequential LLM App"
# llm_gemini = ChatGoogleGenerativeAI(model="gemini-2.0-flash" , api_key= GOOGLE_API_KEY , temperature=0.9)

# Stage 1: turn a topic into a long-form report.
prompt1 = PromptTemplate(
    input_variables=['topic'],
    template='Generate a detailed report on {topic}',
)
# Stage 2: condense the report text into five bullet points.
prompt2 = PromptTemplate(
    input_variables=['text'],
    template='Generate a 5 pointer summary from the following text \n {text}',
)

parser = StrOutputParser()

# Report generation feeds straight into summarisation within a single chain.
chain = (
    prompt1 | llm_openai | parser
    | prompt2 | llm_openai | parser
)

# Run name and tags passed along with the invocation.
config = RunnableConfig(
    tags=["llm_app", "report_generation"],
    run_name="sequential_report_generation_v1",
)
topic_input = {'topic': 'Role of Ai in unemplyoment in India'}
result = chain.invoke(topic_input, config=config)
print(result)

1. **AI's Dual Impact on Employment**: The report highlights that while AI in India can enhance productivity and create new job opportunities in sectors like IT and agriculture, it also poses significant risks of job displacement, particularly in manufacturing, customer service, and other routine task-oriented jobs.

2. **Current Employment Landscape**: With over 1.4 billion people in India and a workforce of around 500 million, the unemployment rate is approximately 7-8%, with a substantial portion of employment in the informal sector lacking job security and benefits.

3. **Job Displacement Risks**: AI-driven automation is leading to significant job losses, especially for roles involving repetitive tasks, while a mismatch in digital skills among the workforce hampers the transition to new jobs created by AI advancements.

4. **Opportunity for Job Creation**: AI is generating demand for new roles such as data scientists and machine learning engineers, and fostering entrepreneurship th

In [15]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

load_dotenv()  # loads .env; the models used here take their AZURE_* settings (not OPENAI_API_KEY) from it

# Build the path with os.path.join instead of a hard-coded backslash: "\S" is an
# invalid escape sequence in a normal string literal, and a literal backslash is
# not a path separator outside Windows.
PDF_PATH = os.path.join("Data", "ShantnuKumar.pdf")  # <-- change to your PDF filename

# Step 1: read the PDF into LangChain documents.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()  # one Document per page

# Step 2: cut the documents into overlapping chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1000,
)
splits = splitter.split_documents(docs)

# Step 3: embed the chunks into a FAISS index, then expose it as a retriever
# that is asked for a single similarity match per query (k=1).
vs = FAISS.from_documents(splits, embeddings)
retriever = vs.as_retriever(
    search_kwargs={"k": 1},
    search_type="similarity",
)

# Step 4: prompt with a system rule limiting answers to the supplied context,
# followed by the user turn carrying the question and that context.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer ONLY from the provided context. If not found, say you don't know."),
        ("human", "Question: {question}\n\nContext:\n{context}"),
    ]
)

# Run name and tags passed along with the invocation.
config = RunnableConfig(
    tags=["llm_app", "rag_application"],
    run_name="rag_example_1",
)


# Step 5: assemble the chain.
def format_docs(docs):
    """Join every document's ``page_content`` into one string, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# The same input is used twice: once to retrieve context, once passed through as the question.
parallel = RunnableParallel(
    {
        "context": retriever | RunnableLambda(format_docs),
        "question": RunnablePassthrough(),
    }
)

chain = parallel | prompt | llm_openai | StrOutputParser()

# Step 6: ask a single question and print the answer.
print("PDF RAG ready. Ask a question (or Ctrl+C to exit).")
q = input("\nQ: ")
ans = chain.invoke(q.strip(), config=config)
print("\nA:", ans)

PDF RAG ready. Ask a question (or Ctrl+C to exit).

A: His AWS skills include leveraging serverless AWS architectures to minimize operational costs while ensuring high scalability and security standards.


#### Important Points
- By default, only Runnables are tracked by LangSmith; what about the other parts such as chunking, loading, etc.?

In [None]:
import os
from dotenv import load_dotenv

from langsmith import traceable  # <-- key import

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

load_dotenv()  # load variables from the .env file into the process environment

# Build the path with os.path.join instead of a hard-coded backslash: "\S" is an
# invalid escape sequence in a normal string literal, and a literal backslash is
# not a path separator outside Windows.
PDF_PATH = os.path.join("Data", "ShantnuKumar.pdf")  # change to your file

@traceable(name="load_pdf")
def load_pdf(path: str):
    """Load the PDF at ``path`` with PyPDFLoader and return the resulting list of documents.

    Wrapped in ``@traceable`` so the call is recorded as a "load_pdf" step.
    """
    return PyPDFLoader(path).load()

@traceable(name="split_documents" , metadata={"text_splitter" : "RecursiveCharacterTextSplitter"})
def split_documents(docs, chunk_size=1000, chunk_overlap=150):
    """Split ``docs`` into chunks with a RecursiveCharacterTextSplitter.

    Args:
        docs: Documents to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_documents`` returns for ``docs``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(docs)

@traceable(name="build_vectorstore" , tags=['vectorDB'] , metadata={"vector_store" : "FAISS" })
def build_vectorstore(splits):
    """Embed the chunks and index them in a FAISS vector store.

    Args:
        splits: Chunked documents to index.

    Returns:
        The FAISS vector store built from ``splits`` with the notebook-level
        ``embeddings`` model.

    Raises:
        ValueError: If ``splits`` is empty (for example, a PDF from which no
            text could be extracted). Without this check the failure surfaces
            from inside FAISS index construction as an unhelpful IndexError.
    """
    if not splits:
        raise ValueError(
            "No document chunks to index: the PDF produced no text to embed."
        )
    # FAISS.from_documents internally calls the embedding model.
    return FAISS.from_documents(splits, embeddings)

# Optional umbrella step: tracing the whole setup groups the three traced helpers under one call.
@traceable(name="setup_pipeline")
def setup_pipeline(pdf_path: str):
    """Run load -> split -> index for the PDF at ``pdf_path`` and return the vector store."""
    return build_vectorstore(split_documents(load_pdf(pdf_path)))

# ---------- pipeline ----------

# System rule restricting answers to the supplied context, then the user turn
# carrying the question and that context.
qa_messages = [
    ("system", "Answer ONLY from the provided context. If not found, say you don't know."),
    ("human", "Question: {question}\n\nContext:\n{context}"),
]
prompt = ChatPromptTemplate.from_messages(qa_messages)

def format_docs(docs):
    """Concatenate each document's ``page_content`` into one string, with a blank line between documents."""
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Index the PDF through the traced setup helper, then ask the retriever for
# the 4 most similar chunks per query.
vectorstore = setup_pipeline(PDF_PATH)
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 4},
    search_type="similarity",
)

# The same input is used twice: once to retrieve context, once passed through as the question.
parallel = RunnableParallel(
    {
        "context": retriever | RunnableLambda(format_docs),
        "question": RunnablePassthrough(),
    }
)

chain = parallel | prompt | llm_openai | StrOutputParser()

# ---------- run a query (also traced) ----------
print("PDF RAG ready. Ask a question (or Ctrl+C to exit).")
q = input("\nQ: ").strip()

# Name the run so it is easy to find; no tags or metadata are attached here.
config = dict(run_name="pdf_rag_query")

ans = chain.invoke(q, config=config)
print("\nA:", ans)

PDF RAG ready. Ask a question (or Ctrl+C to exit).

A: He managed the full lifecycle for the SpiceReclaim booking forecast model, which included initial development, optimization, and cloud deployment. He improved the accuracy of Payload Prediction models and transitioned them to production environments, ensuring reliable performance using AWS services such as EC2, Lambda, S3, and EventBridge. He also investigated and resolved critical bugs in the Booking Forecast model under pressure, just days before the SpiceReclaim launch.
