In [1]:
from pydantic import BaseModel,Field

class State(BaseModel):
    question : str = Field(description= "Question given be the user")
    answer : str = Field(description="Answer given by the Application")
    chat_summary : str = Field(description="Chat history maintained by the applciation")
    
    

In [2]:
import filetype
from langchain.document_loaders import PyPDFLoader

def file_router(file):
    kind = filetype.guess(file)
    if kind is None:
        return "Unknown"
    file_type =  kind.mime
    if file_type.startswith("image/"):
        return 'imagesingle'

    # or else this is pdf and if there is images kind of pdf then return 'image' or text based then return pdf

    loader = PyPDFLoader(file)

    docs = loader.load()

    if not len(docs[0].page_content):
        return 'imagepdf'
    
    else :
        return 'pdf'
    





    

In [3]:
from langchain_chroma import Chroma

from langchain_google_genai import ChatGoogleGenerativeAI
import os
import base64
from langchain_core.messages import HumanMessage
from io import BytesIO

model = llm = ChatGoogleGenerativeAI(model = 'gemini-2.0-flash')

def encode_image(image) -> str:
    """Encode a PIL image to base64 string."""
    buffer = BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

def image_summarize(model, base64_image: str, prompt: str) -> str:
    """Make image summary"""
    msg = model.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{base64_image}"},
                    },
                ]
            )
        ]
    )
    return msg.content


image_prompt = """You are a highly meticulous AI assistant that extracts and summarizes every possible piece of visual information from an image without omitting any detail.  
    Your task is to generate an exhaustive, structured summary of the image that captures all the text, visual elements, layout, colors (if relevant), numbers, figures, and any context or formatting that might be useful.  
    Do not generalize or paraphrase — capture the content exactly as it appears. Use bullet points, lists, or structured sections (e.g., titles, tables, headers, footnotes) to organize your summary.  

    Be especially attentive to:
    - All visible text, including headers, footnotes, and marginal notes  
    - Tables: Capture each row and column verbatim including headers and cell values  
    - Graphs/Charts: Explain all axes, labels, legends, data points, patterns, and conclusions  
    - Visual layout and structure: Describe how content is arranged (e.g., two-column layout, centered title, left-aligned figure)  
    - Icons, logos, or images embedded within the image: Describe them accurately  
    - Fonts, colors, and emphasis (e.g., bold, italic, underlined) if they seem meaningful  
    - Dates, numbers, symbols, or special formatting exactly as shown  
    - If the image is a document or scanned page, preserve hierarchy and document structure  

    Output the result in structured markdown with clear section headers (e.g., "Header", "Table 1", "Figure Description", "Text Body", "Footnotes").  
    Your goal is to allow someone to fully understand the image without seeing it, preserving maximum detail for use in downstream AI models or search systems."""



def image_handler(image):

    base64_img = encode_image(image)
    summary = image_summarize(model, base64_img, prompt=image_prompt)
    with open('example.txt','w') as f:
        f.write(summary)
    return summary





In [4]:
def image_handler_append(image):
    base64_img = encode_image(image)
    summary = image_summarize(model, base64_img, prompt=image_prompt)
    
    # Append instead of overwrite
    with open('example.txt', 'a') as f:
        f.write(summary + '\n')  # Add newline for separation
    
    return summary


In [5]:
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

def vectorize_text(text:str):
    splitter = RecursiveCharacterTextSplitter(chunk_size = 600,chunk_overlap = 50)

    docs = splitter.split_text(text)

    vectorstore = Chroma.from_texts(docs,embedding= HuggingFaceEmbeddings())

    return vectorstore
    


In [6]:
def vectorize_single_image(image):
    summary = image_handler(image)
    return vectorize_text(summary)


In [7]:
from pdf2image import convert_from_path

def vectorize_multiple_images(image):

    images = convert_from_path(image)
    summary = ''
    for i, image in enumerate(images):
        filename = f"page_{i + 1}.png"
        print(filename)
        image.save(filename, "PNG")
        if filename == 'page_1.png':
            summary = image_handler(image)
        else:
            summary += image_handler_append(image)
    
    return vectorize_text(summary)

    

In [8]:
from langchain.document_loaders import PyPDFLoader

def vectorize_docs(filepath):
    loader = PyPDFLoader(filepath)
    docs = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size= 600,chunk_overlap= 80)
    chunks = splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(chunks,HuggingFaceEmbeddings())
    return vectorstore




In [9]:
def vectorize(filepath):
    type_of_file = file_router(filepath)
    print(type_of_file)
    if type_of_file == 'imagesingle':
        return vectorize_single_image(filepath)
    elif type_of_file == 'imagepdf':
        return vectorize_multiple_images(filepath)
    else :
        return vectorize_docs(filepath)
        

   



In [10]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from dotenv import load_dotenv
load_dotenv()

prompt = hub.pull('rlm/rag-prompt')
from tqdm.autonotebook import tqdm as notebook_tqdm



retriever = vectorize('shreyankresume.pdf').as_retriever()

def format_docs(docs):
    return '\n\n'.join(doc.page_content for doc in docs)


rag_chain = (
    {"context":retriever |format_docs , 'question':RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question} """

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(template)

generate_queries = (
    prompt
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    return [loads(doc) for doc in unique_docs]


retrieval_chain = generate_queries | retriever.map() | get_unique_union



from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)



final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)






  from tqdm.autonotebook import tqdm as notebook_tqdm


pdf


In [None]:
#print(result)

Based on the provided documents, the candidate, Shreyank, has worked on the following projects:

*   **Harini: Intelligent AI Scheduling Agent:** A fully autonomous scheduling agent using LangChain and LangGraph, automating calendar updates, confirmations, and follow-ups.
*   **AI interviewer:** With TTS abd STT and LLM
*   **ATSResumeAnalyze:** With LLM Integration
*   **MusicGenerator:** Using LSTM networks
*   **TelegramBot:** With Mistral AI
*   **CallFraudDetector:** Using Naive Bayes network
*   **CourseRecommender:** Using Vector database and Langchain
*   **VirtualLawyer:** RAG based application with LLM
* Built 20+ AI-powered chatbots using LangChain and LangGraph, demonstrating advanced multi-agent system skills.


In [11]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap, RunnablePassthrough


fallback_prompt = """
You are a helpful and honest assistant working within a RAG (Retrieval-Augmented Generation) system. You attempted to answer a user's question based on retrieved knowledge, but the information may be incomplete, irrelevant, or not confidently grounded.

Your goal now is to:
- Review the chat summary to understand ongoing conversation context.
- Review the retrieved context.
- Evaluate the initially attempted answer.
- If the context was insufficient or the answer is vague, provide a polite, thoughtful fallback response.
- If helpful, ask the user to clarify or reformulate their question.

---

Chat Summary (Conversation So Far):
{chat_summary}

User Question:
{question}

Initial Answer Attempted:
{answer}

---

Just return 'fall_back' if the answer is NOT good enough.
Return 'continue' if the answer is relevant and sufficient.
"""


fall_back_prompt = ChatPromptTemplate.from_template(fallback_prompt)

def fallback_node(state: State) -> dict:
    chat_summary = state.chat_summary
    question = state.question
    answer = state.answer

    # Combine into a runnable chain
    fallback_chain = fall_back_prompt | llm | StrOutputParser()

    # Provide input variables to the chain
    decision = fallback_chain.invoke({
        "chat_summary": chat_summary,
        "question": question,
        "answer": answer
    })

    decision = decision.strip().lower()

    if decision == "continue":
        return {"answer": answer}
    else:
        fallback_response = (
            "Apologies! It seems I don't have enough reliable information to confidently answer your question right now. "
            "This might be due to insufficient or unclear context. "
            "Please consider rephrasing your question or using a more advanced model for better results."
        )
        return {"answer": fallback_response}


In [12]:
summarizer_prompt = """
You are an expert summarizer. Summarize the entire conversation, including the latest question and answer pair, while preserving the key points.

Previous Summary ignore if its not there:
{previous_summary}

New Question and Answer:
Question: {question}
Answer: {answer}

---

Updated Summary:
"""

import dotenv 
dotenv.load_dotenv()

from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model = 'gemini-1.5-flash')

def summarizer_node(state:State):
    
    question = state.question
    answer = state.answer
    previous_summary = state.chat_summary 
    
    
    prompt = summarizer_prompt.format(
        previous_summary=previous_summary,
        question=question,
        answer=answer
    )

    
    updated_summary = llm.invoke(prompt)

    
    return {"updated_summary": updated_summary}


In [13]:

from typing import Literal 
from langchain_core.runnables import RunnableParallel


class HallucinationGrader(BaseModel):
    "Binary score for hallucination check in llm's response"

    grade: Literal["yes", "no"] = Field(
        ..., description="'yes' if the llm's reponse is hallucinated otherwise 'no'"
    )



In [14]:
hallucination_grader_system_prompt_template = (
    '''You are a grader assessing whether a response from an LLM is grounded in the given question and the context provided during the retrieval process.
    You will be given the following inputs:
    - The question asked by the user is {question}

    - The answer provided by the RAG application is on the basis of the context is 
    {answer}

    
    Your task is to determine if the LLM's response is based on the context (implicitly retrieved).
    If the LLM's response does not align with the question or context (introduces unrelated information), 
    it is considered a hallucination. In such cases, give a score of 'yes' (hallucinated).
    If the LLM's response is grounded in the context and consistent with the question, give a score of 'no' (not hallucinated).
    
    Just provide your answer in the following JSON format:
     grade: yes  or  grade: no 
    No additional explanation is needed.'''


)

hallucination_grader_prompt = ChatPromptTemplate.from_template(hallucination_grader_system_prompt_template)



In [15]:
def hallucination_grader(state: State):
    hallucination_grader_chain = (
        hallucination_grader_prompt
        | llm.with_structured_output(HallucinationGrader, method="json_mode")
    )

    
    graded_response = hallucination_grader_chain.invoke({
        'question': state.question,
        'answer': state.answer
    })
    
    return graded_response


In [16]:


class AnswerGrader(BaseModel):
    '''Binary score for an answer check based on a query.'''

    grade: Literal["yes", "no"] = Field(
        description="'yes' if the provided answer is an actual answer to the query otherwise 'no',"
    )


answer_grader_system_prompt_template = (
    '''
    You are a grader assessing whether the provided answer is a valid and relevant response to the given query.
    If the provided answer addresses the query correctly, give a score of 'yes'.
    If the provided answer does not answer the query or is irrelevant, give a score of 'no'.
    Just provide your answer in the following JSON format:
    grade: yes or grade: no
    No additional explanation is needed.'''
)


In [17]:

answer_grader_prompt = ChatPromptTemplate.from_template(answer_grader_system_prompt_template)
answer_grader_chain = answer_grader_prompt | llm.with_structured_output(
        AnswerGrader, method="json_mode"
    )


def answer_grader(state:State):
    answer = state.answer
    question = state.question
    grade = answer_grader_chain.invoke({'question':question,'answer':answer})
    return grade
    


In [18]:

def hallucination_and_answer_relevance_check(state:State):
    

    hallucination_grade = hallucination_grader(state)
    print(f"hallucinatio grade is {hallucination_grade}")
    if hallucination_grade.grade == "no":
        print("---Hallucination check passed---")
        answer_relevance_grade = answer_grader(state)
        print(answer_relevance_grade)
        if answer_relevance_grade.grade == "yes":
            print("---Answer is relevant to question---\n")
            return "useful"
        else:
            print("---Answer is not relevant to question---")
            return "not_useful"
    print("---Hallucination check failed---")
    return "generate"

In [19]:
def rag_answer(state:State):
    query = state.question
    answer = final_rag_chain.invoke({'question':query})
    return {'answer':answer}

In [44]:
from langgraph.graph import StateGraph, START, END

builder = StateGraph(State)

# Add your nodes
builder.add_node("rag_answer", rag_answer)
builder.add_node("summarizer_node", summarizer_node)
builder.add_node("fallback_node", fallback_node)
builder.add_node(hallucination_grader)
builder.add_node(answer_grader)


# Define your edges
builder.add_edge(START, "rag_answer")
builder.add_edge("rag_answer", "summarizer_node")
builder.add_edge('summarizer_node','hallucination_grader')
builder.add_edge('hallucination_grader','answer_grader')

# Add conditional edge using the function as a router
builder.add_conditional_edges(
    "answer_grader",
    hallucination_and_answer_relevance_check,  
   {
        "useful": END,  
        "generate": "rag_answer", 
        "not_useful": "fallback_node"
    },
)
builder.add_edge("fallback_node", END)

graph = builder.compile()

In [21]:
state = State(question = "What job role is perfect for the candidiate",answer = '',chat_summary = '')

In [46]:
graph.invoke(state)

hallucinatio grade is grade='no'
---Hallucination check passed---
grade='yes'
---Answer is relevant to question---



{'question': 'What job role is perfect for the candidiate',
 'answer': 'Based on the provided documents, Shreyank would be a good fit for roles such as:\n\n*   **AI Engineer/Developer:** His proficiency in Deep Learning frameworks (PyTorch, TensorFlow), Generative & Agentic AI tools (LangChain, LangGraph, OpenAI, Groq, Anthropic), and experience building AI-powered chatbots and applications strongly suggest suitability for this role.\n*   **Backend Developer (AI Focus):** His experience with Java, Spring Boot, Spring MVC, Spring REST, FastAPI, and Docker, coupled with his AI skills, makes him a strong candidate for backend development roles that involve integrating AI functionalities.\n*   **Machine Learning Engineer:** His achievements in AI hackathons, experience with natural language processing, computer vision, and building AI projects like the "AI interviewer" and "CallFraudDetector" highlight his capabilities in machine learning.\n*   **Full Stack Developer (AI Specialization):**

In [45]:
print(graph.get_graph().draw_ascii())

                     +-----------+           
                     | __start__ |           
                     +-----------+           
                            *                
                            *                
                            *                
                     +------------+          
                     | rag_answer |          
                     +------------+          
                   ***             ...       
                 **                   ..     
               **                       ...  
  +-----------------+                      ..
  | summarizer_node |                       .
  +-----------------+                       .
            *                               .
            *                               .
            *                               .
+----------------------+                   ..
| hallucination_grader |                ...  
+----------------------+              ..     
                   ***            

In [48]:
state

State(question='What job role is perfect for the candidiate', answer='', chat_summary='')