In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
import json
import re
import fitz  # PyMuPDF
import time
import logging
import sys
import textwrap
from typing import TypedDict, List, Dict, Optional, Any, Type

from pydantic import BaseModel, Field, ValidationError
from typing_extensions import TypedDict

from langgraph.graph import StateGraph, END
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [2]:
def setup_logger(name: str = "interview_bot") -> logging.Logger:
    logger = logging.getLogger(name)
    if not logger.handlers:
        logger.setLevel(logging.INFO)
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            "[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

logger = setup_logger()


In [3]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")


E0000 00:00:1760957632.479498    7946 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1760957632.483502    7946 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [4]:

def extract_text_from_pdf(pdf_path):
    text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            text += page.get_text("text") + "\n"
    return text

def chunk_cv_text(cv_text: str, user_id: str = "default_user") -> list:
    """Splits CV text into chunks for embedding and retrieval."""
    chunk_size = 800
    chunk_overlap = 200
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    cv_text = cv_text.strip().replace("\n", " ")
    documents = []
    
    for i, chunk in enumerate(splitter.split_text(cv_text)):
        chunk_text = chunk.strip()
        if len(chunk_text) < 20:
            continue
        documents.append(Document(
            page_content=chunk_text,
            metadata={"user_id": user_id, "chunk_index": i}
        ))
    return documents

In [5]:

def create_vectorstore(documents: List[Document], user_id: str = "default_user"):
    """Create FAISS vectorstore from documents."""
    index_dir = os.path.join(os.getcwd(), f"faiss_index_{user_id}")
    vectorstore = FAISS.from_documents(documents, embeddings)
    vectorstore.save_local(index_dir)
    return vectorstore

In [6]:

# ===== RETRIEVAL DECISION =====

def decide_retrieval(question: str, user_id: str = "default_user"):
    """Decides whether to retrieve context based on the question."""
    try:
        index_dir = os.path.join(os.getcwd(), f"faiss_index_{user_id}")
        if not os.path.exists(index_dir):
            logger.warning("No FAISS index found, skipping retrieval.")
            return False, 1.0
        
        vectorstore = FAISS.load_local(
            index_dir,
            embeddings,
            allow_dangerous_deserialization=True
        )
        
        top_chunks = vectorstore.similarity_search_with_score(question, k=3)
        if not top_chunks:
            return False, 1.0

        min_distance = min(score for _, score in top_chunks)
        # Larger distance => less similar => need retrieval
        needs_retrieval = min_distance > 0.55
        
        return needs_retrieval, min_distance
        
    except Exception as e:
        logger.error(f"Retrieval decision error: {e}")
        return False, 1.0

# ===== STATE =====

In [7]:

class InterviewState(TypedDict):
    topic: str
    content: List[str]
    cv_content: str
    questions: List[str]
    answers: List[str]
    feedback: List[Dict]
    current_question: Optional[str]
    current_answer: Optional[str]
    step: int
    max_questions: int
    final_evaluation: Optional[Dict]
    messages: List[Dict]
    question_type: str
    needs_retrieval: bool
    retrieved_context: Optional[str]
    similarity_score: Optional[float]
    user_id: str


In [8]:
# ===== GEMINI CLIENT =====

class QuestionFeedback(BaseModel):
    rating: int = Field(0, ge=0, le=10)
    feedback: str = "No feedback"

class AnswerFeedback(BaseModel):
    rating: int = Field(0, ge=0, le=10)
    feedback: str = "No feedback"

class GeminiClient:
    """Wrapper around Gemini LLM API."""
    def __init__(self):
        self.model = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

    def generate_content(self, prompt: str) -> str:
        try:
            response = self.model.invoke(prompt)
            if hasattr(response, "content"):
                return response.content.strip()
            elif hasattr(response, "text"):
                return response.text.strip()
            else:
                return str(response).strip()
        except Exception as e:
            logger.error(f"Gemini generation failed: {e}")
            return "Tell me about your experience with this technology."
    def safe_parse_json(
        self, response_text: str, model: Type[BaseModel] = QuestionFeedback
    ) -> dict:
        """
        Safely parses a JSON string response from Gemini LLM and validates it using a Pydantic model.
        Returns a default model dictionary if parsing or validation fails.

        Args:
            response_text (str): The raw JSON string returned by Gemini LLM.
            model (Type[BaseModel], optional): Pydantic model class to validate JSON. Default is QuestionFeedback.

        Returns:
            dict: Validated dictionary according to the Pydantic model.
        """
        if not response_text or not response_text.strip():
            logger.warning("Empty response received; returning default model")
            return model().dict()

        # Extract JSON substring from the response
        match = re.search(r"\{.*\}", response_text, re.DOTALL)
        if match:
            try:
                data = json.loads(match.group(0))
                validated = model(**data)
                return validated.dict()
            except (json.JSONDecodeError, ValidationError) as e:
                logger.error(f"Failed to parse/validate JSON: {e}")
                return model().dict()
        logger.warning("No JSON found in response; returning default model")
        return model().dict()


gemini_client = GeminiClient()


E0000 00:00:1760957632.554445    7946 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [9]:

# def get_setup_prompt(topic: str, question_type: str) -> str:
#     return f"""
#     You are conducting a technical interview for a {topic} position.
#     Generate an initial interview question that assesses basic knowledge and experience.
#     """

# def get_rag_setup_prompt(topic: str, question_type: str, context: str) -> str:
#     return f"""
#     You are conducting a technical interview for a {topic} position.

#     Candidate Background:
#     {context}

#     Generate the first interview question that considers the candidate's experience.
#     """

# def get_question_generation_prompt(content: str, topic: str, step: int) -> str:
#     return f"""
#     Generate the next interview question for a {topic} position.

#     Conversation so far:
#     {content}

#     Current step: {step + 1}

#     Next question:
#     """

# def get_rag_question_generation_prompt(content: str, topic: str, step: int, context: str) -> str:
#     return f"""
#     Generate the next interview question for a {topic} position.

#     Candidate Background:
#     {context}

#     Conversation so far:
#     {content}

#     Current step: {step + 1}

#     Next question:
#     """

# # ===== UTIL =====

# def safe_prompt(fstring: str) -> str:
#     return textwrap.dedent(fstring).strip()

# def _safe_generate(prompt: str, fallback: str) -> str:
#     try:
#         return gemini_client.generate_content(prompt) or fallback
#     except Exception as e:
#         logger.error("Generation failed: %s", e)
#         return fallback

"""
Interview System Prompts
Centralized prompt templates for the AI Interviewer system
"""

import textwrap
from typing import Literal

# -------------------------------
# Utility functions
# -------------------------------

def safe_text(text: str, max_len: int = 2000) -> str:
    """
    Sanitize and truncate user-provided or large text to avoid context overflow
    and unwanted characters.
    """
    if not text:
        return ""
    sanitized = str(text).replace("\r", "").replace("\t", "    ")
    return sanitized[:max_len]

def build_prompt(role_desc: str, content: str, body: str) -> str:
    """
    Standard prompt builder to avoid duplication.
    Applies safe_text to content and strips extra whitespace.
    """
    return textwrap.dedent(f"""
        You are {role_desc}.
        Using the following reference content:
        {safe_text(content)}

        {body}
    """).strip()


# -------------------------------
# Setup and Initial Question Prompts
# -------------------------------

def get_setup_prompt(content_text: str, topic: str, question_type: str) -> str:
    """Prompt for generating the first question"""
    question_style = "Ask a broad, general question." if question_type.startswith('broad') else "Ask a specific, detailed question."
    body = f"Generate question #1 for the topic: {topic}.\n{question_style}\nReturn ONLY the question text."
    return build_prompt("an expert interviewer", content_text, body)


# -------------------------------
# Question Generation Prompts
# -------------------------------

def get_question_generation_prompt(content_text: str, prompt_instruction: str, topic: str, step: int) -> str:
    """Prompt for generating follow-up questions"""
    body = f"{prompt_instruction}\nTopic: {topic}\nQuestion number: {step + 1}\nReturn ONLY the question text."
    return build_prompt("an expert interviewer", content_text, body)

def get_question_instruction(is_followup: bool, is_broad: bool, previous_answer: str = "") -> str:
    """
    Generate the instruction part for question generation.
    Avoids awkward references when previous_answer is empty.
    """
    style = "broad, general" if is_broad else "specific, detailed"
    scope = "follow-up" if is_followup else "new aspect"

    if is_followup and previous_answer.strip():
        return f"Generate a {style} {scope} question that directly probes details from the previous answer: {previous_answer}"
    elif is_followup:
        return f"Generate a {style} {scope} question that builds on the previous discussion."
    else:
        return f"Generate a {style} {scope} question that explores a new aspect of the topic, independent of the previous answer."


# -------------------------------
# Generic Evaluation Prompt
# -------------------------------

def get_evaluation_prompt(
    kind: Literal["question", "answer"],
    full_messages: str,
    full_content: str,
    transcript: str,
    last_question: str = "",
    last_answer: str = ""
) -> str:
    """
    Generic evaluation prompt for either 'question' or 'answer'.
    Sanitizes all inputs and prevents exceeding context.
    """
    kind_desc = "question" if kind == "question" else "candidate answer"

    body = textwrap.dedent(f"""
        Evaluate the following {kind_desc} for its clarity, relevance, depth, and alignment with the topic,
        considering the ENTIRE interview history, accumulated context, all previous messages, questions, answers, and feedback.

        Full Interview History (Messages): {safe_text(full_messages)}
        Accumulated Context (Search Snippets): {safe_text(full_content)}
        Previous Q&A Transcript: {safe_text(transcript)}
        Current Question: {safe_text(last_question)}
        Current Candidate Answer: {safe_text(last_answer)}

        Provide a rating (1-10) for {kind_desc} quality and detailed feedback. Return JSON only, no extra text
    """)

    if kind == "answer":
        body += textwrap.dedent("""
            Return in JSON format:
            {
                "rating": 0,
                "feedback": "..."
            }
        """)

    return build_prompt("an expert interviewer", "", body)


# -------------------------------
# Final Evaluation Prompt
# -------------------------------

def get_final_evaluation_prompt(transcript: str) -> str:
    """Prompt for final evaluation of all questions"""
    body = textwrap.dedent(f"""
        Based on this transcript, produce a JSON summary evaluation of the questions:
        {safe_text(transcript)}
        return only JSON, no extra text
        JSON format ONLY, with explicit types:
        {{
            "overall_quality": 0,             # integer 1-10
            "strengths": ["..."],             # list of strings
            "areas_for_improvement": ["..."], # list of strings
            "recommendation": "...",          # string: keep/revise/remove
            "final_feedback": "..."           # string
        }}
    """).strip()
    return build_prompt("an expert interviewer", "", body)


In [10]:
"""
Agentic RAG interview nodes (clean, structured, and printable report)
Drop this into your nodes module (replace the corresponding functions).
Assumes external symbols exist:
 - InterviewState (mapping-like), gemini_client, FAISS, embeddings
 - decide_retrieval, _safe_generate, safe_prompt
 - get_setup_prompt, get_rag_setup_prompt, get_question_generation_prompt,
   get_rag_question_generation_prompt, get_question_instruction,
   get_evaluation_prompt, get_final_evaluation_prompt
 - setup_logger
If any of those are missing in your environment, import or adapt accordingly.
"""

import os
import json
import time
import textwrap
from dataclasses import dataclass
from typing import Any, Dict, List, Mapping, Optional

# -------------------------
# Logger (reuse your setup)
# -------------------------
logger = setup_logger(__name__)

# -------------------------
# Helper utilities
# -------------------------
def safe_parse_json(response: Any) -> Dict[str, Any]:
    """
    Robustly parse possible model responses into a Python dict.
    Handles:
      - dict-like responses
      - objects with `.text` attribute
      - strings that contain JSON (extract first {...} substring)
    Falls back to a minimal safe structure if parsing fails.
    """
    fallback = {"rating": 6, "feedback": "Good effort. Could elaborate more."}

    if not response:
        return fallback

    # If it's already a dict-like object, return as-is
    if isinstance(response, dict):
        return response

    # If it's an object with .text or .content, try to pull its string
    text = None
    if hasattr(response, "text"):
        try:
            text = response.text
        except Exception:
            text = None
    if text is None and hasattr(response, "content"):
        try:
            text = response.content
        except Exception:
            text = None
    if text is None and isinstance(response, str):
        text = response

    if not text:
        return fallback

    # Try full JSON parse
    try:
        return json.loads(text)
    except Exception:
        pass

    # Try to extract substring that looks like JSON object
    try:
        start = text.find("{")
        end = text.rfind("}") + 1
        if start != -1 and end != -1 and end > start:
            json_str = text[start:end]
            return json.loads(json_str)
    except Exception:
        pass

    # As a last attempt, return fallback but include the raw text for debugging
    return {"rating": 6, "feedback": "Good effort. Could elaborate more.", "raw_text": text[:1000]}


@dataclass
class FinalEvaluation:
    """
    Structured final evaluation object used to store overall summary.
    """
    overall_quality: int
    strengths: List[str]
    areas_for_improvement: List[str]
    recommendation: str
    final_feedback: str

    def model_dump(self) -> Dict[str, Any]:
        return {
            "overall_quality": int(self.overall_quality),
            "strengths": list(self.strengths or []),
            "areas_for_improvement": list(self.areas_for_improvement or []),
            "recommendation": str(self.recommendation),
            "final_feedback": str(self.final_feedback),
        }


# -------------------------
# Nodes
# -------------------------

# ---------- SETUP NODE ----------
def setup_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Initialize interview state, optionally run RAG retrieval for topic.
    """
    topic = state.get("topic", "").strip()
    question_type = state.get("question_type", "broad_followup").strip()
    user_id = state.get("user_id", "default_user")

    needs_retrieval = True
    retrieved_context = ""
    similarity_score = 0.0

    if needs_retrieval:
        try:
            index_dir = os.path.join(os.getcwd(), f"faiss_index_{user_id}")
            vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
            docs = vectorstore.similarity_search(topic, k=3)
            retrieved_context = "\n\n".join([doc.page_content for doc in docs])
            logger.info("Retrieved context for setup based on topic: %s", topic)
        except Exception as e:
            logger.error("Setup retrieval failed: %s", e)

    # prompt builder expects (content_text, topic, question_type)
    if retrieved_context:
        prompt = safe_prompt(get_setup_prompt(retrieved_context, topic, question_type))
    else:
        prompt = safe_prompt(get_setup_prompt("", topic, question_type))

    first_question = _safe_generate(prompt, "Tell me about your experience with this technology.")

    new_state = {
        **dict(state),
        "topic": topic,
        "question_type": question_type,
        "content": [retrieved_context or "No content"],
        "messages": [{"role": "user", "content": f"Interview topic: {topic}"}],
        "step": 0,
        "questions": [],
        "answers": [],
        "feedback": [],
        "current_question": first_question,
        "max_questions": state.get("max_questions", 3),
        "needs_retrieval": needs_retrieval,
        "retrieved_context": retrieved_context,
        "similarity_score": similarity_score,
    }
    return new_state


# ---------- GET ANSWER NODE ----------
def get_answer_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Collect user's answer (CLI) and update messages/transcript.
    Replace blocking input() if using a web frontend.
    """
    current_q = state.get("current_question")
    if not current_q:
        raise ValueError("No current_question found in state.")

    answer = input(f"\n❓ Question {state.get('step', 0) + 1}: {current_q}\n💭 Your answer: ").strip()

    new_messages = list(state.get("messages", [])) + [
        {"role": "interviewer", "content": current_q},
        {"role": "candidate", "content": answer},
    ]

    content_list = list(state.get("content", []))
    content_list.append(f"Q: {current_q}\nA: {answer}")

    return {
        **dict(state),
        "current_answer": answer,
        "messages": new_messages,
        "questions": list(state.get("questions", [])) + [current_q],
        "answers": list(state.get("answers", [])) + [answer],
        "content": content_list,
    }


# ---------- RETRIEVAL DECISION NODE ----------
def retrieval_decision_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Decide whether to retrieve additional context based on the current answer.
    Uses `decide_retrieval(answer, user_id)` - implement that heuristic as needed.
    """
    current_answer = state.get("current_answer", "")
    user_id = state.get("user_id", "default_user")

    if not current_answer:
        return {**dict(state), "needs_retrieval": False, "retrieved_context": None, "similarity_score": 0}

    needs_retrieval, similarity_score = decide_retrieval(current_answer, user_id)
    retrieved_context = None

    if needs_retrieval:
        try:
            index_dir = os.path.join(os.getcwd(), f"faiss_index_{user_id}")
            vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
            docs = vectorstore.similarity_search(current_answer, k=3)
            retrieved_context = "\n\n".join([doc.page_content for doc in docs])
            logger.info("Retrieved context based on answer similarity: %.2f", similarity_score)
        except Exception as e:
            logger.error("Answer retrieval failed: %s", e)

    return {
        **dict(state),
        "needs_retrieval": needs_retrieval,
        "retrieved_context": retrieved_context,
        "similarity_score": similarity_score,
    }


# ---------- EVALUATE QUESTION NODE ----------
def evaluate_question_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Use the LLM to evaluate both the question quality and the candidate's answer.
    Stores structured feedback in state['feedback'].
    """
    questions = list(state.get("questions", []))
    answers = list(state.get("answers", []))
    if not questions or not answers:
        logger.warning("No questions/answers to evaluate.")
        return dict(state)

    current_q = questions[-1]
    current_a = answers[-1]
    full_content = "\n".join(state.get("content", []))
    transcript = "\n".join([f"Q: {q}\nA: {a}" for q, a in zip(questions, answers)])
    messages_text = "\n".join([m.get("content", "") for m in state.get("messages", [])])

    try:
        # Ask for a question-quality evaluation
        q_prompt = get_evaluation_prompt(
            kind="question",
            full_messages=messages_text,
            full_content=full_content,
            transcript=transcript,
            last_question=current_q,
            last_answer=current_a,
        )
        q_raw = gemini_client.generate_content(q_prompt)
        q_parsed = safe_parse_json(q_raw)

        # Ask for an answer evaluation
        a_prompt = get_evaluation_prompt(
            kind="answer",
            full_messages=messages_text,
            full_content=full_content,
            transcript=transcript,
            last_question=current_q,
            last_answer=current_a,
        )
        a_raw = gemini_client.generate_content(a_prompt)
        a_parsed = safe_parse_json(a_raw)

    except Exception as e:
        logger.warning("Gemini feedback parsing failed: %s", e)
        q_parsed = {"rating": 6, "feedback": "Good effort. Could elaborate more."}
        a_parsed = {"rating": 6, "feedback": "Good effort. Could elaborate more."}

    # Normalise field names to your earlier schema if necessary
    # e.g., ensure 'rating' or 'score' consistently present
    def _norm(parsed: Dict[str, Any]) -> Dict[str, Any]:
        out = dict(parsed)
        if "score" in parsed and "rating" not in parsed:
            out["rating"] = parsed["score"]
        if "rating" in parsed and isinstance(parsed["rating"], (float, str)):
            try:
                out["rating"] = int(parsed["rating"])
            except Exception:
                pass
        return out

    q_final = _norm(q_parsed)
    a_final = _norm(a_parsed)

    # Append to feedback list
    feedback_list = list(state.get("feedback", []))
    feedback_list.append({"question_feedback": q_final, "answer_feedback": a_final})

    logger.info("Question %d evaluated: %s", state.get("step", 0) + 1, a_final.get("feedback", ""))

    return {**dict(state), "feedback": feedback_list, "step": state.get("step", 0) + 1}


# ---------- GENERATE QUESTION NODE ----------
def generate_question_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Generate the next question. Uses RAG prompt if retrieved_context present.
    Adapts behavior according to `state['question_type']`.
    """
    step = state.get("step", 0)
    max_questions = state.get("max_questions", 3)
    if step >= max_questions:
        logger.warning("Max questions reached, skipping question generation.")
        return dict(state)

    topic = state.get("topic", "")
    content_list = list(state.get("content", ["No content"]))
    prompt_instruction = get_question_instruction(
        is_followup=True if "followup" in state.get("question_type", "") else False,
        is_broad=True if "broad" in state.get("question_type", "") else False,
        previous_answer=state.get("current_answer", "")
    )

    retrieved_context = state.get("retrieved_context")
    if retrieved_context and state.get("needs_retrieval", False):
        prompt = safe_prompt(get_rag_question_generation_prompt("\n".join(content_list), topic, step, retrieved_context))
        logger.info("Using RAG context for question generation")
    else:
        prompt = safe_prompt(get_question_generation_prompt("\n".join(content_list), prompt_instruction, topic, step))
        logger.info("Using standard question generation")

    question = _safe_generate(prompt, f"Tell me more about your experience with {topic}.")
    return {**dict(state), "current_question": question}


# ---------- FINAL EVALUATION NODE ----------
def final_evaluation_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Summarize all feedback and produce a final structured evaluation.
    """
    questions = list(state.get("questions", []))
    answers = list(state.get("answers", []))
    feedback = list(state.get("feedback", []))

    if not questions or not answers:
        logger.warning("No data available for final evaluation.")
        return dict(state)

    # Compose transcript for final prompt
    transcript = ""
    for i in range(len(questions)):
        q = questions[i]
        a = answers[i] if i < len(answers) else ""
        fb = feedback[i] if i < len(feedback) else {}
        transcript += f"Q{i+1}: {q}\nA{i+1}: {a}\nFeedback: {fb.get('answer_feedback', {}).get('feedback', '')}\n\n"

    final_prompt = get_final_evaluation_prompt(transcript)
    try:
        raw_final = gemini_client.generate_content(final_prompt)
        parsed_final = safe_parse_json(raw_final)
    except Exception as e:
        logger.error("Final evaluation parsing failed: %s", e)
        parsed_final = {}

    final_eval = FinalEvaluation(
        overall_quality=int(parsed_final.get("overall_quality", 7)),
        strengths=parsed_final.get("strengths", ["Good technical depth"]),
        areas_for_improvement=parsed_final.get("areas_for_improvement", ["Could elaborate on examples"]),
        recommendation=parsed_final.get("recommendation", "Recommended with reservations."),
        final_feedback=parsed_final.get("final_feedback", "Solid overall performance with scope for improvement."),
    )

    logger.info("Final evaluation completed successfully.")
    return {**dict(state), "final_evaluation": final_eval.model_dump()}


# ---------- DISPLAY RESULTS NODE ----------
def display_results_node(state: Mapping[str, Any]) -> Dict[str, Any]:
    """
    Pretty-print full interview report to console and save structured JSON to disk.
    """
    topic = state.get("topic", "N/A")
    user_id = state.get("user_id", "user")
    questions = list(state.get("questions", []))
    answers = list(state.get("answers", []))
    feedback = list(state.get("feedback", []))
    final_eval = state.get("final_evaluation", {})

    # Pretty terminal output
    sep = "=" * 70
    print("\n" + sep)
    print(f"INTERVIEW REPORT — Topic: {topic}")
    print(sep + "\n")

    for i, q in enumerate(questions, start=1):
        a = answers[i - 1] if i - 1 < len(answers) else ""
        fb = feedback[i - 1].get("answer_feedback", {}) if i - 1 < len(feedback) else {}
        q_fb = feedback[i - 1].get("question_feedback", {}) if i - 1 < len(feedback) else {}
        print(f"Q{i}: {q}")
        print(f"A{i}: {a}\n")
        print("Question Feedback:")
        print(f"  {q_fb.get('feedback', q_fb.get('comment', 'No feedback'))}")
        print(f"  (Rating: {q_fb.get('rating', q_fb.get('score', '-'))})\n")
        print("Answer Feedback:")
        print(f"  {fb.get('feedback', 'No feedback')}")
        # if suggestions exist, show them
        suggestions = fb.get("suggestions") or fb.get("recommendations") or []
        if suggestions:
            print("  Suggestions:")
            for s in suggestions:
                print(f"   - {s}")
        print(f"  (Rating: {fb.get('rating', fb.get('score', '-'))})")
        print("-" * 70)

    # Final evaluation summary
    print("\nFINAL EVALUATION")
    print("-" * 70)
    print(f"Overall Quality: {final_eval.get('overall_quality', 'N/A')}/10")
    print("Strengths:")
    for s in final_eval.get("strengths", []):
        print(f" - {s}")
    print("Areas for improvement:")
    for a in final_eval.get("areas_for_improvement", []):
        print(f" - {a}")
    print(f"Recommendation: {final_eval.get('recommendation', 'N/A')}")
    print(f"\nFinal Feedback: {final_eval.get('final_feedback', '')}")
    print(sep + "\n")

    # Save JSON result
    try:
        os.makedirs("results", exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"results/{user_id}_{timestamp}.json"
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(dict(state), f, indent=2, ensure_ascii=False)
        logger.info("Results saved to %s", filename)
        print(f"Results saved to {filename}")
    except Exception as e:
        logger.error("Failed to save results: %s", e)
        print("Failed to save results:", e)

    return dict(state)


# -------------------------
# End of nodes module
# -------------------------


In [11]:

# ===== GRAPH CONSTRUCTION =====

SETUP_NODE = "setup"
GET_ANSWER_NODE = "get_answer"
RETRIEVAL_DECISION_NODE = "retrieval_decision"
EVALUATE_QUESTION_NODE = "evaluate_question"
GENERATE_QUESTION_NODE = "generate_question"
FINAL_EVALUATION_NODE = "final_evaluation"
DISPLAY_RESULTS_NODE = "display_results"

def should_continue(state: InterviewState) -> str:
    step = state.get("step", 0)
    max_questions = state.get("max_questions", 3)
    return GENERATE_QUESTION_NODE if step < max_questions else FINAL_EVALUATION_NODE

def create_interview_graph() -> StateGraph:
    logger.info("Initializing interview graph with RAG...")

    builder = StateGraph(InterviewState)
    builder.add_node(SETUP_NODE, setup_node)
    builder.add_node(GET_ANSWER_NODE, get_answer_node)
    builder.add_node(RETRIEVAL_DECISION_NODE, retrieval_decision_node)
    builder.add_node(EVALUATE_QUESTION_NODE, evaluate_question_node)
    builder.add_node(GENERATE_QUESTION_NODE, generate_question_node)
    builder.add_node(FINAL_EVALUATION_NODE, final_evaluation_node)
    builder.add_node(DISPLAY_RESULTS_NODE, display_results_node)

    builder.set_entry_point(SETUP_NODE)
    builder.add_edge(SETUP_NODE, GET_ANSWER_NODE)
    builder.add_edge(GET_ANSWER_NODE, RETRIEVAL_DECISION_NODE)
    builder.add_edge(RETRIEVAL_DECISION_NODE, EVALUATE_QUESTION_NODE)
    builder.add_conditional_edges(
        EVALUATE_QUESTION_NODE,
        should_continue,
        {
            GENERATE_QUESTION_NODE: GENERATE_QUESTION_NODE,
            FINAL_EVALUATION_NODE: FINAL_EVALUATION_NODE,
        },
    )
    builder.add_edge(GENERATE_QUESTION_NODE, GET_ANSWER_NODE)
    builder.add_edge(FINAL_EVALUATION_NODE, DISPLAY_RESULTS_NODE)
    builder.add_edge(DISPLAY_RESULTS_NODE, END)

    logger.info("Interview graph with RAG successfully constructed.")
    return builder.compile()


In [12]:

# ===== MAIN EXECUTION =====

if __name__ == "__main__":
    cv_path = input("Enter CV path: ").strip()
    cv_text = extract_text_from_pdf(cv_path)
    documents = chunk_cv_text(cv_text, user_id="user123")
    vectorstore = create_vectorstore(documents, user_id="user123")
    print(f"✅ CV processed: {len(documents)} chunks created and indexed.")

    topic = input("Enter interview topic/job title: ").strip()
    print("\nChoose question style:")
    print("1. Broad, follow-up questions (general, builds on previous answers)")
    print(
        "2. Narrow, follow-up questions (specific, probes details from previous answers)"
    )
    print("3. Broad, non-follow-up questions (general, new topic aspects)")
    print("4. Narrow, non-follow-up questions (specific, new topic aspects)")

    question_type_map = {
        "1": "broad_followup",
        "2": "narrow_followup",
        "3": "broad_nonfollowup",
        "4": "narrow_nonfollowup",
    }

    while True:
        choice = input("Enter choice (1-4): ").strip()
        if choice in question_type_map:
            question_type = question_type_map[choice]
            break
        print("⚠️ Invalid choice! Please enter 1, 2, 3, or 4.")

    interview_graph = create_interview_graph()

    initial_state = {
        "topic": topic,
        "content": [],
        "cv_content": cv_text[:1000],
        "questions": [],
        "answers": [],
        "feedback": [],
        "current_question": None,
        "current_answer": None,
        "step": 0,
        "max_questions": 3,
        "final_evaluation": None,
        "messages": [],
        "question_type": question_type,
        "needs_retrieval": False,
        "retrieved_context": None,
        "similarity_score": None,
        "user_id": "user123"
    }

    print("\n🚀 Starting interview...\n")
    final_state = interview_graph.invoke(initial_state)
    print("\n✅ Interview completed! Results saved in interview_results.json")


Enter CV path:  cv3.pdf


✅ CV processed: 7 chunks created and indexed.


Enter interview topic/job title:  python senior dev



Choose question style:
1. Broad, follow-up questions (general, builds on previous answers)
2. Narrow, follow-up questions (specific, probes details from previous answers)
3. Broad, non-follow-up questions (general, new topic aspects)
4. Narrow, non-follow-up questions (specific, new topic aspects)


Enter choice (1-4):  2


[2025-10-20 13:54:10] [INFO] [__main__] Initializing interview graph with RAG...
[2025-10-20 13:54:10] [INFO] [__main__] Interview graph with RAG successfully constructed.

🚀 Starting interview...

[2025-10-20 13:54:10] [INFO] [__main__] Retrieved context for setup based on topic: python senior dev


NameError: name 'safe_prompt' is not defined