# AI-Powered Interview Question Generator & Evaluator (LangChain + Azure OpenAI)

This notebook demonstrates a **multi-agent** interview system built with **LangChain** and **Azure OpenAI**. It:
1) analyzes a job description,
2) generates role-specific interview questions,
3) evaluates candidate answers, and
4) produces a structured hiring recommendation using an orchestrated agent workflow with session memory.


## 0. Install dependencies


In [None]:
# Install or upgrade (-U) the packages used by this notebook; -q keeps pip's output quiet.
%pip -q install -U langchain-openai langchain-core langgraph python-dotenv pydantic azure-storage-blob


## 1. Configure Azure OpenAI

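Set these environment variables before running the notebook. The recommended approach is to put them in a local `.env` file, which the next cell loads with `load_dotenv`.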

- `AZURE_OPENAI_ENDPOINT` e.g. `https://<resource>.openai.azure.com/`
- `AZURE_OPENAI_API_KEY`
- `OPENAI_API_VERSION`
- `AZURE_OPENAI_CHAT_DEPLOYMENT`


In [None]:
import os
from dotenv import load_dotenv
# Load settings from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# The settings the Azure OpenAI client in section 3 is built from.
required = [
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_API_KEY",
    "OPENAI_API_VERSION",
    "AZURE_OPENAI_CHAT_DEPLOYMENT",
]

# A variable that is unset or set to an empty string counts as missing.
missing = [name for name in required if not os.environ.get(name)]
print("Missing env vars:", missing)


## 2. Define schemas (structured outputs)

In [None]:
from pydantic import BaseModel, Field
from typing import List, Literal, Optional

# Structured summary of a job description. This model is the structured-output
# schema of the `jd_analyzer` chain, and its dump is what later steps receive
# as the "analysis".
# NOTE(review): documented with `#` comments rather than a class docstring;
# presumably a docstring would end up in the schema handed to the model, so
# confirm before adding one.
class JobAnalysis(BaseModel):
    # The two fields below carry an explicit description in the schema.
    role_title: str = Field(..., description="Role title inferred from the JD")
    seniority: str = Field(..., description="Seniority level (e.g., Junior/Mid/Senior/Lead)")
    # Required lists declared without a description.
    must_have_skills: List[str]
    nice_to_have_skills: List[str]
    responsibilities: List[str]
    interview_focus_areas: List[str] = Field(..., description="Competency areas to cover")
    # Read back by the evaluation step, which joins these items into the
    # rubric text of the evaluator prompt.
    evaluation_rubric: List[str] = Field(..., description="Scoring guidelines")

# One generated interview question plus the hints used later to score answers.
class InterviewQuestion(BaseModel):
    # Identifier copied into the matching evaluation's `question_id`.
    id: str
    competency: str
    # Restricted to exactly these three labels.
    difficulty: Literal["easy", "medium", "hard"]
    # The question text shown to the candidate and passed to the evaluator.
    question: str
    # Both lists are joined into the evaluator prompt alongside the answer.
    expected_signals: List[str]
    red_flags: List[str]

# Wrapper object used as the structured-output schema of `question_generator`,
# so a whole list of questions comes back as a single result.
class QuestionSet(BaseModel):
    questions: List[InterviewQuestion]

# Score and feedback for one candidate answer; structured-output schema of
# `answer_evaluator`.
class AnswerEvaluation(BaseModel):
    # Callers overwrite this with the id of the question they actually asked.
    question_id: str
    # Integer score validated to lie in the inclusive range 1..5.
    score_1_to_5: int = Field(..., ge=1, le=5)
    rationale: str
    # Optional in the output: defaults to a fresh empty list per instance.
    signal_tags: List[str] = Field(default_factory=list)
    # Optional in the output: None when no follow-up is given.
    followup_question: Optional[str] = None

# Final hiring recommendation synthesized from all evaluations;
# structured-output schema of the `recommender` chain.
class HiringRecommendation(BaseModel):
    # Restricted to exactly these three outcomes.
    decision: Literal["Hire", "Hold", "No Hire"]
    # Bounded to the inclusive range 1..5, matching the per-answer
    # `score_1_to_5` constraint, so an out-of-scale overall score is rejected
    # at validation time instead of being accepted silently.
    overall_score_1_to_5: float = Field(..., ge=1, le=5)
    strengths: List[str]
    gaps: List[str]
    risks: List[str]
    suggested_next_steps: List[str]
    summary: str


## 3. Create agents (LangChain + AzureChatOpenAI)

In [None]:
import json
import os
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Constructor keyword -> environment variable that supplies its value.
_azure_settings = {
    "azure_endpoint": "AZURE_OPENAI_ENDPOINT",
    "api_key": "AZURE_OPENAI_API_KEY",
    "api_version": "OPENAI_API_VERSION",
    "azure_deployment": "AZURE_OPENAI_CHAT_DEPLOYMENT",
}

# Single chat model shared by all four chains defined below. A variable that
# is not set is passed through as None, exactly as os.getenv would return it.
llm = AzureChatOpenAI(
    **{kwarg: os.environ.get(env_name) for kwarg, env_name in _azure_settings.items()},
    temperature=0.2,
)

# Prompt for the JD analyzer. Template variable: {jd} (raw job description text).
jd_analyzer_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an HR tech assistant. Extract structured hiring requirements from the job description. Return only JSON."),
    ("human", "JOB DESCRIPTION: {jd}")
])

# Prompt for the question generator. Template variables: {analysis_json}
# (the job analysis serialized as JSON) and {n_questions} (how many to create).
question_gen_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an interview designer. Create a balanced set of interview questions covering the focus areas. Return only JSON."),
    ("human", "JOB ANALYSIS (JSON): {analysis_json} Generate {n_questions} questions.")
])

# Prompt for scoring a single answer. Template variables: {rubric}, {question},
# {signals}, {red_flags} and {answer}; all are filled with plain text.
evaluator_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an interview evaluator. Score the candidate answer using the rubric. Return only JSON."),
    ("human", "RUBRIC: {rubric} QUESTION: {question} EXPECTED SIGNALS:{signals} RED FLAGS: {red_flags} CANDIDATE ANSWER:{answer}")
])

# Prompt for the final recommendation. Template variables: {analysis_json}
# and {evaluations_json}, both JSON strings.
recommender_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a hiring committee chair. Synthesize evaluations into a final recommendation. Return only JSON."),
    ("human", "JOB ANALYSIS (JSON): {analysis_json} EVALUATIONS (JSON): {evaluations_json}")
])

# Each agent is its prompt piped into the shared model, with the model's
# output constrained to the paired pydantic schema. The chains are built in
# the same order as the names on the left-hand side.
jd_analyzer, question_generator, answer_evaluator, recommender = (
    prompt | llm.with_structured_output(schema)
    for prompt, schema in (
        (jd_analyzer_prompt, JobAnalysis),
        (question_gen_prompt, QuestionSet),
        (evaluator_prompt, AnswerEvaluation),
        (recommender_prompt, HiringRecommendation),
    )
)

## 4. Orchestration with LangGraph (multi-agent workflow + session memory)

Workflow:

`Analyze JD → Generate Questions → (Pick Q → Evaluate A)* → Recommend`

A **checkpointer** stores state per `thread_id` (session), enabling multi-turn memory.


In [None]:
from typing_extensions import TypedDict
from typing import Any, Dict, List
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver

class InterviewState(TypedDict, total=False):
    """State dictionary shared by the interview graph nodes.

    ``total=False`` makes every key optional.
    """
    jd: str  # job description text; read by node_analyze_jd
    analysis: Dict[str, Any]  # JobAnalysis dump; written by node_analyze_jd
    questions: List[Dict[str, Any]]  # question dumps; written by node_generate_questions
    candidate_answers: List[str]  # answers looked up by question index; no node writes this key
    evaluations: List[Dict[str, Any]]  # one AnswerEvaluation dump appended per evaluated question
    current_index: int  # index of the question to pick/evaluate next
    current_question: Dict[str, Any]  # question selected by node_pick_question
    recommendation: Dict[str, Any]  # HiringRecommendation dump; written by node_recommend


def node_analyze_jd(state: InterviewState):
    """Graph node: run the JD analyzer on ``state["jd"]``.

    Returns a partial state update with the analyzer result, converted to a
    plain dict via ``model_dump()``, under the ``"analysis"`` key.
    """
    job_analysis = jd_analyzer.invoke({"jd": state["jd"]})
    return {"analysis": job_analysis.model_dump()}


def node_generate_questions(state: InterviewState):
    """Graph node: generate interview questions from the stored job analysis.

    The analysis is serialized to pretty-printed JSON for the prompt and eight
    questions are requested. The returned update also resets the question
    cursor to 0 and clears any previous evaluations.
    """
    generator_input = {
        "analysis_json": json.dumps(state["analysis"], ensure_ascii=False, indent=2),
        "n_questions": 8,
    }
    question_set = question_generator.invoke(generator_input).model_dump()
    return {
        "questions": [*question_set["questions"]],
        "current_index": 0,
        "evaluations": [],
    }


def node_pick_question(state: InterviewState):
    """Graph node: select the question at the current cursor position.

    The cursor defaults to 0 when ``"current_index"`` is absent. Raises
    ``IndexError`` if the cursor is outside ``state["questions"]``.
    """
    question_list = state["questions"]
    position = state.get("current_index", 0)
    return {"current_question": question_list[position]}


def node_evaluate_answer(state: InterviewState):
    """Graph node: score the candidate's answer to the current question.

    Reads ``current_index``, ``current_question``, ``analysis`` and, when
    present, ``candidate_answers`` and ``evaluations`` from the state.

    Returns a partial state update with the new evaluation appended to
    ``"evaluations"`` and ``"current_index"`` advanced by one.
    """
    idx = state["current_index"]
    q = state["current_question"]
    answers = state.get("candidate_answers", [])
    # A question without a supplied answer is scored against an empty string.
    answer = answers[idx] if idx < len(answers) else ""

    # Join list items with newlines so each rubric line, signal and red flag
    # stays a separate item in the prompt; joining with "" would fuse them
    # into one run of text.
    rubric = "\n".join(state["analysis"].get("evaluation_rubric", []))

    ev = answer_evaluator.invoke({
        "rubric": rubric,
        "question": q["question"],
        "signals": "\n".join(q.get("expected_signals", [])),
        "red_flags": "\n".join(q.get("red_flags", [])),
        "answer": answer,
    }).model_dump()

    # Use the id of the question that was actually asked rather than whatever
    # id the model put in its output.
    ev["question_id"] = q["id"]
    # Build a new list instead of mutating the one stored in the state.
    return {"evaluations": state.get("evaluations", []) + [ev], "current_index": idx + 1}


def should_continue(state: InterviewState):
    """Routing function used after an answer has been evaluated.

    Returns ``"continue"`` while the cursor still points at both an existing
    question and an existing candidate answer, otherwise ``"recommend"``.
    Missing keys are treated as index 0 and empty lists.
    """
    position = state.get("current_index", 0)
    # The loop can only go on while both lists still have an entry at `position`.
    usable_count = min(
        len(state.get("questions", [])),
        len(state.get("candidate_answers", [])),
    )
    return "continue" if position < usable_count else "recommend"


def node_recommend(state: InterviewState):
    """Graph node: produce the final hiring recommendation.

    The job analysis and the collected evaluations (an empty list when none
    exist) are each serialized to pretty-printed JSON for the recommender.
    The result is returned as a plain dict under ``"recommendation"``.
    """
    analysis_json = json.dumps(state["analysis"], ensure_ascii=False, indent=2)
    evaluations_json = json.dumps(state.get("evaluations", []), ensure_ascii=False, indent=2)
    recommendation = recommender.invoke({
        "analysis_json": analysis_json,
        "evaluations_json": evaluations_json,
    })
    return {"recommendation": recommendation.model_dump()}


# Assemble the interview workflow over the shared InterviewState.
builder = StateGraph(InterviewState)

# Register every node under the name the edges below refer to.
for _node_name, _node_fn in (
    ("analyze_jd", node_analyze_jd),
    ("generate_questions", node_generate_questions),
    ("pick_question", node_pick_question),
    ("evaluate_answer", node_evaluate_answer),
    ("recommend", node_recommend),
):
    builder.add_node(_node_name, _node_fn)

# Fixed path from the start of the graph to the first evaluation.
for _source, _target in (
    (START, "analyze_jd"),
    ("analyze_jd", "generate_questions"),
    ("generate_questions", "pick_question"),
    ("pick_question", "evaluate_answer"),
):
    builder.add_edge(_source, _target)

# After each evaluation, should_continue decides whether to loop back for the
# next question or move on to the final recommendation.
builder.add_conditional_edges(
    "evaluate_answer",
    should_continue,
    {"continue": "pick_question", "recommend": "recommend"},
)

builder.add_edge("recommend", END)

# Compile with an in-memory checkpointer so state is saved between steps.
checkpointer = MemorySaver()
interview_graph = builder.compile(checkpointer=checkpointer)
print("Graph compiled")

## 5. Demo run

In [None]:
# Example job description used by the demo cell below; .strip() drops the
# leading and trailing newlines introduced by the triple-quoted literal.
sample_jd = '''
Role: Senior Data Engineer
Responsibilities:
- Build and maintain data pipelines (batch and streaming) using Python and Spark
- Design data models in a cloud data warehouse (Azure Synapse / Databricks / Snowflake)
- Implement CI/CD, testing, and monitoring for data workflows
- Collaborate with analysts and data scientists to deliver curated datasets
Requirements:
- 5+ years in data engineering
- Strong Python, SQL, Spark
- Experience with Azure (ADF, Databricks) preferred
- Knowledge of data governance and security
'''.strip()

In [None]:
import json

# Step 1: turn the sample JD into a structured analysis (kept as a plain dict).
analysis_obj = jd_analyzer.invoke({"jd": sample_jd})
analysis = analysis_obj.model_dump()

# The same JSON rendering of the analysis feeds both the question generator
# and the final recommender; `analysis` is not modified in between.
_analysis_json = json.dumps(analysis, ensure_ascii=False, indent=2)

# Step 2: request three questions and normalize each one to a dict.
qset_obj = question_generator.invoke({
    "analysis_json": _analysis_json,
    "n_questions": 3,
})
questions = []
for item in qset_obj.questions:
    # Items exposing model_dump() are dumped; anything else is kept as is.
    questions.append(item.model_dump() if hasattr(item, "model_dump") else item)

# Step 3: ask every question on the console before any scoring happens.
answers = []
for q in questions:
    print("\nQUESTION:", q["question"])
    answers.append(input("Your answer: "))

# Step 4: score each (question, answer) pair against the rubric.
rubric = "\n".join(analysis.get("evaluation_rubric", []))
evaluations = []
for q, ans in zip(questions, answers):
    evaluator_input = {
        "rubric": rubric,
        "question": q["question"],
        "signals": "\n".join(q.get("expected_signals", [])),
        "red_flags": "\n".join(q.get("red_flags", [])),
        "answer": ans,
    }
    ev = answer_evaluator.invoke(evaluator_input).model_dump()
    # Record the id of the question that was actually asked.
    ev["question_id"] = q["id"]
    evaluations.append(ev)

# Step 5: synthesize the evaluations into a final recommendation and print it.
rec = recommender.invoke({
    "analysis_json": _analysis_json,
    "evaluations_json": json.dumps(evaluations, ensure_ascii=False, indent=2),
}).model_dump()

print("\n=== FINAL RECOMMENDATION ===")
print(json.dumps(rec, indent=2, ensure_ascii=False))


## 6. Optional: Load job description from Azure Blob Storage

If job descriptions or interview transcripts are kept in Azure Blob Storage, the commented-out helper below shows how to download a blob as UTF-8 text.


In [None]:
# Optional helper (not executed by default)
# from azure.storage.blob import BlobServiceClient
#
# def load_text_from_blob(conn_str: str, container: str, blob_name: str) -> str:
#     bsc = BlobServiceClient.from_connection_string(conn_str)
#     blob_client = bsc.get_blob_client(container=container, blob=blob_name)
#     return blob_client.download_blob().readall().decode('utf-8')
