# Imports & Setup

In [12]:
# Install or upgrade the packages this notebook uses into the kernel's environment.
%pip install --upgrade langchain google-generativeai pypdf
import os
import getpass

# Ask for the key interactively only when GOOGLE_API_KEY is not already set,
# so the secret is never written into the notebook itself.
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API Key")



In [13]:
# Adds langchain-google-genai (imported further down as `langchain_google_genai`)
# next to langchain and google-generativeai.
%pip install --upgrade langchain google-generativeai langchain-google-genai

Collecting langchain-google-genai
  Using cached langchain_google_genai-2.1.8-py3-none-any.whl.metadata (7.0 kB)
INFO: pip is looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
  Using cached langchain_google_genai-2.1.7-py3-none-any.whl.metadata (7.0 kB)
  Using cached langchain_google_genai-2.1.6-py3-none-any.whl.metadata (7.0 kB)
  Using cached langchain_google_genai-2.1.5-py3-none-any.whl.metadata (5.2 kB)
  Using cached langchain_google_genai-2.1.4-py3-none-any.whl.metadata (5.2 kB)
  Using cached langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
  Using cached langchain_google_genai-2.1.2-py3-none-any.whl.metadata (4.7 kB)
  Using cached langchain_google_genai-2.1.1-py3-none-any.whl.metadata (4.7 kB)
INFO: pip is still looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
  Using ca

# Helper functions & Setup

In [14]:
import os
from pypdf import PdfReader

def load_document(file_path: str) -> str:
    """Read a ``.txt`` or ``.pdf`` file and return its text.

    Args:
        file_path: Path of the document to read. The type is chosen from the
            file extension, compared case-insensitively.

    Returns:
        For ``.txt``: the file content decoded as UTF-8.
        For ``.pdf``: the text of every page, each followed by a newline.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is neither ``.txt`` nor ``.pdf``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if file_extension == ".pdf":
        reader = PdfReader(file_path)
        # `or ""` keeps a page that yields no text object from raising
        # TypeError on concatenation; such a page contributes a bare newline.
        # A single join replaces repeated string `+=` in the loop.
        return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

    raise ValueError(f"Unsupported file type: {file_extension}")

# LangChain Setup

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.memory import ConversationBufferMemory

# Chat model used by the rest of the notebook (model name and temperature are fixed here).
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.7)

# Conversation memory; the interview code reads its `.buffer` to build each prompt.
memory = ConversationBufferMemory()

  memory = ConversationBufferMemory()


In [15]:
from langchain.prompts import PromptTemplate

# Prompt text for the interviewer. It has four placeholders that are filled on
# every turn: {resume}, {job_description}, {chat_history} and {human_input}.
template = """
You are an expert AI Interviewer for a high-stakes technical interview. Your role is to assess the candidate’s qualifications, skills, and thinking based on the job description and resume.

**Strictly follow these rules:**
1. Ask *one* clear, concise, and relevant interview question at a time.
2. Use the resume to probe deeply into specific experiences or skills the candidate claims.
3. Use the job description to test alignment and required competencies.
4. Use chat history to:
   - Ask meaningful follow-ups.
   - Avoid repeating any prior questions or topics.
5. Critically evaluate the candidate’s latest response (`Candidate Response`) and:
   - Detect generic, evasive, or inconsistent answers.
   - Escalate difficulty or ask for clarification if the answer seems suspicious or copied.
   - Demand specificity, reasoning, or examples when appropriate.

Avoid small talk. Be professional and direct.

---

**Resume:**
{resume}

**Job Description:**
{job_description}

**Conversation So Far:**
{chat_history}

**Candidate Response:**
{human_input}

---

Based on all the above, ask your next best interview question or follow-up that rigorously tests their skills, depth of knowledge, or experience:"""

# Wrap the text in a PromptTemplate; `input_variables` lists the same four
# placeholder names that appear in `template`.
interview_prompt = PromptTemplate(
    input_variables=["resume", "job_description", "chat_history", "human_input"],
    template=template,
)


In [16]:
def generate_question(llm, memory, interview_prompt, resume, job_description, human_input):
    """
    Ask the model for the interviewer's next question.

    The prompt is rendered from the resume, the job description, the
    transcript currently held in ``memory.buffer`` and the candidate's latest
    reply, then sent to ``llm``.

    Args:
        llm: Model object exposing ``invoke(prompt)``.
        memory: Conversation memory object exposing ``buffer``.
        interview_prompt: Template object exposing ``format(**fields)``.
        resume: The resume text.
        job_description: The job description text.
        human_input: The candidate's most recent answer.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    # Collect every template field in one place; the transcript comes from memory.
    prompt_fields = {
        "resume": resume,
        "job_description": job_description,
        "chat_history": memory.buffer,
        "human_input": human_input,
    }
    rendered_prompt = interview_prompt.format(**prompt_fields)

    # Only the text of the reply is handed back to the caller.
    return llm.invoke(rendered_prompt).content


In [17]:
def main(max_questions: int = 3, history_file: str = "interview_history.txt"):
    """
    Runs the main interview loop and writes the transcript to disk.

    Uses the notebook-level ``llm``, ``memory`` and ``interview_prompt``
    objects together with ``load_document`` and ``generate_question``.

    Args:
        max_questions: Maximum number of questions to ask before stopping.
        history_file: Path the conversation transcript is written to.

    Returns:
        None. Returns early, without writing a transcript, when the resume or
        job description cannot be loaded.
    """
    resume_path = "resume.txt"
    jd_path = "job_description.txt"

    try:
        resume_text = load_document(resume_path)
        jd_text = load_document(jd_path)
        print("Files loaded successfully.")
    except (FileNotFoundError, ValueError) as e:
        print(f"Error loading files: {e}")
        return

    print("\nStarting interview. Type 'quit' or 'exit' to end.")

    human_input = ""  # No candidate answer exists before the first question.

    for _ in range(max_questions):
        ai_question = generate_question(llm, memory, interview_prompt, resume_text, jd_text, human_input)
        print(f"\nAI Interviewer: {ai_question}")

        human_input = input("Your response: ")

        if human_input.strip().lower() in ("quit", "exit"):
            print("Ending interview.")
            break

        # Record each turn under its real speaker. save_context(inputs, outputs)
        # treats its first argument as the human side, so passing the AI
        # question first would invert the speaker labels in the transcript.
        memory.chat_memory.add_ai_message(ai_question)
        memory.chat_memory.add_user_message(human_input)

    # Always persist the transcript, not only on quit/exit, so an interview
    # that ends by reaching the question limit still leaves a history file.
    with open(history_file, "w", encoding="utf-8") as f:
        f.write(memory.buffer)
    print(f"Conversation history saved to {history_file}")


if __name__ == "__main__":
    # Create placeholder inputs for a first run. The file names must be the
    # ones main() loads ("resume.txt" and "job_description.txt"); writing the
    # job description to any other name leaves main() without its input.
    # UTF-8 matches the encoding load_document() uses when reading .txt files.
    if not os.path.exists("resume.txt"):
        with open("resume.txt", "w", encoding="utf-8") as f:
            f.write("Resume content: Experienced software engineer with skills in Python and data analysis.")
    if not os.path.exists("job_description.txt"):
        with open("job_description.txt", "w", encoding="utf-8") as f:
            f.write("Job Description content: Seeking a software engineer with experience in Python, machine learning, and cloud computing.")

    main()

Files loaded successfully.

Starting interview. Type 'quit' or 'exit' to end.

AI Interviewer: Your resume highlights your work at Technopurple IT Solutions, where you "implemented a machine learning-based system that significantly enhanced ETA accuracy by 30% through regression and time series analysis." Given the Associate Data Scientist role's focus on energy load forecasting using time series techniques, can you describe the specific time series models or algorithms you employed for ETA prediction, and how you evaluated their performance and selected the best approach?
Your response: I have used LSTM, please hire me

AI Interviewer: You mentioned using LSTM for ETA prediction. Can you elaborate on how you specifically structured the input data and designed the LSTM architecture for this task? Furthermore, what specific metrics did you use to evaluate the LSTM's performance, and how did it compare to other regression or time series models you considered or implemented for that proje

# Summarizing & Scoring

In [18]:
# Read the saved interview transcript back from disk.
# load_document raises FileNotFoundError when the file does not exist.
history_path = 'interview_history.txt'
history_text = load_document(history_path)
print("Succesfully Loaded Interview History")

Succesfully Loaded Interview History


In [19]:
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import re # Import regex module for extracting the score

def calculate_score(llm, resume_text: str, jd_text: str, conversation_history: str) -> tuple[int | None, str]:
    """
    Calculates a numerical score (on a scale of 1-10) and a qualitative assessment
    of the candidate's performance based on the resume, job description, and
    conversation history using an LLM.

    Args:
        llm: The language model instance.
        resume_text: The resume text.
        jd_text: The job description text.
        conversation_history: The complete conversation history as a string.

    Returns:
        A tuple containing:
            - An integer score from 1 to 10, or None if a score cannot be extracted.
            - A string containing the qualitative assessment of the candidate.
    """
    scoring_template = """You are an AI evaluating a candidate's interview performance.
Given the resume, job description, and the full interview conversation history,
provide a qualitative assessment of the candidate's suitability for the role.
Consider the following:
- How well the candidate's skills and experience align with the job requirements.
- The relevance and depth of the candidate's answers.
- How effectively the candidate addressed the interviewer's questions.
- Overall communication clarity and confidence.

Provide a brief summary of their strengths and weaknesses based on the interview,
and a concluding assessment of their fit for the position.
Additionally, provide a numerical score from 1 to 10 (where 1 is poor and 10 is excellent)
at the very beginning of your response, formatted as "SCORE: [number]/10".

Resume:
{resume}

Job Description:
{job_description}

Conversation History:
{history}

Candidate Assessment:
"""

    scoring_prompt = PromptTemplate(
        input_variables=["resume", "job_description", "history"],
        template=scoring_template,
    )

    # Create a simple chain to format the prompt and invoke the LLM
    chain = (
        {"resume": RunnablePassthrough(), "job_description": RunnablePassthrough(), "history": RunnablePassthrough()}
        | scoring_prompt
        | llm
        | StrOutputParser()
    )


    # Invoke the chain with the necessary inputs
    full_assessment_text = chain.invoke({
        "resume": resume_text,
        "job_description": jd_text,
        "history": conversation_history
    })

    # Extract the score using regex
    score_match = re.search(r"SCORE:\s*(\d+)/10", full_assessment_text)
    score = int(score_match.group(1)) if score_match else None

    # The assessment text is the full response minus the score line if found
    assessment_text = full_assessment_text
    if score_match:
        assessment_text = re.sub(r"SCORE:\s*\d+/10\s*", "", full_assessment_text, 1).strip()


    return score, assessment_text

# Re-create llm and memory in case the earlier setup cells were not run in this kernel session.
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.memory import ConversationBufferMemory

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.7)
memory = ConversationBufferMemory() # NOTE(review): rebinds `memory` to a newly constructed object; calculate_score does not take it as an argument

# Inputs for scoring: the two source documents plus the saved interview transcript.
resume_path = "resume.txt"
jd_path = "job_description.txt"
history_file_path = "interview_history.txt"

try:
    # load_document raises FileNotFoundError / ValueError, both handled below.
    resume_text = load_document(resume_path)
    jd_text = load_document(jd_path)
    conversation_history = load_document(history_file_path)
    print("Required files and history loaded successfully.")

    # Ask the model for a score and a written assessment, then print both.
    print("\n--- Generating Candidate Assessment ---")
    candidate_score, candidate_assessment = calculate_score(llm, resume_text, jd_text, conversation_history)

    # calculate_score returns None for the score when it cannot parse one.
    if candidate_score is not None:
        print(f"\nOverall Score: {candidate_score}/10")
    else:
        print("\nCould not extract a numerical score.")

    print("\n--- Candidate Assessment ---")
    print(candidate_assessment)

except (FileNotFoundError, ValueError) as e:
    # Only file-loading problems are reported here; other exceptions propagate.
    print(f"Error loading files or history: {e}")

Required files and history loaded successfully.

--- Generating Candidate Assessment ---

Overall Score: 1/10

--- Candidate Assessment ---
**Qualitative Assessment of Candidate's Suitability for the Role**

The candidate's suitability for the Associate Data Scientist role is extremely poor, primarily due to their interview performance. While the resume presents a strong technical background and relevant experience, the interview conversation completely undermines any potential fit.

**How well the candidate's skills and experience align with the job requirements:**
Based solely on the resume, there appears to be a good alignment. The candidate has experience with time series analysis, regression, Python, relevant libraries (PyTorch, TensorFlow, scikit-learn), SQL, and cloud platforms (AWS, Azure), all of which are directly listed in the job description. The "ETA accuracy" project specifically mentions time series analysis and regression, which is highly relevant to energy load forecas