---
The simulation - #TAKE_1

In [0]:
pip install streamlit pandas google-generativeai
pip install pandas PyPDF2 openai langchain tqdm
API_KEY = 'AIzaSyBEV89GjyAbAUgTunqeyHNlPvHuTR7K3X8'

In [0]:
import os
import pandas as pd
import PyPDF2
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
import nltk
from typing import List, Dict

class InterviewSimulationApp:
    """Interview simulator backed by Gemini and local job/question datasets.

    The constructor loads three datasets with pandas:

    * ``job_data`` (CSV) - must contain a ``title`` column; an ``industry``
      column is used when present.
    * ``common_questions`` (CSV) - loaded but not read by the methods below.
    * ``coding_questions`` (JSON) - must contain a ``difficulty`` column.
    """

    def __init__(self, gemini_api_key: str, job_data_path: str, questions_path: str, coding_questions_path: str):
        """Configure the Gemini client and load all data sources.

        Args:
            gemini_api_key: API key handed to ``genai.configure``.
            job_data_path: Path of the job-data CSV file.
            questions_path: Path of the common interview questions CSV file.
            coding_questions_path: Path of the coding questions JSON file.
        """
        # Initialize Gemini API
        genai.configure(api_key=gemini_api_key)
        # NOTE(review): confirm 'gemini-flash' is a published model id for the
        # installed google-generativeai version.
        self.model = genai.GenerativeModel('gemini-flash')

        # Load data sources
        self.job_data = pd.read_csv(job_data_path)
        self.common_questions = pd.read_csv(questions_path)
        self.coding_questions = pd.read_json(coding_questions_path)

        # Initialize embeddings
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')

    def extract_pdf_text(self, pdf_path: str) -> str:
        """Return the text of every page of the PDF, joined by single spaces.

        Args:
            pdf_path: Path of the PDF file to read.
        """
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # A page without extractable text may yield None/empty; coerce it
            # to '' so the join cannot fail on a non-string item.
            return ' '.join(page.extract_text() or '' for page in reader.pages)

    def match_job_data(self, cv_text: str, job_title: str) -> Dict:
        """Return the first job row whose title contains ``job_title``.

        The match is a case-insensitive *literal* substring test, so titles
        containing regex metacharacters (e.g. "C++ Developer") are safe, and
        rows with a missing title never match.

        Args:
            cv_text: CV content; accepted for interface compatibility but not
                used for matching.
            job_title: Title (or fragment) to look for.

        Returns:
            The matching row as a dict, or ``{}`` when nothing matches.
        """
        title_matches = self.job_data['title'].str.contains(
            job_title, case=False, regex=False, na=False
        )
        job_matches = self.job_data[title_matches]
        if job_matches.empty:
            return {}
        return job_matches.iloc[0].to_dict()

    def select_coding_questions(self, experience_level: str) -> List[Dict]:
        """Randomly pick up to three coding questions suited to the level.

        Args:
            experience_level: One of ``entry``, ``junior``, ``mid-level`` or
                ``senior`` (case and surrounding whitespace are ignored). Any
                other value falls back to easy + medium questions.

        Returns:
            A list of at most three question records; empty when no question
            has a suitable difficulty.
        """
        difficulty_map = {
            'entry': ['easy'],
            'junior': ['easy', 'medium'],
            'mid-level': ['medium'],
            'senior': ['medium', 'hard']
        }
        level = experience_level
        if isinstance(level, str):
            level = level.strip().lower()
        difficulties = difficulty_map.get(level, ['easy', 'medium'])

        candidates = self.coding_questions[
            self.coding_questions['difficulty'].isin(difficulties)
        ]
        if candidates.empty:
            return []
        # sample() raises when asked for more rows than exist, so cap the
        # sample size at the number of candidates.
        return candidates.sample(min(3, len(candidates))).to_dict('records')

    def generate_interview_questions(self, cv_text: str, job_title: str) -> List[str]:
        """Ask Gemini for a questionnaire tailored to the CV and job title.

        Args:
            cv_text: Full text of the candidate's CV.
            job_title: Title of the position being interviewed for.

        Returns:
            The non-blank lines of the model's reply, in order.
        """
        job_data = self.match_job_data(cv_text, job_title)

        # Use Gemini to generate context-aware questions
        prompt = f"""Generate a comprehensive interview questionnaire based on:
        - CV Content: {cv_text}
        - Job Title: {job_title}
        - Company Industry: {job_data.get('industry', 'Not Specified')}
        
        Please create questions covering:
        1. Technical Skills
        2. Behavioral Competencies 
        3. Experience Validation
        4. Problem-Solving Scenarios"""

        response = self.model.generate_content(prompt)
        # Drop blank separator lines so callers never receive an empty "question".
        return [line for line in response.text.splitlines() if line.strip()]

    def analyze_interview_performance(self, questions: List[str], answers: List[str]) -> str:
        """Ask Gemini for feedback on the candidate's answers.

        Args:
            questions: Questions that were asked.
            answers: The candidate's answers.

        Returns:
            The feedback text produced by the model.
        """
        feedback_prompt = f"""Analyze the following interview:
        Questions: {questions}
        Candidate Answers: {answers}
        
        Provide feedback using STAR methodology:
        - Strengths to preserve
        - Areas for improvement
        - Potential red flags
        - Overall interview performance assessment"""

        feedback = self.model.generate_content(feedback_prompt)
        return feedback.text

def main():
    """Build the interview simulation app and print the welcome banner."""
    # os.getenv() takes the *name* of an environment variable. The previous
    # call passed the key's value as that name. Read GOOGLE_API_KEY and fall
    # back to the notebook-level API_KEY when it is unset.
    app = InterviewSimulationApp(
        gemini_api_key=os.getenv('GOOGLE_API_KEY') or API_KEY,
        job_data_path='job_data.csv',
        questions_path='interview_questions.csv',
        coding_questions_path='coding_questions.json'
    )

    print("Welcome to Interview Simulation!")
    print("You can copy-paste CV text or upload a PDF file.")

    # Additional interaction logic would be implemented here

if __name__ == '__main__':
    main()

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from PyPDF2 import PdfReader
from langchain.vectorstores import FAISS
from langchain_google_vertexai import VertexAIEmbeddings
import google.generativeai as genai

# Create (or reuse) the Spark session used by the simulator class below
spark = (
    SparkSession.builder
    .appName("InterviewSimulator")
    .getOrCreate()
)

class InterviewSimulator:
    """Interactive interview simulator using Gemini, PySpark and FAISS.

    Datasets are read through the module-level ``spark`` session. Job
    descriptions and coding questions are embedded into FAISS vector stores,
    and all text generation goes through ``self.model``.
    """

    def __init__(self, gemini_api_key, general_questions_file, job_data_file, code_questions_file):
        """Configure Gemini, load the datasets and build the vector stores.

        Args:
            gemini_api_key: API key handed to ``genai.configure``.
            general_questions_file: CSV with a ``question`` column.
            job_data_file: CSV with a ``description`` column.
            code_questions_file: JSON with ``question`` and ``difficulty`` fields.
        """
        # Initialize Gemini API
        genai.configure(api_key=gemini_api_key)
        # NOTE(review): confirm 'gemini-flash' is a published model id for the
        # installed google-generativeai version.
        self.model = genai.GenerativeModel('gemini-flash')

        # Load datasets with PySpark
        self.general_questions_df = spark.read.csv(general_questions_file, header=True)
        self.job_data_df = spark.read.csv(job_data_file, header=True)
        self.code_questions_df = spark.read.json(code_questions_file)

        # Initialize embeddings for retrieval tasks. The previous code called
        # an undefined GooglePalmEmbeddings; use the embeddings class this
        # file actually imports.
        # NOTE(review): confirm the embedding model name, and that Google
        # Cloud credentials for Vertex AI are available in this environment.
        self.embeddings = VertexAIEmbeddings(model_name="text-embedding-004")
        self.job_vectorstore = self.create_vectorstore(self.job_data_df, "description")
        self.code_vectorstore = self.create_vectorstore(self.code_questions_df, "question")

    def create_vectorstore(self, df, column_name):
        """Create a FAISS vector store from one text column of a Spark DataFrame.

        Rows are collected once so each text stays paired with its own
        metadata; rows whose text is missing or empty are skipped.

        Args:
            df: Spark DataFrame to index.
            column_name: Name of the column holding the text to embed.

        Raises:
            ValueError: If no row has text in ``column_name``.
        """
        records = [row.asDict() for row in df.collect()]
        records = [record for record in records if record.get(column_name)]
        if not records:
            raise ValueError(f"No text found in column '{column_name}'.")
        texts = [str(record[column_name]) for record in records]
        return FAISS.from_texts(texts=texts, embedding=self.embeddings, metadatas=records)

    def extract_cv_text(self, cv_file=None, cv_text=None):
        """Return CV text from a PDF file or from already-extracted text.

        Args:
            cv_file: PDF path or file object; takes precedence when given.
            cv_text: Plain CV text, used when ``cv_file`` is not given.

        Raises:
            ValueError: If neither argument is provided.
        """
        if cv_file:
            reader = PdfReader(cv_file)
            # A page without extractable text may yield None/empty; coerce it
            # to "" so the join cannot fail on a non-string item.
            return " ".join(page.extract_text() or "" for page in reader.pages)
        if cv_text:
            return cv_text
        raise ValueError("No CV text or file provided.")

    def _ask_gemini(self, instruction, payload, fallback):
        """Send an instruction plus JSON context to Gemini; return reply text or ``fallback``."""
        import json  # local import keeps this class self-contained in the notebook

        prompt = (
            f"{instruction}\n\n"
            f"Context (JSON):\n{json.dumps(payload, ensure_ascii=False)}"
        )
        response = self.model.generate_content(prompt)
        try:
            text = response.text
        except ValueError:
            # Raised by the SDK's ``text`` accessor when the reply has no
            # usable content (for example a blocked response).
            return fallback
        return text.strip() if text and text.strip() else fallback

    def generate_simulation(self, cv_text, job_title, company, industry, job_description):
        """Generate a tailored interview simulation.

        Returns:
            The generated interview text, or ``"Failed to generate simulation."``
            when the model returns no text.
        """
        prompt = {
            "task": "generate_interview_simulation",
            "cv_text": cv_text,
            "job_title": job_title,
            "company": company,
            "industry": industry,
            "job_description": job_description,
            "general_questions": self.general_questions_df.toPandas()["question"].tolist(),
        }

        # The previous code called an undefined `gemini_flash` object; route
        # the request through the configured Gemini model instead.
        return self._ask_gemini(
            "Generate a tailored interview simulation for the candidate and "
            "position described below. Return one interview question per line.",
            prompt,
            "Failed to generate simulation.",
        )

    def select_code_questions(self, job_title, seniority):
        """Select up to five coding questions matching the role and seniority.

        Args:
            job_title: Job title used in the similarity query.
            seniority: "student"/"junior" (case-insensitive) selects easy and
                medium questions; anything else selects medium and hard.

        Returns:
            Up to five search results whose ``difficulty`` metadata is allowed.
        """
        query = f"{job_title} {seniority}"
        # Over-fetch so that filtering by difficulty can still leave five hits.
        candidates = self.code_vectorstore.similarity_search(query, k=15)

        # Filter based on seniority
        if seniority.lower() in ("student", "junior"):
            allowed = {"easy", "medium"}
        else:  # Senior or higher
            allowed = {"medium", "hard"}

        # Search results carry the source row in `.metadata`; they are not
        # subscriptable themselves.
        filtered = [doc for doc in candidates if doc.metadata.get("difficulty") in allowed]
        return filtered[:5]

    def provide_feedback(self, responses):
        """Provide feedback on interview answers.

        Args:
            responses: JSON-serializable collection of the candidate's answers.

        Returns:
            The feedback text, or ``"Failed to generate feedback."`` when the
            model returns no text.
        """
        feedback_prompt = {
            "task": "generate_feedback",
            "responses": responses,
            "methodology": "STAR",
        }

        return self._ask_gemini(
            "Provide feedback on the candidate's interview answers below "
            "using the STAR methodology.",
            feedback_prompt,
            "Failed to generate feedback.",
        )

    def run_simulation(self, cv_file=None, cv_text=None, job_title="", company="", industry="", job_description=""):
        """Run the full interactive flow: questions, answers, follow-ups, feedback.

        Reads answers from standard input and prints all output.
        """
        # Step 1: Extract CV text
        cv_text = self.extract_cv_text(cv_file, cv_text)

        # Step 2: Generate tailored interview questions
        interview = self.generate_simulation(cv_text, job_title, company, industry, job_description)
        print("\n--- Interview Simulation ---")
        print(interview)

        # Step 3: Collect user responses
        responses = {}
        # Skip blank lines so the candidate is never prompted with an empty question.
        questions = [line.strip() for line in interview.splitlines() if line.strip()]
        for i, question in enumerate(questions, start=1):
            response = input(f"Q{i}: {question}\nYour Answer: ")
            # Keep the question next to its answer so the feedback has context.
            responses[f"Q{i}"] = {"question": question, "answer": response}

            # Generate follow-up questions if needed
            follow_up_prompt = {
                "task": "generate_follow_up",
                "question": question,
                "response": response,
                "max_questions": 2
            }
            follow_up = self._ask_gemini(
                "Generate at most 2 follow-up interview questions for the "
                "answer below.",
                follow_up_prompt,
                "No follow-up questions generated.",
            )
            print("\nFollow-Up Questions:", follow_up)

        # Step 4: Provide feedback
        feedback = self.provide_feedback(responses)
        print("\n--- Feedback ---")
        print(feedback)

        # Final message
        print("\nHope you're ready for your interview, good luck!")

# Example Usage
import os

simulator = InterviewSimulator(
    # The constructor's first parameter is required; omitting it raised
    # TypeError. The key is read from the environment, never hard-coded.
    gemini_api_key=os.environ.get("GOOGLE_API_KEY"),
    general_questions_file="general_open_questions.csv",
    job_data_file="job_data.csv",
    code_questions_file="code_questions.json"
)
simulator.run_simulation(
    cv_file="sample_cv.pdf",  # Replace with actual CV file path
    job_title="Data Scientist",
    company="TechCorp",
    industry="Technology",
    job_description="Responsible for building machine learning models to analyze large datasets."
)
