In [18]:
!pip install groq pymongo python-dotenv



In [19]:
!pip install "pymongo[srv]"








In [None]:
from pymongo import MongoClient
from dotenv import load_dotenv
import os

load_dotenv()

mongo_uri = os.getenv("MONGO_URI")

# Add `tls=True` to ensure TLS/SSL handshake works
client = MongoClient(mongo_uri, tls=True)

try:
    db = client["interview_coach"]
    print("✅ Connected to MongoDB Atlas!")
except Exception as e:
    print("❌ Connection failed:", e)


✅ Connected to MongoDB Atlas!


In [21]:
from pymongo import MongoClient

db = client["interview_coach"]
collection = db["questions"]
print(db.list_collection_names())


['questions', 'users']


In [22]:
def get_user_scores(user_id):
    user = db.users.find_one({"user_id": user_id})
    if not user:
        print("User not found.")
        return None

    return {
        "Technical": user.get("technical_score", 0),
        "Behavioral": user.get("behavioral_score", 0),
        "Scenario": user.get("scenario_score", 0)
    }


In [23]:
import random

def get_questions(question_type, user_score, num_questions=5):
    """
    Retrieve questions filtered by type and user_score.
    """
    query = {
        "question_type": question_type,
        "score_threshold": {"$lte": user_score}
    }
    cursor = collection.find(query)
    questions = list(cursor)
    selected = random.sample(questions, min(len(questions), num_questions))
    return selected

def retrieve_questions_for_job_interview(user_id, total_questions=15):
    scores = get_user_scores(user_id)
    if not scores:
        return []

    # Determine weight: lower scores = higher weight
    weight_map = {}
    for k, v in scores.items():
        if v <= 3:
            weight_map[k] = 3
        elif 4 <= v <= 6:
            weight_map[k] = 2
        else:
            weight_map[k] = 1

    # Compute total weight
    total_weight = sum(weight_map.values())

    # Compute number of questions per type
    questions_per_type = {}
    for k, weight in weight_map.items():
        questions_per_type[k] = round(total_questions * (weight / total_weight))

    all_questions = []
    total_collected = 0

    # Retrieve questions per type
    for qtype, num in questions_per_type.items():
        q = get_questions(qtype, scores[qtype], num_questions=num)
        all_questions.extend(q)
        total_collected += len(q)

    # If still fewer than total_questions, fill from ANY questions
    remaining_needed = total_questions - total_collected
    if remaining_needed > 0:
        # Collect all remaining questions regardless of type
        fallback_cursor = collection.find({})
        fallback_questions = list(fallback_cursor)

        # Exclude already selected
        fallback_questions = [fq for fq in fallback_questions if fq not in all_questions]

        additional = random.sample(
            fallback_questions, 
            min(len(fallback_questions), remaining_needed)
        )
        all_questions.extend(additional)

    # Shuffle final set
    random.shuffle(all_questions)
    return all_questions

In [24]:
# Example: Retrieve 5 technical questions for user with score 5
tech_qs = get_questions("Technical", user_score=5, num_questions=5)

# Print them nicely
for q in tech_qs:
    print(f"{q['question_id']}: {q['question_text']} (Threshold: {q['score_threshold']})")


TECH005: Differentiate supervised and unsupervised learning. (Threshold: 2)
TECH009: Explain gradient descent. (Threshold: 4)
TECH001: What is overfitting in machine learning? (Threshold: 3)
TECH003: What is normalization in data preprocessing? (Threshold: 2)
TECH011: Difference between classification and regression? (Threshold: 2)


In [25]:
# Example: Retrieve 5 technical questions for user with score 3
tech_qs = get_questions("Technical", user_score=3, num_questions=5)

# Print them nicely
for q in tech_qs:
    print(f"{q['question_id']}: {q['question_text']} (Threshold: {q['score_threshold']})")


TECH003: What is normalization in data preprocessing? (Threshold: 2)
TECH012: What is recall? (Threshold: 3)
TECH001: What is overfitting in machine learning? (Threshold: 3)
TECH001: What is overfitting in machine learning? (Threshold: 3)
TECH013: What is a confusion matrix? (Threshold: 3)


In [26]:
# Example usage
interview_questions = retrieve_questions_for_job_interview("user456", total_questions=8)

for q in interview_questions:
    print(f"{q['question_type']} - {q['question_id']}: {q['question_text']} (Threshold: {q['score_threshold']})")

Technical - TECH011: Difference between classification and regression? (Threshold: 2)
Technical - TECH001: What is overfitting in machine learning? (Threshold: 3)
Technical - TECH006: What is PCA? (Threshold: 5)
Technical - TECH003: What is normalization in data preprocessing? (Threshold: 2)
Behavioral - BEH014: Describe a time you demonstrated Empathy. (Threshold: 5)
Technical - TECH001: What is overfitting in machine learning? (Threshold: 3)
Scenario - SCEN002: Imagine you are faced with a conflict between team members. How would you handle it? (Threshold: 5)
Technical - TECH012: What is recall? (Threshold: 3)
