In [1]:
import time
import pandas as pd
from _pipeline import create_payload, model_req

# Models exercised by the test run
models_to_test = [
    "mistral",
    "llama3",
    "phi3",
]

# Parameter grid, kept deliberately small: two temperatures, one context
# size, one output length
temperature_values = [
    0.5,  # lower randomness
    1.0,  # higher randomness
]
context_sizes = [1024]      # single context size
num_predict_values = [300]  # single output length

# Define test cases
# Each entry is a dict with two keys:
#   "name"   - label of the prompting technique; run_tests prints it and stores
#              it under "Test Case" in each result row.
#   "prompt" - the text handed to create_payload as the model prompt.
# NOTE: the prompt literals are indented inside their triple quotes, so the
# text keeps that leading indentation on every line (plus the trailing spaces
# after many lines) and starts/ends with a newline. Editing the whitespace
# changes the exact prompt.
test_cases = [
    # Case 1: the prompt tells the model to ask a clarifying question before deciding.
    {
        "name": "Reverse Prompting - Clarification Needed",
        "prompt": """
            You are an AI hiring assistant using Reverse Prompting. 
            Before making a hiring decision, ask clarifying questions to ensure fairness.

            **Job Description:**  
            We are hiring a Data Scientist with expertise in Python, Machine Learning, and SQL.  
            The candidate must have at least 3 years of experience and a background in statistical modeling.  

            **Candidate Resume:**  
            John Doe is a Data Scientist with 4 years of experience. Skilled in Python and Machine Learning, but lacks SQL experience.

            **Step 1: Initial Evaluation**  
            ✔ Extract job requirements and candidate qualifications.  
            ✔ Identify gaps (e.g., missing SQL experience).  

            **Step 2: Reverse Prompting - Clarification Questions**  
            Before finalizing, ask:  
            1️⃣ "Would you consider a candidate with strong Python & ML but no SQL experience?"  
            
            **Step 3: Final Decision (After Clarification)**
        """
    },
    # Case 2: the prompt lists three reviewer personas and asks for an aggregated recommendation.
    {
        "name": "Multi-Persona Prompting - Hiring Perspectives",
        "prompt": """
            You are an AI hiring assistant evaluating a candidate using Multi-Persona Reasoning.

            **Step 1: Hiring Manager Perspective**  
            - Focuses on technical skills and experience gaps.  

            **Step 2: Technical Lead Perspective**  
            - Assesses problem-solving ability & adaptability.  

            **Step 3: HR Representative Perspective**  
            - Evaluates cultural fit and alignment with company values.  

            **Job Description:**  
            Hiring a Software Engineer with Java, AWS, and Kubernetes.  

            **Candidate Resume:**  
            Jane Doe has 5 years of experience in Java, AWS, and Kubernetes.  

            **Final Step: Aggregate the scores from all perspectives to generate a hiring recommendation.**
        """
    },
    # Case 3: the prompt lists three "thoughts" to score and aggregate.
    # NOTE(review): unlike the two prompts above, this one embeds no job
    # description or candidate resume - confirm that is intentional.
    {
        "name": "Tree of Thoughts - Hiring Evaluation",
        "prompt": """
            You are an AI hiring assistant using Tree of Thoughts (ToT) reasoning.
            Instead of evaluating the resume in a single step, explore multiple reasoning paths.

            **Thought 1: Technical Fit**  
            ✔ Does the candidate meet job requirements?  

            **Thought 2: Business Value**  
            ✔ Will this candidate provide long-term value to the company?  

            **Thought 3: Leadership & Growth**  
            ✔ Can the candidate evolve into a leadership role in the future?  

            **Step 1: Evaluate each perspective and assign a confidence score.**  
            **Step 2: Aggregate scores to generate a final hiring decision.**
        """
    }
]

# Function to run test cases across optimized models & parameters
def run_tests(test_cases, models, temperatures, context_sizes, num_predict_values):
    """Run every test case against every model/parameter combination.

    One request is issued per element of the Cartesian product
    ``test_cases x models x temperatures x context_sizes x num_predict_values``,
    iterated in that nesting order (the last argument varies fastest).
    Progress and the full response are printed for each run.

    Args:
        test_cases: Iterable of dicts, each providing a ``"name"`` and a
            ``"prompt"`` entry.
        models: Iterable of model identifiers passed to ``create_payload``.
        temperatures: Iterable of temperature values.
        context_sizes: Iterable of values passed as ``num_ctx``.
        num_predict_values: Iterable of values passed as ``num_predict``.

    Returns:
        A list with one dict per run, keyed by ``"Test Case"``, ``"Model"``,
        ``"Temperature"``, ``"Context Size"``, ``"Num Predict"``,
        ``"Time Taken (s)"`` (wall-clock seconds rounded to 3 decimals) and
        ``"Response"``. The list is empty if any input is empty.
    """
    # Local import so this block does not depend on the file's import header.
    from itertools import product

    results = []

    # product() yields combinations in the same order as the equivalent
    # nested for-loops, without the five levels of indentation.
    grid = product(test_cases, models, temperatures, context_sizes, num_predict_values)
    for test, model, temp, ctx_size, num_predict in grid:
        print(f"\n🚀 Running test: {test['name']} | Model: {model} | Temp: {temp} | Context: {ctx_size} | Predict: {num_predict}...\n")

        # Create payload
        payload = create_payload(
            target="ollama",
            model=model,
            prompt=test["prompt"],
            temperature=temp,
            num_ctx=ctx_size,
            num_predict=num_predict
        )

        # Measure execution time with a monotonic clock: perf_counter() is
        # unaffected by system clock adjustments, unlike time.time().
        start_time = time.perf_counter()
        # model_req returns a pair; only the second element (the response)
        # is used here. The first is ignored in favour of the wall-clock
        # measurement taken around the call.
        _, response = model_req(payload=payload)
        elapsed = round(time.perf_counter() - start_time, 3)

        # Store results
        results.append({
            "Test Case": test["name"],
            "Model": model,
            "Temperature": temp,
            "Context Size": ctx_size,
            "Num Predict": num_predict,
            "Time Taken (s)": elapsed,
            "Response": response  # Store full response
        })

        # Print result summary
        print(f"\n✅ **Test Completed: {test['name']} | Model: {model}**")
        print(f"🕒 **Time Taken:** {elapsed} seconds")
        print(f"📌 **Response:** {response}\n")  # Print full response

    return results

# Execute the whole grid; the trailing bare name makes the notebook display
# the collected result rows.
test_results = run_tests(
    test_cases=test_cases,
    models=models_to_test,
    temperatures=temperature_values,
    context_sizes=context_sizes,
    num_predict_values=num_predict_values,
)
test_results


🚀 Running test: Reverse Prompting - Clarification Needed | Model: mistral | Temp: 0.5 | Context: 1024 | Predict: 300...

{'model': 'mistral', 'prompt': '\n            You are an AI hiring assistant using Reverse Prompting. \n            Before making a hiring decision, ask clarifying questions to ensure fairness.\n\n            **Job Description:**  \n            We are hiring a Data Scientist with expertise in Python, Machine Learning, and SQL.  \n            The candidate must have at least 3 years of experience and a background in statistical modeling.  \n\n            **Candidate Resume:**  \n            John Doe is a Data Scientist with 4 years of experience. Skilled in Python and Machine Learning, but lacks SQL experience.\n\n            **Step 1: Initial Evaluation**  \n            ✔ Extract job requirements and candidate qualifications.  \n            ✔ Identify gaps (e.g., missing SQL experience).  \n\n            **Step 2: Reverse Prompting - Clarification Questions**  \n   

[{'Test Case': 'Reverse Prompting - Clarification Needed',
  'Model': 'mistral',
  'Temperature': 0.5,
  'Context Size': 1024,
  'Num Predict': 300,
  'Time Taken (s)': 58.127,
  'Response': '1. "Given the job requirements and the candidate\'s qualifications, would it be acceptable to consider John Doe, who has strong skills in Python and Machine Learning, but lacks SQL experience, for this Data Scientist position?"\n\nIf the answer is yes:\n\n**Step 4: Additional Considerations**\n\n1. Evaluate if additional training or onboarding resources are available for SQL skills development.\n2. Assess if there are any other essential tasks within the role where SQL proficiency is not required.\n3. Determine if John Doe\'s overall skillset and potential to grow in the company outweigh the lack of SQL experience.\n4. Consider conducting a technical test or interview focused on Python, Machine Learning, and statistical modeling to further evaluate John Doe\'s qualifications.\n\nIf the answer is n