In [5]:
from azure.identity import AzureCliCredential, get_bearer_token_provider
import openai

class AzureOpenAIClient:
    """
    Thin wrapper around ``openai.AzureOpenAI`` authenticated via the Azure CLI.

    The constructor builds, in order, an ``AzureCliCredential``, a bearer token
    provider for that credential, and the ``openai.AzureOpenAI`` client that
    uses the provider. The two public methods send a single-turn chat request
    (optional system prompt + one user message).
    """

    def __init__(self, azure_endpoint_url, api_version="2024-02-15-preview", bearer_token_url='https://cognitiveservices.azure.com/.default'):
        """
        Initialize the Azure OpenAI client.

        Args:
            azure_endpoint_url (str): The Azure OpenAI endpoint URL
            api_version (str): The API version to use
            bearer_token_url (str): Token scope passed to
                ``get_bearer_token_provider`` (despite the name, this is the
                scope the token is requested for, not a URL that is called)
        """
        self.azure_endpoint_url = azure_endpoint_url
        self.api_version = api_version
        self.bearer_token_url = bearer_token_url
        # Order matters: the token provider needs the credential, and the
        # client needs the token provider.
        self.credential = self._get_cli_credential()
        self.token_provider = self._get_token_provider()
        self.client = self._get_openai_client()

    def _get_cli_credential(self):
        """Get Azure CLI credentials."""
        return AzureCliCredential()

    def _get_token_provider(self):
        """Get bearer token provider for ``self.credential`` and the configured scope."""
        return get_bearer_token_provider(self.credential, self.bearer_token_url)

    def _get_openai_client(self):
        """Create and return the OpenAI client."""
        return openai.AzureOpenAI(
            api_version=self.api_version,
            azure_endpoint=self.azure_endpoint_url,
            azure_ad_token_provider=self.token_provider
        )

    @staticmethod
    def _build_messages(user_prompt, system_prompt=None):
        """
        Build the chat message list for a single-turn request.

        Args:
            user_prompt (str): The user's prompt/question
            system_prompt (str, optional): System prompt; omitted from the
                message list when falsy (``None`` or empty string)

        Returns:
            list[dict]: ``[system?, user]`` messages in API order
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_prompt})
        return messages

    def get_completion(self, user_prompt, system_prompt=None, model="gpt-4o", max_tokens=100, temperature=0.5, timeout_seconds=10):
        """
        Get a completion from the Azure OpenAI API.

        Args:
            user_prompt (str): The user's prompt/question
            system_prompt (str, optional): System prompt to set context/behavior
            model (str): The model to use
            max_tokens (int): Maximum tokens in the response
            temperature (float): Sampling temperature (the service accepts 0-2)
            timeout_seconds (int): Request timeout in seconds

        Returns:
            str: The text content of the first choice.
            NOTE(review): the SDK types ``message.content`` as optional, so
            callers should be prepared for ``None`` - confirm against the
            installed openai version.
        """
        # Delegate so the request is assembled in exactly one place.
        response = self.get_full_response(
            user_prompt,
            system_prompt=system_prompt,
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            timeout_seconds=timeout_seconds,
        )
        return response.choices[0].message.content

    def get_full_response(self, user_prompt, system_prompt=None, model="gpt-4o", max_tokens=100, temperature=0.5, timeout_seconds=10):
        """
        Get the full response object from the Azure OpenAI API.

        Args:
            user_prompt (str): The user's prompt/question
            system_prompt (str, optional): System prompt to set context/behavior
            model (str): The model to use
            max_tokens (int): Maximum tokens in the response
            temperature (float): Sampling temperature (the service accepts 0-2)
            timeout_seconds (int): Request timeout in seconds

        Returns:
            ChatCompletion: The full response object
        """
        return self.client.chat.completions.create(
            model=model,
            messages=self._build_messages(user_prompt, system_prompt),
            max_tokens=max_tokens,
            temperature=temperature,
            timeout=timeout_seconds
        )

# Initialize the client. The defaults below are custom values for an Azure
# OpenAI endpoint accessible to a team member; set AZURE_OPENAI_ENDPOINT and/or
# OPENAI_API_VERSION in the environment to point the notebook at a different
# resource without editing this cell.
import os

azure_endpoint_url = os.environ.get('AZURE_OPENAI_ENDPOINT', 'https://winsightsbotwus.openai.azure.com/')
api_version = os.environ.get('OPENAI_API_VERSION', "2024-02-15-preview")

ai_client = AzureOpenAIClient(azure_endpoint_url, api_version)

print("Azure OpenAI Client initialized successfully!")

Azure OpenAI Client initialized successfully!


In [8]:
import json
import os

# Resolve the notebook's folder from the current working directory; the
# system prompt lives one level above it.
notebook_dir = os.path.dirname(os.path.abspath("SamplePromptEvaluator.ipynb"))
parent_dir = os.path.dirname(notebook_dir)

# --- SustainabilityCoach system prompt -------------------------------------
system_prompt_path = os.path.join(parent_dir, "SustainabilityCoach-SystemPrompt.txt")
with open(system_prompt_path, 'r', encoding='utf-8') as f:
    sustainability_system_prompt = f.read()

print(f"✓ Loaded system prompt from: {system_prompt_path}")
print(f"  Length: {len(sustainability_system_prompt)} characters\n")

# --- Sample user inputs (JSON) ----------------------------------------------
user_input_path = os.path.join(notebook_dir, "SampleUserInput.json")
with open(user_input_path, 'r', encoding='utf-8') as f:
    sample_user_inputs = json.load(f)

loaded_entries = sample_user_inputs.get('entries', [])

print(f"✓ Loaded sample user inputs from: {user_input_path}")
print(f"  Description: {sample_user_inputs.get('description', 'N/A')}")
print(f"  Number of entries: {len(loaded_entries)}")

# Preview at most the first five entries so the output stays short
if len(loaded_entries) > 0:
    print("\n  Sample categories:")
    for i, entry in enumerate(loaded_entries[:5]):
        category = entry.get('category', 'Unknown')
        demographic = entry.get('demographic', 'Unknown')
        input_count = len(entry.get('inputs', []))
        print(f"    {i+1}. {category} - {demographic} ({input_count} inputs)")

✓ Loaded system prompt from: c:\Hackathon\Oct-4-Hackathon-2025-hack16\resources\SustainabilityCoach-SystemPrompt.txt
  Length: 6317 characters

✓ Loaded sample user inputs from: c:\Hackathon\Oct-4-Hackathon-2025-hack16\resources\SamplePromptEvaluator\SampleUserInput.json
  Description: Sample User Inputs for SustainIQ Sustainability Coach - Representing diverse demographics, scenarios, and voice input patterns
  Number of entries: 30

  Sample categories:
    1. Morning Routine (Home) - Young Professional, Female, 28 (3 inputs)
    2. Morning Routine (Home) - Middle-aged Parent, Male, 45 (3 inputs)
    3. Morning Routine (Home) - Senior, Female, 67 (3 inputs)
    4. Morning Routine (Home) - Young Adult, Male, 22 (3 inputs)
    5. Mid-Morning (Work/Errands) - Business Executive, Female, 52 (3 inputs)


In [12]:
import re

def _strip_trailing_commas(text):
    """
    Remove commas that directly precede a closing ``}`` or ``]``.

    Unlike a plain regex substitution, this walks the text and tracks whether
    it is inside a double-quoted JSON string, so a value such as ``"a,}"`` is
    left untouched.
    """
    kept = []
    in_string = False
    escaped = False
    length = len(text)
    for index, char in enumerate(text):
        if in_string:
            kept.append(char)
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == ',':
            # Look past whitespace to see what the comma is followed by
            lookahead = index + 1
            while lookahead < length and text[lookahead].isspace():
                lookahead += 1
            if lookahead < length and text[lookahead] in '}]':
                continue  # trailing comma: drop it
        kept.append(char)
    return ''.join(kept)


def _first_json_object(text):
    """
    Return the first JSON object that can be decoded starting at any ``{``.

    Candidate positions are tried left to right, so an enclosing object is
    always preferred over an object nested inside it. Each position is tried
    as-is first and then with trailing commas removed.
    """
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            return decoder.raw_decode(text, start)[0]
        except json.JSONDecodeError:
            pass
        try:
            return decoder.raw_decode(_strip_trailing_commas(text[start:]))[0]
        except json.JSONDecodeError:
            pass
        start = text.find('{', start + 1)
    return None


def extract_json_from_response(response_text):
    """
    Extract JSON from a response that might be wrapped in markdown code blocks.
    Also handles common JSON formatting issues like trailing commas.

    Strategy, in order:
      1. Parse the whole text as JSON (returns whatever value it encodes).
      2. Look inside each ``` fenced block for a JSON object.
      3. Look for a JSON object anywhere in the text.
    Steps 2 and 3 handle arbitrarily nested objects and tolerate trailing
    commas outside of string values.

    Args:
        response_text (str): The response text that may contain JSON

    Returns:
        dict: Parsed JSON object, or None if parsing fails or the input is
        not a string (e.g. ``None`` when the model returned no content)
    """
    if not isinstance(response_text, str):
        return None

    # Try to parse directly first
    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        pass

    # Fenced blocks first (most specific), then the whole text as a fallback
    fenced_blocks = [
        match.group(1)
        for match in re.finditer(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
    ]
    for candidate_text in fenced_blocks + [response_text]:
        parsed = _first_json_object(candidate_text)
        if parsed is not None:
            return parsed

    return None

print("✓ Helper function 'extract_json_from_response' defined (with trailing comma handling)")

✓ Helper function 'extract_json_from_response' defined (with trailing comma handling)


In [13]:
# Example: Using the loaded system prompt and sample user input with the AI client

# Use the first input of the first category as a single worked example
first_entry = sample_user_inputs['entries'][0]
sample_user_action = first_entry['inputs'][0]

print(f"Category: {first_entry['category']}")
print(f"Demographic: {first_entry['demographic']}")
print(f"User Action: {sample_user_action}\n")
print("=" * 80)

# Call the AI with the sustainability system prompt and user input
response_text = ai_client.get_completion(
    user_prompt=sample_user_action,
    system_prompt=sustainability_system_prompt,
    model="gpt-4o",
    max_tokens=500,
    temperature=0.7
)

print("\nAI Response:")
print(response_text)

# Extract and parse JSON from the response
response_json = extract_json_from_response(response_text)

# The summary below reads keys with .get(), so it only applies to a JSON object
if isinstance(response_json, dict) and response_json:
    print("\n" + "=" * 80)
    print("Parsed JSON Response:")
    print(json.dumps(response_json, indent=2))
    
    # Display key information in a readable format
    print("\n" + "=" * 80)
    print("Summary:")
    print(f"  Original Action: {response_json.get('original_action', 'N/A')}")
    print(f"  Suggestion: {response_json.get('suggestion', 'N/A')}")
    # str() guards against the model returning null or a non-string value
    print(f"  Impact Indicator: {str(response_json.get('impact_indicator', 'N/A')).upper()}")
    print(f"  Status: {response_json.get('evaluation_status', 'N/A')}")
else:
    print("\n⚠ Could not parse JSON from response")

Category: Morning Routine (Home)
Demographic: Young Professional, Female, 28
User Action: Hey, I'm about to order an Uber to get to my office downtown


AI Response:
```json
{
  "original_action": "Taking an Uber to commute to the office downtown",
  "suggestion": "Consider using public transit or a shared ride option like Uber Pool if available.",
  "rationale": "Public transit significantly reduces emissions per passenger compared to single-occupancy vehicles. If public transit isn’t feasible, opting for a shared ride like Uber Pool can cut per-person emissions by sharing the trip with others heading in the same direction.",
  "metrics": {
    "original_impact": "4.6 kg CO₂ for a 10 km single-occupancy ride",
    "suggested_impact": "1.8 kg CO₂ for shared ride or 0.5 kg CO₂ for public transit",
  },
  "impact_indicator": "high",
  "evaluation_status": "clear"
}
```

Parsed JSON Response:
{
  "original_action": "Taking an Uber to commute to the office downtown",
  "suggestion": "Consi

In [14]:
import time
from datetime import datetime

# Generation settings used for every request in this run. They are defined
# once so the saved metadata always matches what was actually sent.
EVAL_MODEL = "gpt-4o"
EVAL_MAX_TOKENS = 500
EVAL_TEMPERATURE = 0.7
REQUEST_DELAY_SECONDS = 0.5  # small pause between requests to avoid rate limiting

evaluation_entries = sample_user_inputs.get('entries', [])

# Prepare results collection
all_results = {
    "metadata": {
        "processed_at": datetime.now().isoformat(),
        "system_prompt_file": "SustainabilityCoach-SystemPrompt.txt",
        "model": EVAL_MODEL,
        "max_tokens": EVAL_MAX_TOKENS,
        "temperature": EVAL_TEMPERATURE,
        "total_entries": 0,
        "total_inputs_processed": 0
    },
    "results": []
}

# Process all categories and user inputs
total_processed = 0  # counts only requests that completed without raising
total_entries = len(evaluation_entries)

print(f"Starting to process {total_entries} categories...\n")

for entry_idx, entry in enumerate(evaluation_entries):
    category = entry.get('category', 'Unknown')
    demographic = entry.get('demographic', 'Unknown')
    user_inputs = entry.get('inputs', [])
    
    print(f"[{entry_idx + 1}/{total_entries}] Processing category: {category} - {demographic}")
    print(f"  Number of inputs: {len(user_inputs)}")
    
    for input_idx, user_input in enumerate(user_inputs):
        # Fields shared by success and error records; the outcome is filled in below
        result_entry = {
            "category": category,
            "demographic": demographic,
            "input_index": input_idx,
            "original_input": user_input,
            "raw_response": None,
            "parsed_json": None,
        }
        
        try:
            # Call the AI with the sustainability system prompt
            response_text = ai_client.get_completion(
                user_prompt=user_input,
                system_prompt=sustainability_system_prompt,
                model=EVAL_MODEL,
                max_tokens=EVAL_MAX_TOKENS,
                temperature=EVAL_TEMPERATURE
            )
            # Record the raw text first so it survives a failure while parsing
            result_entry["raw_response"] = response_text
            
            # Extract JSON from response
            response_json = extract_json_from_response(response_text)
            result_entry["parsed_json"] = response_json
            result_entry["processing_status"] = "success" if response_json else "json_parse_failed"
            
            total_processed += 1
            print(f"    [{input_idx + 1}/{len(user_inputs)}] Processed: {user_input[:50]}...")
            
        except Exception as e:
            # Deliberate best-effort: one failed request must not abort the batch
            print(f"    [{input_idx + 1}/{len(user_inputs)}] ERROR: {str(e)}")
            result_entry["parsed_json"] = None
            result_entry["processing_status"] = "error"
            result_entry["error_message"] = str(e)
        
        all_results["results"].append(result_entry)
        
        # Pause after every request, including failed ones (a failure may itself
        # be a rate-limit response)
        time.sleep(REQUEST_DELAY_SECONDS)
    
    print(f"  ✓ Completed category {entry_idx + 1}/{total_entries}\n")

# Update metadata
all_results["metadata"]["total_entries"] = total_entries
all_results["metadata"]["total_inputs_processed"] = total_processed

print("=" * 80)
print(f"✓ Processing complete!")
print(f"  Total inputs processed: {total_processed}")
print(f"  Total results: {len(all_results['results'])}")

# Save results to JSON file (timestamped so earlier runs are never overwritten)
output_filename = os.path.join(notebook_dir, f"evaluation_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
with open(output_filename, 'w', encoding='utf-8') as f:
    json.dump(all_results, f, indent=2, ensure_ascii=False)

print(f"\n✓ Results saved to: {output_filename}")
print(f"  File size: {os.path.getsize(output_filename) / 1024:.2f} KB")


Starting to process 30 categories...

[1/30] Processing category: Morning Routine (Home) - Young Professional, Female, 28
  Number of inputs: 3
    [1/3] Processed: Hey, I'm about to order an Uber to get to my offic...
    [1/3] Processed: Hey, I'm about to order an Uber to get to my offic...
    [2/3] Processed: I need to grab breakfast... thinking about getting...
    [2/3] Processed: I need to grab breakfast... thinking about getting...
    [3/3] Processed: Running late, gonna take a quick five-minute showe...
    [3/3] Processed: Running late, gonna take a quick five-minute showe...
  ✓ Completed category 1/30

[2/30] Processing category: Morning Routine (Home) - Middle-aged Parent, Male, 45
  Number of inputs: 3
  ✓ Completed category 1/30

[2/30] Processing category: Morning Routine (Home) - Middle-aged Parent, Male, 45
  Number of inputs: 3
    [1/3] Processed: I'm driving the kids to school this morning, it's ...
    [1/3] Processed: I'm driving the kids to school this morning,

In [15]:
import glob
from collections import defaultdict

# Find all evaluation results JSON files in the notebook directory
results_files = glob.glob(os.path.join(notebook_dir, "evaluation_results_*.json"))

print(f"Found {len(results_files)} evaluation results file(s):")
for file in results_files:
    file_size = os.path.getsize(file) / 1024
    print(f"  - {os.path.basename(file)} ({file_size:.2f} KB)")

if not results_files:
    print("\n⚠ No evaluation results files found. Please run the evaluation first.")
else:
    # Load the most recent file by modification time (or you can load all and merge)
    latest_file = max(results_files, key=os.path.getmtime)
    print(f"\n✓ Loading: {os.path.basename(latest_file)}")
    
    with open(latest_file, 'r', encoding='utf-8') as f:
        evaluation_data = json.load(f)
    
    print(f"  Total results: {len(evaluation_data.get('results', []))}")
    # Read metadata defensively, like 'results' above, so a file without a
    # metadata section still loads
    print(f"  Processed at: {evaluation_data.get('metadata', {}).get('processed_at', 'N/A')}")


Found 1 evaluation results file(s):
  - evaluation_results_20251004_135831.json (176.41 KB)

✓ Loading: evaluation_results_20251004_135831.json
  Total results: 95
  Processed at: 2025-10-04T13:54:29.042581


In [18]:
import html

def load_template_files(template_dir):
    """
    Read the report stylesheet and HTML template from ``template_dir``.

    The files are expected to be named ``report_styles.css`` and
    ``report_template.html`` and are read as UTF-8 text, stylesheet first.

    Args:
        template_dir (str): Directory containing template files

    Returns:
        tuple: (css_content, html_template)
    """
    def _read_text(filename):
        # Both templates are loaded the same way: UTF-8 text, whole file
        with open(os.path.join(template_dir, filename), 'r', encoding='utf-8') as handle:
            return handle.read()

    stylesheet = _read_text('report_styles.css')
    page_template = _read_text('report_template.html')
    return stylesheet, page_template

def _display_text(value, default=''):
    """
    Coerce a value read from evaluation results into text for rendering.

    Parsed model output is not guaranteed to contain strings: ``None`` maps to
    ``default``, dicts/lists are rendered as compact JSON, and any other
    non-string value goes through ``str()``.
    """
    if value is None:
        return default
    if isinstance(value, str):
        return value
    if isinstance(value, (dict, list)):
        return json.dumps(value, ensure_ascii=False)
    return str(value)


def build_results_html(grouped_results):
    """
    Build the results section HTML from grouped results.

    Categories are rendered in sorted order; every result becomes one
    "result-card" containing the user input, the parsed suggestion (when the
    parsed JSON is a non-empty object), any error message, and the raw
    response. All dynamic text is HTML-escaped.

    Args:
        grouped_results (dict): Results grouped by category
            (category name -> list of result dicts)

    Returns:
        str: HTML content for the results section
    """
    parts = []
    
    for category, category_results in sorted(grouped_results.items()):
        parts.append(f"""
            <div class="category-section">
                <div class="category-header" onclick="toggleCategory(this)">
                    <div class="category-title">{html.escape(_display_text(category))}</div>
                    <div class="category-count">{len(category_results)} results</div>
                </div>
                <div class="category-content">
""")
        
        # Add each result in this category
        for result in category_results:
            demographic = _display_text(result.get('demographic'), 'N/A')
            original_input = _display_text(result.get('original_input'), 'N/A')
            parsed_json = result.get('parsed_json')
            raw_response = _display_text(result.get('raw_response', 'N/A'))
            status = result.get('processing_status', 'unknown')
            error_msg = result.get('error_message')
            
            # Determine status badge; anything other than success/error is
            # shown as a parse failure
            if status == 'success':
                status_class = 'status-success'
                status_text = '✓ Success'
            elif status == 'error':
                status_class = 'status-error'
                status_text = '✗ Error'
            else:
                status_class = 'status-failed'
                status_text = '⚠ Parse Failed'
            
            parts.append(f"""
                    <div class="result-card">
                        <div class="result-header">
                            <div class="demographic-badge">👤 {html.escape(demographic)}</div>
                            <div class="status-badge {status_class}">{status_text}</div>
                        </div>
                        
                        <div class="input-section">
                            <div class="section-label">📝 User Input</div>
                            <div class="input-text">{html.escape(original_input)}</div>
                        </div>
""")
            
            # Add parsed JSON output if available; the fields below are read
            # with .get(), so only a JSON object qualifies
            if isinstance(parsed_json, dict) and parsed_json:
                suggestion = _display_text(parsed_json.get('suggestion'), 'N/A')
                rationale = _display_text(parsed_json.get('rationale'), '')
                impact = _display_text(parsed_json.get('impact_indicator'), 'unknown').upper()
                metrics = parsed_json.get('metrics', {})
                if not isinstance(metrics, dict):
                    metrics = {}
                
                # Determine impact class
                impact_class = 'impact-low'
                if impact in ['HIGH', 'POSITIVE', 'GREEN']:
                    impact_class = 'impact-high'
                elif impact in ['MEDIUM', 'MODERATE']:
                    impact_class = 'impact-medium'
                
                parts.append(f"""
                        <div class="response-section">
                            <div class="section-label">💡 AI Response</div>
                            
                            <div class="suggestion-box">
                                <strong>Suggestion:</strong> {html.escape(suggestion)}
                            </div>
""")
                
                # Add rationale if available
                if rationale:
                    parts.append(f"""
                            <div class="rationale-box">
                                <strong>Why?</strong> {html.escape(rationale)}
                            </div>
""")
                
                # Add metrics if available
                original_impact = _display_text(metrics.get('original_impact'), '')
                suggested_impact = _display_text(metrics.get('suggested_impact'), '')
                
                if original_impact or suggested_impact:
                    parts.append("""
                            <div class="metrics-box">
                                <strong>📊 Impact Comparison:</strong>
""")
                    if original_impact:
                        parts.append(f"""
                                <div class="metric-item">
                                    <span class="metric-label">Current:</span>{html.escape(original_impact)}
                                </div>
""")
                    if suggested_impact:
                        parts.append(f"""
                                <div class="metric-item">
                                    <span class="metric-label">Suggested:</span>{html.escape(suggested_impact)}
                                </div>
""")
                    parts.append("""
                            </div>
""")
                
                parts.append(f"""
                            <div class="impact-indicator {impact_class}">
                                Impact: {html.escape(impact)}
                            </div>
                            
                            <button class="toggle-raw" onclick="toggleRaw(this)">Show Full JSON</button>
                            <div class="json-output raw-response">{html.escape(json.dumps(parsed_json, indent=2))}</div>
                        </div>
""")
            
            # Add error message if present
            if error_msg:
                parts.append(f"""
                        <div class="error-message">
                            <strong>Error:</strong> {html.escape(_display_text(error_msg))}
                        </div>
""")
            
            # Add raw response toggle ('N/A' is the placeholder for "no response")
            if raw_response and raw_response != 'N/A':
                parts.append(f"""
                        <button class="toggle-raw" onclick="toggleRaw(this)">Show Raw Response</button>
                        <div class="raw-response">{html.escape(raw_response)}</div>
""")
            
            parts.append("""
                    </div>
""")
        
        parts.append("""
                </div>
            </div>
""")
    
    return "".join(parts)

def generate_html_report(evaluation_data, output_path, template_dir=None):
    """
    Generate an HTML report from evaluation results, grouped by category.

    The report is produced by filling the ``{{...}}`` placeholders of the
    HTML template returned by ``load_template_files`` and writing the result
    to ``output_path`` as UTF-8.

    Args:
        evaluation_data (dict): Evaluation results data; the ``results`` list
            and ``metadata`` dict are both optional
        output_path (str): Path to save the HTML report
        template_dir (str, optional): Directory containing template files;
            defaults to the directory of ``output_path``
    
    Returns:
        str: Path to the generated HTML file
    """
    if template_dir is None:
        template_dir = os.path.dirname(output_path)
    
    # Load external CSS and HTML template
    css_content, html_template = load_template_files(template_dir)
    
    results = evaluation_data.get('results', [])
    metadata = evaluation_data.get('metadata', {})
    
    # Group results by category
    grouped_results = defaultdict(list)
    for result in results:
        grouped_results[result.get('category', 'Unknown')].append(result)
    
    # Build the results HTML section
    results_content = build_results_html(grouped_results)
    
    # Placeholder -> replacement, applied in this order. Metadata comes from a
    # JSON file, so it is coerced to text and HTML-escaped. RESULTS_CONTENT is
    # substituted last so placeholder-like text inside results is never
    # rewritten.
    replacements = [
        ('{{CSS_CONTENT}}', css_content),
        ('{{PROCESSED_AT}}', html.escape(str(metadata.get('processed_at') or 'N/A'))),
        ('{{MODEL}}', html.escape(str(metadata.get('model') or 'N/A'))),
        ('{{TOTAL_CATEGORIES}}', str(len(grouped_results))),
        ('{{TOTAL_EVALUATIONS}}', str(len(results))),
        ('{{RESULTS_CONTENT}}', results_content),
    ]
    html_content = html_template
    for placeholder, replacement in replacements:
        html_content = html_content.replace(placeholder, replacement)
    
    # Write HTML to file
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_content)
    
    return output_path

# Generate the HTML report (only when a results file was found and loaded)
if results_files:
    # Derive the report name from the results file name:
    # 'evaluation_results_*.json' becomes 'evaluation_report_*.html'
    input_basename = os.path.basename(latest_file)
    output_basename = (
        input_basename
        .replace('evaluation_results_', 'evaluation_report_')
        .replace('.json', '.html')
    )
    html_output_path = os.path.join(notebook_dir, output_basename)
    
    generated_file = generate_html_report(evaluation_data, html_output_path)
    
    # One entry per printed line; printed in order below
    summary_lines = [
        "\n✓ HTML report generated successfully!",
        f"  Input:  {os.path.basename(latest_file)}",
        f"  Output: {os.path.basename(generated_file)}",
        f"  Size: {os.path.getsize(generated_file) / 1024:.2f} KB",
        "\n  📁 Template files used:",
        "     - report_styles.css",
        "     - report_template.html",
        "\n  🌐 Open this file in a browser to view the interactive report.",
        f"  Full path: {generated_file}",
    ]
    for summary_line in summary_lines:
        print(summary_line)



✓ HTML report generated successfully!
  Input:  evaluation_results_20251004_135831.json
  Output: evaluation_report_20251004_135831.html
  Size: 429.29 KB

  📁 Template files used:
     - report_styles.css
     - report_template.html

  🌐 Open this file in a browser to view the interactive report.
  Full path: c:\Hackathon\Oct-4-Hackathon-2025-hack16\resources\SamplePromptEvaluator\evaluation_report_20251004_135831.html
