In [1]:

import json
import os
import sys

# Resolve the project root exactly once per kernel. Deriving it from
# os.getcwd() on every run is not idempotent: this cell also chdir()s into the
# root, so a second execution would climb one directory higher each time.
# Assumes the notebook is started from a directory directly below the root.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.dirname(os.getcwd())
parent_dir = PROJECT_ROOT

# Add parent directory to path (presumably so project-local modules such as
# `services` resolve when imported below)
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# Change to project root so relative paths like "test_data/..." resolve
os.chdir(parent_dir)

# Third-party and project imports stay after the path setup so that module
# resolution order is the same as before.
from dotenv import load_dotenv
from openai import OpenAI

from services import UseCaseService

In [2]:
# Load environment variables (from a .env file, if present)
load_dotenv()
api_key = os.getenv("OPENROUTER_API_KEY")
if not api_key:
    # Fail fast with an actionable message. Passing api_key=None would make
    # the OpenAI client fall back to the OPENAI_API_KEY environment variable
    # (sending that key to OpenRouter) or raise an error that never mentions
    # OPENROUTER_API_KEY.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Add it to your .env file or environment."
    )

# OpenAI-compatible client pointed at the OpenRouter endpoint
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=api_key
)

# Load example transcript

In [3]:
transcript_path = "test_data/transcripts/energy_workshop_transcript.txt"

# Read the complete transcript file into a single string.
with open(transcript_path, mode='r', encoding='utf-8') as transcript_file:
    transcript = transcript_file.read()

# Sanity check: report the size and show the opening of the transcript.
char_count = len(transcript)
preview = transcript[:500]
print(f"Length: {char_count} characters")
print("\nFirst 500 characters:")
print(preview)

Length: 3710 characters

First 500 characters:
Workshop Transcript: Digitalisierung in der Energiewirtschaft
Datum: 15. Februar 2025
Teilnehmer: BadenCampus Team mit Vertreter von E.ON
Ort: E.ON Hauptsitz, Essen

Moderator (BadenCampus): Guten Morgen allerseits. Herzlich willkommen zu unserem Workshop zur Digitalisierung in der Energiewirtschaft. Ich bin Marc von BadenCampus, und wir freuen uns sehr, heute mit E.ON zusammenzuarbeiten.

Lisa Müller (Innovation Manager, E.ON): Vielen Dank, Marc. Ich bin Lisa Müller, Innovation Manager bei E.ON


# Prompt for extraction
The plan: write one good extraction prompt that turns a transcript into a list of good prompts. These prompts are then handed over one by one to the agent, which I have already tested and which is able to add use cases to the database carefully (with checking, adding, and everything else).

In [6]:
# System prompt for the extraction call in extract_prompts_from_transcript.
# It asks the model to turn every use case in a transcript into one standalone
# "Create a use case ..." instruction and to reply with nothing but a JSON
# array of those instruction strings, which the caller decodes with json.loads.
extraction_prompt = """
You are an expert at extracting use cases from workshop transcripts.

Your task is to read the transcript and create a natural language prompt for EACH use case 
that can be executed by an AI agent to create the use case in a database.

For each use case mentioned in the transcript, create a prompt following this template:
"Create a use case called '[TITLE]' for company '[COMPANY]' in the '[INDUSTRY]' sector. Description: [DETAILED DESCRIPTION]. Expected benefit: [SPECIFIC BENEFITS]. Contributors: [NAME (ROLE), NAME (ROLE)]"

INSTRUCTIONS:
1. Extract ALL use cases discussed in the transcript (there may be multiple)
2. For each use case, identify:
   - A clear, concise title
   - The company name (exactly as mentioned)
   - The industry/sector
   - A detailed description of what the use case does
   - The expected benefits (include metrics/percentages when mentioned)
   - All people who contributed ideas (with their roles)

3. Return ONLY a JSON array of prompt strings
4. Each prompt should be a complete, standalone instruction
5. Use exact quotes for percentages and metrics when available

EXAMPLE OUTPUT FORMAT:
[
  "Create a use case called 'Smart Grid Optimization' for company 'E.ON' in the 'Energy' sector. Description: Machine learning algorithms to optimize energy distribution in real-time based on consumption patterns and renewable energy availability. Expected benefit: Reduce energy waste by 15-20%, improve grid stability, better integration of renewable sources. Contributors: Lisa Müller (Innovation Manager), Thomas Klein (Data Science Lead)",
  "Create a use case called 'Predictive Maintenance for Wind Turbines' for company 'E.ON' in the 'Energy' sector. Description: IoT sensors combined with AI to predict maintenance needs before failures occur, reducing downtime. Expected benefit: 30% reduction in unplanned downtime, 20% longer equipment lifetime, lower maintenance costs. Contributors: Thomas Klein (Data Science Lead), Lisa Müller (Innovation Manager)"
]

CRITICAL: Return ONLY the JSON array, no other text, no markdown formatting, no preamble.
"""


# Function that runs the LLM to create the prompts

In [7]:
def _strip_code_fence(text: str) -> str:
    """Return the body of the first Markdown code fence in ``text``, or ``text`` itself if unfenced."""
    if "```json" in text:
        return text.split("```json", 1)[1].split("```", 1)[0].strip()
    if "```" in text:
        return text.split("```", 2)[1].strip()
    return text.strip()


def _parse_prompt_list(text: str) -> list:
    """Decode ``text`` as a JSON array and return its non-empty string items.

    Raises:
        ValueError: if no JSON array can be decoded from ``text``
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
    """
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The model may wrap the array in extra text despite the instructions;
        # retry on the outermost [...] span before giving up.
        start, end = text.find("["), text.rfind("]")
        if start == -1 or end < start:
            raise
        parsed = json.loads(text[start:end + 1])

    if not isinstance(parsed, list):
        raise ValueError(f"expected a JSON array, got {type(parsed).__name__}")

    # Downstream code slices each prompt and hands it to the agent as text,
    # so anything that is not a non-blank string is dropped here.
    return [item for item in parsed if isinstance(item, str) and item.strip()]


def extract_prompts_from_transcript(transcript_text: str, verbose: bool = True):
    """
    Extract use case prompts from a workshop transcript.

    Sends the transcript to the LLM together with ``extraction_prompt`` and
    decodes the reply as a JSON array of prompt strings. Markdown code fences
    and text surrounding the array are tolerated.

    Args:
        transcript_text: The full transcript text
        verbose: Whether to print progress

    Returns:
        list: List of prompt strings for the agent. Empty if the model
            returned no content or the reply could not be decoded as a JSON
            array (the failure is printed in that case).
    """
    if verbose:
        print("\n" + "="*60)
        print("EXTRACTING USE CASE PROMPTS FROM TRANSCRIPT")
        print("="*60)

    # Call LLM with extraction prompt
    response = client.chat.completions.create(
        model="anthropic/claude-3.5-sonnet",
        messages=[
            {"role": "system", "content": extraction_prompt},
            {"role": "user", "content": f"Extract use case prompts from this transcript:\n\n{transcript_text}"}
        ],
        max_tokens=3000,
        temperature=0.3  # Lower temp for consistent extraction
    )

    # A completion can come back without choices or with content=None;
    # treat both as "nothing extracted" instead of crashing on len()/`in`.
    choices = getattr(response, "choices", None) or []
    result = choices[0].message.content if choices else None
    if not result:
        print("\nLLM returned no content; nothing to extract.")
        return []

    if verbose:
        print("\nLLM Response:")
        print(result[:300] + "..." if len(result) > 300 else result)

    # Parse JSON (after cleaning markdown formatting if present)
    try:
        prompts = _parse_prompt_list(_strip_code_fence(result))
    except ValueError as e:
        print(f"\nFailed to parse JSON: {e}")
        print(f"Raw response:\n{result}")
        return []

    if verbose:
        print(f"\nSuccessfully extracted {len(prompts)} use case prompt(s)")
        print("\nExtracted prompts:")
        for i, prompt in enumerate(prompts, 1):
            print(f"\n{i}. {prompt[:100]}...")

    return prompts

In [8]:
# Smoke test: run only the extraction step against the energy transcript
# (verbose output is on, which is the function's default).
print("\nTesting extraction on energy workshop transcript...")
prompts = extract_prompts_from_transcript(transcript_text=transcript, verbose=True)


Testing extraction on energy workshop transcript...

EXTRACTING USE CASE PROMPTS FROM TRANSCRIPT

LLM Response:
[
  "Create a use case called 'Smart Grid Load Prediction' for company 'E.ON' in the 'Energy' sector. Description: AI-powered system to predict grid load 24-48 hours in advance using smart meter data from thousands of households, incorporating renewable energy inputs like solar power to better manag...

Successfully extracted 3 use case prompt(s)

Extracted prompts:

1. Create a use case called 'Smart Grid Load Prediction' for company 'E.ON' in the 'Energy' sector. Des...

2. Create a use case called 'Wind Farm Predictive Maintenance' for company 'E.ON' in the 'Energy' secto...

3. Create a use case called 'Industrial Energy Consumption Forecasting' for company 'E.ON' in the 'Ener...


# Test Complete workflow :o

In [12]:
from agent import run_agent

def process_transcript(transcript_text: str, verbose: bool = True):
    """
    Complete workflow: Extract prompts from transcript and create all use cases.

    Each extracted prompt is passed to ``run_agent`` on its own. An exception
    raised for one prompt is recorded in the results and does not stop the
    remaining prompts from being processed.

    Args:
        transcript_text: The workshop transcript
        verbose: Whether to print detailed progress

    Returns:
        dict: Summary of results with the same keys on every path:
            - "success": False if no prompts were extracted, True otherwise
              (even if individual use cases failed; see "use_cases_created")
            - "prompts_extracted": number of prompts extracted
            - "use_cases_created": number of prompts run_agent handled
              without raising
            - "results": one dict per prompt with "success" and "prompt",
              plus "response" on success or "error" (str) on failure
    """
    if verbose:
        print("\n" + "="*80)
        print("TRANSCRIPT PROCESSING WORKFLOW")
        print("="*80)

    # Step 1: Extract prompts
    prompts = extract_prompts_from_transcript(transcript_text, verbose=verbose)

    if not prompts:
        print("\nNo prompts extracted. Stopping.")
        # Include "results" so the summary has the same shape as on the
        # normal path and callers can read summary["results"] unconditionally.
        return {
            "success": False,
            "prompts_extracted": 0,
            "use_cases_created": 0,
            "results": []
        }

    # Step 2: Process each prompt with the agent
    if verbose:
        print("\n" + "="*80)
        print(f"CREATING {len(prompts)} USE CASE(S) VIA AGENT")
        print("="*80)

    results = []
    for i, prompt in enumerate(prompts, 1):
        if verbose:
            print(f"\n{'─'*80}")
            print(f"USE CASE {i}/{len(prompts)}")
            print(f"{'─'*80}")
            print(f"Prompt: {prompt[:150]}...")
            print()

        try:
            # Feed prompt to agent
            response = run_agent(prompt, verbose=verbose)
            results.append({"success": True, "prompt": prompt, "response": response})

            if verbose:
                print(f"\nUse case {i} created successfully")

        except Exception as e:
            # Deliberately broad: one failing prompt must not abort the rest.
            # The error text is kept in the result entry for inspection.
            if verbose:
                print(f"\nError creating use case {i}: {e}")
            results.append({"success": False, "prompt": prompt, "error": str(e)})

    # Summary
    successful = sum(1 for r in results if r["success"])

    if verbose:
        print("\n" + "="*80)
        print("TRANSCRIPT PROCESSING COMPLETE")
        print("="*80)
        print(f"Prompts extracted: {len(prompts)}")
        print(f"Use cases created: {successful}/{len(prompts)}")
        print("="*80)

    return {
        "success": True,
        "prompts_extracted": len(prompts),
        "use_cases_created": successful,
        "results": results
    }



In [13]:
# End-to-end run on the energy transcript: extraction plus agent calls,
# framed by banner lines and followed by a short recap of the counts.
divider = "="*80

print("\n" + divider)
print("FULL TRANSCRIPT PROCESSING TEST")
print(divider)

summary = process_transcript(transcript_text=transcript, verbose=True)

extracted_count = summary['prompts_extracted']
created_count = summary['use_cases_created']

print("\n" + divider)
print("FINAL SUMMARY")
print(divider)
print(f"Extracted {extracted_count} use case prompts")
print(f"Successfully created {created_count} use cases")
print(divider)


FULL TRANSCRIPT PROCESSING TEST

TRANSCRIPT PROCESSING WORKFLOW

EXTRACTING USE CASE PROMPTS FROM TRANSCRIPT

LLM Response:
[
  "Create a use case called 'Smart Grid Load Prediction' for company 'E.ON' in the 'Energy' sector. Description: AI-powered system to predict grid load 24-48 hours in advance using smart meter data from thousands of households, incorporating renewable energy fluctuations and solar feed-in patterns...

Successfully extracted 3 use case prompt(s)

Extracted prompts:

1. Create a use case called 'Smart Grid Load Prediction' for company 'E.ON' in the 'Energy' sector. Des...

2. Create a use case called 'Wind Farm Predictive Maintenance' for company 'E.ON' in the 'Energy' secto...

3. Create a use case called 'Industrial Energy Consumption Forecasting' for company 'E.ON' in the 'Ener...

CREATING 3 USE CASE(S) VIA AGENT

────────────────────────────────────────────────────────────────────────────────
USE CASE 1/3
─────────────────────────────────────────────────────

In [14]:
# Read the manufacturing workshop transcript into memory as one string.
with open(
    "test_data/transcripts/manufacturing_workshop_transcript.txt",
    mode='r',
    encoding='utf-8',
) as f:
    manufacturing_transcript = f.read()

In [15]:
# Run the full extract-and-create workflow on the manufacturing transcript.
summary = process_transcript(
    transcript_text=manufacturing_transcript,
    verbose=True,
)


TRANSCRIPT PROCESSING WORKFLOW

EXTRACTING USE CASE PROMPTS FROM TRANSCRIPT

LLM Response:
[
  "Create a use case called 'AI-Optimized Laser Cutting Parameters' for company 'Trumpf GmbH' in the 'Manufacturing' sector. Description: Machine learning system that predicts optimal laser cutting parameters based on historical cutting data, considering material types, thickness, and design speci...

Successfully extracted 2 use case prompt(s)

Extracted prompts:

1. Create a use case called 'AI-Optimized Laser Cutting Parameters' for company 'Trumpf GmbH' in the 'M...

2. Create a use case called 'Real-time Production Planning Optimization' for company 'Trumpf GmbH' in t...

CREATING 2 USE CASE(S) VIA AGENT

────────────────────────────────────────────────────────────────────────────────
USE CASE 1/2
────────────────────────────────────────────────────────────────────────────────
Prompt: Create a use case called 'AI-Optimized Laser Cutting Parameters' for company 'Trumpf GmbH' in the 'Manufa

In [None]:
# Read the healthcare workshop transcript into memory as one string.
with open(
    "test_data/transcripts/healthcare_workshop_transcript.txt",
    mode='r',
    encoding='utf-8',
) as f:
    healthcare_transcript = f.read()

# Announce the run, then push the transcript through the full workflow.
divider = "="*80
print("\n" + divider)
print("TESTING HEALTHCARE TRANSCRIPT")
print(divider)
summary = process_transcript(transcript_text=healthcare_transcript, verbose=True)