In [1]:
%load_ext autoreload

In [2]:
%autoreload 2
import os
import sys
from pathlib import Path
import pyperclip
from src.story import create_html_challenges

# Make the sibling "chat-app" directory importable for the project imports below.
# NOTE(review): `src` has already been imported above (`from src.story import ...`);
# unless `src` is a namespace package, later `src.*` imports keep resolving from that
# first location regardless of this sys.path entry — confirm this is intended.
chat_app_dir = Path().absolute().parent / "chat-app"
# A sys.path entry has to be a directory, and a failure should say which path was tried.
assert chat_app_dir.is_dir(), f"chat-app directory not found: {chat_app_dir}"
chat_app_path = str(chat_app_dir)
if chat_app_path not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(chat_app_path)
from src.config_loader import config
from src.utils import anthropic_generate, upload_to_gcs, load_json


setting voice override: sv-SE-HilleviNeural
setting voice override: sv-SE-MattiasNeural
FFmpeg path added to system PATH: C:\Program Files\ffmpeg-7.0-essentials_build\bin


In [3]:
# Name of the story whose dialogue this notebook works with; it is both the output
# folder name and the stem of the dialogue JSON file.
STORY_NAME = "story_camping_trip_gone_awry"

# Every path below is derived from the current working directory.
notebook_dir = Path().absolute()  # expected to be src/notebooks
project_dir = notebook_dir.parent
phrase_dir = project_dir / "data" / "phrases"  # where we store text files of phrases
story_dir = project_dir / "outputs" / "stories" / STORY_NAME
chat_dir = project_dir / "chat"
story_dialogue_file = story_dir / f"{STORY_NAME}.json"
# Backward-compatible alias: the name was originally misspelled and other cells
# still refer to it.
story_dialgoue_file = story_dialogue_file

In [4]:
# Read the story's dialogue JSON into memory; later cells build the LLM prompt from it.
story_dialogue = load_json(story_dialgoue_file)

In [16]:
def get_challenge_generation_prompt(story_dialogue: dict, num_scenarios: int = 5) -> str:
    """
    Generate a prompt for an LLM to create language learning scenarios where learners
    must complete a challenge while gathering specific information and potentially
    handling complications.

    Args:
        story_dialogue: Dictionary containing dialogue parts ("setup", "resolution" etc).
            Every part that is a dict with a "dialogue" list contributes its utterances
            (dicts with "speaker" and "text" keys); any other entry is ignored.
        num_scenarios: Number of scenarios the LLM is asked to create (defaults to 5).

    Returns:
        str: Formatted prompt for the LLM

    Raises:
        ValueError: If num_scenarios is less than 1.
    """
    if num_scenarios < 1:
        raise ValueError(f"num_scenarios must be at least 1, got {num_scenarios}")

    # Flatten the dialogue of every part into "Speaker: text" lines, in dict order.
    dialogue_text = [
        f'{utterance["speaker"]}: {utterance["text"]}'
        for part in story_dialogue.values()
        # Skip entries that are not dialogue parts (non-dict values, missing/empty dialogue).
        if isinstance(part, dict)
        for utterance in part.get("dialogue") or []
    ]

    story_context = "\n".join(dialogue_text)

    # Literal JSON braces are doubled ({{ }}) because this is an f-string.
    prompt = f"""Analyze this dialogue and create {num_scenarios} practical language learning scenarios.
Each scenario should have a main challenge plus information the learner must discover,
with possible complications that could arise.
Base the scenarios on this story context:

{story_context}

Create the scenarios in this JSON format:
{{
    "scenarios": [
        {{
            "role": "who the teacher will roleplay (e.g. coffee shop staff)",
            "context": "brief setting description",
            "challenge": "overall task to complete (e.g. 'Order a coffee')",
            "information_task": "specific information to discover (e.g. 'What is the price of a  coffee?')",
            "question": "What problem or complication prevents your request? Why isn't it possible?",
            "complications": [
                "I'm sorry, our cups are all being washed right now, but I could put it in a mug?",
                "Unfortunately our coffee machine is just warming up, if you are happy waiting and I'll bring you the order when ready?",
                "We've run out of coffee cups, but we can serve you in a takeaway cup"
            ],
            "success_criteria": "Learner discovers the price of their chosen size while handling any complications with the order"
        }}
    ]
}}

Requirements:
1. Create {num_scenarios} scenarios that relate to the story provided using similar vocabulary
2. Each scenario should have:
   - A main challenge (e.g. "order a coffee")
   - An information-seeking task about the variable (e.g. "what is the price of a coffee?")
   - Three realistic complications to completing the challenge
3. Information tasks should:
   - Be discoverable through conversation
4. Complications should:
   - Clearly explain why the original request can't be fulfilled
   - Suggest or hint at possible alternatives
   - Not prevent the information task completion
5. Success criteria should:
   - Include both completing the challenge and discovering the information
   - Allow for multiple solutions
   - Include accepting alternatives

Remember:
- Keep language practical and everyday
- Every complication should suggest a way forward
- Success means both understanding any problems and gathering required information

Output only the JSON with no additional text."""

    return prompt

In [17]:
# Build the scenario-generation prompt from the loaded story dialogue.
prompt = get_challenge_generation_prompt(story_dialogue)

In [18]:
# Copy the prompt to the system clipboard (presumably to paste into an LLM by hand).
pyperclip.copy(prompt)

In [20]:
# Sample scenarios in the JSON structure that get_challenge_generation_prompt asks for;
# used below to try out generate_roleplay_prompt.
test_scenarios = {
    "scenarios": [
        {
            "role": "camping shop assistant",
            "context": "Local outdoor equipment shop",
            "challenge": "Purchase a tent suitable for two people",
            "information_task": "Discover the minimum temperature rating for the tent",
            "question": "Why isn't your first tent choice available?",
            "complications": [
                "That model is currently out of stock, but we have a similar one with better waterproofing for just £10 more",
                "The display tent is our last one and has a small repair on the outer layer, but I can offer a 15% discount",
                "This tent is slightly larger than what you asked for, but it's on sale and includes a footprint",
            ],
            "success_criteria": "Learner obtains temperature rating information while dealing with stock limitations and makes an informed choice about alternative options",
        },
        {
            "role": "weather station operator",
            "context": "Local weather information centre",
            "challenge": "Get detailed weather forecast for a specific camping location",
            "information_task": "Find out the overnight temperatures and chance of rain",
            "question": "Why can't you get the exact forecast for your location?",
            "complications": [
                "Our local station is undergoing maintenance, but I can give you data from the nearest station 5 miles away",
                "The detailed forecast only goes up to tomorrow evening, but I can give you the general trend for the following days",
                "We don't have specific readings for that valley, but I can tell you about the surrounding higher ground",
            ],
            "success_criteria": "Learner obtains weather information for their trip while understanding geographical and technical limitations",
        },
        {
            "role": "park ranger",
            "context": "National park information centre",
            "challenge": "Find a suitable camping spot near hiking trails",
            "information_task": "Learn about available emergency shelters and their locations",
            "question": "Why isn't your preferred camping area accessible?",
            "complications": [
                "That area is closed for wildlife protection, but there's a lovely spot just 2km further along the trail",
                "Recent rainfall has made the usual path muddy, but I can show you an alternative route that takes 15 minutes longer",
                "The main camping area is full, but we have overflow spots available near the ranger station",
            ],
            "success_criteria": "Learner discovers emergency shelter locations while adapting to site availability and access restrictions",
        },
        {
            "role": "outdoor equipment rental staff",
            "context": "Camping equipment rental shop",
            "challenge": "Rent a portable cooking stove",
            "information_task": "Find out fuel type and usage duration",
            "question": "Why isn't the standard stove rental available?",
            "complications": [
                "Our gas stoves are all out, but we have multi-fuel stoves that also work with liquid fuel",
                "The weekend package isn't available, but I can offer you a weekly rate for the same price",
                "That model needs maintenance, but I can offer you our premium model at the standard rate",
            ],
            "success_criteria": "Learner obtains fuel information while resolving equipment availability issues and understanding alternatives",
        },
        {
            "role": "campsite manager",
            "context": "Campsite reception area",
            "challenge": "Check in and find your assigned pitch",
            "information_task": "Learn about quiet hours and facilities access times",
            "question": "Why can't you access your booked pitch?",
            "complications": [
                "Recent rain has made your booked pitch waterlogged, but we have a drier spot on higher ground",
                "That area is being renovated, but we can offer you a premium pitch at the standard rate",
                "There's a fallen tree blocking access to that section, but we have a similar spot closer to the facilities",
            ],
            "success_criteria": "Learner obtains campsite rules and timing information while handling site allocation changes",
        },
    ]
}

In [21]:
def generate_roleplay_prompt(scenario_data: dict, complication_index: int = 0) -> str:
    """
    Generate a structured roleplay prompt for language learning scenarios.

    The target language name is read from the module-level ``config.TARGET_LANGUAGE_NAME``.

    Args:
        scenario_data (dict): Dictionary containing scenario information. The keys read
                            are 'role', 'context', 'challenge', 'information_task',
                            'question', 'complications' and 'success_criteria'.
        complication_index (int): Index of the complication to use (defaults to 0).
                            In-range negative indices count from the end.

    Returns:
        str: The fully formatted prompt text

    Raises:
        ValueError: If the scenario has no complications, or complication_index does
                    not refer to one of them.
    """
    complications = scenario_data['complications']
    if not complications:
        raise ValueError("Scenario has no complications to choose from")
    # Check both bounds so a bad negative index also surfaces as ValueError rather than
    # leaking an IndexError from the lookup below.
    if not -len(complications) <= complication_index < len(complications):
        raise ValueError(f"Complication index {complication_index} out of range. Max index: {len(complications) - 1}")

    complication = complications[complication_index]
    language = config.TARGET_LANGUAGE_NAME  # read once; used throughout the template

    prompt_template = f"""You are a helpful {language} language learning assistant. You will engage in roleplay scenarios to help users practice their {language} conversation skills. Here are the details for this challenge:

[ROLEPLAY SCENARIO]
You are playing the role of: {scenario_data['role']}
Context: {scenario_data['context']}

[LEARNER'S TASK]
The learner needs to: {scenario_data['challenge']}
They must also find out (FIND_OUT): {scenario_data['information_task']}

[CORRECT INFORMATION TO PROVIDE]
When the learner asks appropriately, you should create a suitable answer to what they are trying to find out (FIND_OUT) - remembering this answer to reveal at the end of the role-play.

[COMPLICATION TO HANDLE]
The roleplay has a complication for the learner: {scenario_data['question']}
You should explain: {complication}

[SCENARIO FLOW]
1. You should begin by greeting the learner in character
2. You should respond naturally to the learner, in character, gently guiding them to find out the answers to their questions if they are struggling.
3. If there is a complication, introduce it at the appropriate moment

[CHALLENGE: SUCCESS CONDITIONS]
{scenario_data['success_criteria']}

ROLEPLAY GUIDELINES:
1. Begin each interaction in {language} staying in character for the scenario
2. If the learner says "PAUSE ROLEPLAY", temporarily break character to:
   - Provide relevant phrases or vocabulary in English
   - Explain the current expectation
   - Then resume the roleplay in {language}
3. If the learner is struggling, provide subtle hints while maintaining the roleplay
4. Use simple, clear {language} appropriate for the learner's level
5. Stay in {language} until the success conditions are met
6. Once success conditions are met, provide constructive feedback in English about:
   - Successful language usage
   - Reveal the answer to what they had to find out (FIND_OUT)
   - Areas for improvement
   - Alternative phrases they could have used

Start by introducing yourself in character, in {language} appropriate to the challenge context and specified role."""

    return prompt_template



In [25]:

# Build the roleplay prompt for the first sample scenario with its first complication,
# then copy it to the system clipboard.
prompt_chat = generate_roleplay_prompt(test_scenarios['scenarios'][0], 0)
pyperclip.copy(prompt_chat)


In [18]:
# Upload the conversation-practice HTML page from the chat folder under the
# "test_challenges" prefix; the call's return value is shown as the cell output.
upload_to_gcs(
    file_path=chat_dir / "duolaingo_conversation_practice.html",
    bucket_prefix="test_challenges",
)

'https://storage.googleapis.com/audio-language-trainer-stories/test_challenges/duolaingo_conversation_practice.html'