In [11]:
import json
import re
from docx import Document

def extract_scenarios(docx_path, output_json):
    scenarios = []
    doc = Document(docx_path)

    print(f"Total paragraphs found: {len(doc.paragraphs)}")

    pattern = re.compile(r'^Scenario\s*(\d+)[\.:]?\s*(.+)$')

    current_scenario = None

    for para in doc.paragraphs:
        line = para.text.strip()

        if not line:
            continue  # Skip empty lines

        print(f"Processing line: '{line}'")  # Debugging output

        match = pattern.match(line)
        if match:
            # Save the previous scenario before starting a new one
            if current_scenario:
                print(f"Adding scenario: {current_scenario['number']} - {current_scenario['title']}")  # Debugging
                scenarios.append(current_scenario)

            scenario_number = int(match.group(1))  # Extract number
            scenario_title = match.group(2).strip()  # Extract title

            current_scenario = {
                "number": scenario_number,
                "title": scenario_title,
                "description": []  # Description is an empty list initially
            }
            print(f"New scenario found: {scenario_number} - {scenario_title}")  # Debugging
        elif current_scenario:
            # Remove any quotation marks if present
            cleaned_line = line.strip('“”')  # Strip out leading and trailing quotation marks
            current_scenario["description"].append(cleaned_line)
            print(f"Description added: {cleaned_line}")  # Debugging

    # After the loop, add the last scenario
    if current_scenario:
        print(f"Adding final scenario: {current_scenario['number']} - {current_scenario['title']}")  # Debugging
        scenarios.append(current_scenario)

    # Debugging output: Check final scenarios before saving
    print(f"Final extracted scenarios: {json.dumps(scenarios, indent=4, ensure_ascii=False)}")

    # Save to JSON
    with open(output_json, 'w', encoding='utf-8') as json_file:
        json.dump(scenarios, json_file, indent=4, ensure_ascii=False)

    print(f"Extracted scenarios saved to: {output_json}")
    return scenarios

# Example usage
file_path = "C:/my place/PhD Scientific/PhD. Papers/ACL 2025/New Approach/Scenario list/Scenarios.docx"
output_json = "scenarios.json"
scenarios = extract_scenarios(file_path, output_json)


Total paragraphs found: 202
Processing line: 'Scenario 1: Running a hairdressing salon'
New scenario found: 1 - Running a hairdressing salon
Processing line: '“Avery and Jordan are running a hairdressing salon together. Both take on key roles—Avery focusing on styling hair and Jordan managing operations. Avery consults clients, cuts and colors hair, and stays updated on trends. Jordan schedules appointments, orders supplies, and ensures a smooth workflow. Avery experiments with techniques, recommends treatments, and personalizes styles. Jordan handles customer service, manages finances, and promotes the salon. They collaborate on marketing, refine services based on feedback, and work together to create a welcoming atmosphere.”'
Description added: Avery and Jordan are running a hairdressing salon together. Both take on key roles—Avery focusing on styling hair and Jordan managing operations. Avery consults clients, cuts and colors hair, and stays updated on trends. Jordan schedules appoi

In [12]:
import json

with open("scenarios.json", "r", encoding="utf-8") as file:
    data = json.load(file)

data  # Displays JSON content

[{'number': 1,
  'title': 'Running a hairdressing salon',
  'description': ['Avery and Jordan are running a hairdressing salon together. Both take on key roles—Avery focusing on styling hair and Jordan managing operations. Avery consults clients, cuts and colors hair, and stays updated on trends. Jordan schedules appointments, orders supplies, and ensures a smooth workflow. Avery experiments with techniques, recommends treatments, and personalizes styles. Jordan handles customer service, manages finances, and promotes the salon. They collaborate on marketing, refine services based on feedback, and work together to create a welcoming atmosphere.']},
 {'number': 2,
  'title': 'Startup Leadership',
  'description': ["Jordan and Taylor co-found a tech startup. Both are involved in the company's growth—Jordan focuses on securing funding, managing investor relations, overseeing marketing, and tracking expenses, while also driving business development. Taylor leads software development, tests