This file is only a scratchpad for experimenting and preparing the tests; its content is not meant to be maintained.

# TEST 1

In [None]:
import re
import time


def extract_blocks_find(
    text, start_marker, end_marker, include_start=True, include_end=True
):
    """Extract every ``start_marker`` ... ``end_marker`` span using ``str.find()``.

    The text is scanned left to right. For each start marker found, the
    nearest end marker located after it closes the block. A start marker
    that has no end marker after it yields nothing and stops the scan.

    The search for the next start marker resumes *at* the previous end
    marker rather than after it, so an end marker that also matches the
    start marker opens the next block (e.g. ``"*a*b*"`` with ``"*"`` for
    both markers gives two blocks sharing the middle ``"*"``).

    Args:
        text: The string to scan.
        start_marker: Substring that opens a block.
        end_marker: Substring that closes a block.
        include_start: Keep the start marker in each returned block.
        include_end: Keep the end marker in each returned block.

    Returns:
        A list of the extracted substrings, in order of appearance.
    """
    blocks = []
    start_len = len(start_marker)
    end_len = len(end_marker)
    search_from = 0

    while True:
        start_idx = text.find(start_marker, search_from)
        if start_idx == -1:
            break

        end_idx = text.find(end_marker, start_idx + start_len)
        if end_idx == -1:
            break

        extract_start = start_idx if include_start else start_idx + start_len
        extract_end = end_idx + end_len if include_end else end_idx
        blocks.append(text[extract_start:extract_end])

        # Resume at the end marker so it can double as the next start marker.
        # For a non-empty start marker end_idx is always past start_idx, so
        # max() changes nothing; for an empty start marker it forces the scan
        # forward instead of matching the same position forever.
        search_from = max(end_idx, start_idx + 1)

    return blocks


def extract_blocks_regex(
    text, start_marker, end_marker, include_start=True, include_end=True
):
    """Extract marker-delimited blocks with a single non-greedy regex.

    Both markers are escaped so they match literally. A marker that must be
    part of the result is matched directly; a marker that must be left out
    is turned into a zero-width assertion (lookbehind for the start,
    lookahead for the end), so it anchors the match without being captured.
    ``re.DOTALL`` lets a block span several lines.

    Args:
        text: The string to scan.
        start_marker: Literal substring that opens a block.
        end_marker: Literal substring that closes a block.
        include_start: Keep the start marker in each returned block.
        include_end: Keep the end marker in each returned block.

    Returns:
        The list of matches produced by ``re.findall``.
    """
    start_literal = re.escape(start_marker)
    end_literal = re.escape(end_marker)

    opening = start_literal if include_start else f"(?<={start_literal})"
    closing = end_literal if include_end else f"(?={end_literal})"

    return re.findall(f"{opening}.*?{closing}", text, re.DOTALL)


# def extract_blocks_split_fixed(text, start_marker, end_marker, include_start=True, include_end=True):
#     """Fixed str.split() implementation"""
#     if start_marker not in text:
#         return []

#     # Split by start marker, skip the first part (before any start marker)
#     parts = text.split(start_marker)
#     blocks = []

#     for part in parts[1:]:  # Skip first part
#         # Find the first occurrence of end marker in this part
#         end_idx = part.find(end_marker)
#         if end_idx != -1:
#             content = part[:end_idx]

#             # Build the result based on include flags
#             result = ""
#             if include_start:
#                 result += start_marker
#             result += content
#             if include_end:
#                 result += end_marker

#             blocks.append(result)

#     return blocks


def extract_blocks_manual_scan(
    text, start_marker, end_marker, include_start=True, include_end=True
):
    """Extract marker-delimited blocks by walking the text position by position.

    Each position is tested for the start marker. Once one is found, the
    following positions are tested for the end marker. After a block is
    emitted the scan continues right after its end marker, so the returned
    blocks never overlap.

    Args:
        text: The string to scan.
        start_marker: Substring that opens a block.
        end_marker: Substring that closes a block.
        include_start: Keep the start marker in each returned block.
        include_end: Keep the end marker in each returned block.

    Returns:
        A list of the extracted substrings, in order of appearance.
    """
    blocks = []
    text_len = len(text)
    start_len = len(start_marker)
    end_len = len(end_marker)
    # Last position at which a complete end marker can still begin.
    last_end_pos = text_len - end_len

    i = 0
    while i < text_len:
        if not text.startswith(start_marker, i):
            i += 1
            continue

        start_pos = i
        end_pos = start_pos + start_len
        while end_pos <= last_end_pos and not text.startswith(end_marker, end_pos):
            end_pos += 1

        if end_pos > last_end_pos:
            # No end marker after this start marker. Every later start marker
            # would search a sub-range of the range just scanned, so none of
            # them can be closed either: stop instead of rescanning the tail
            # once per remaining start marker.
            break

        extract_start = start_pos if include_start else start_pos + start_len
        extract_end = end_pos + end_len if include_end else end_pos
        blocks.append(text[extract_start:extract_end])

        # Continue after the end marker. max() only matters when both markers
        # are empty, where it keeps the scan from staying on the same index.
        i = max(end_pos + end_len, start_pos + 1)

    return blocks


def extract_blocks_compiled_regex(
    text, start_marker, end_marker, include_start=True, include_end=True
):
    """Extract marker-delimited blocks through an explicitly compiled regex.

    The pattern is the escaped start marker, a non-greedy ``.*?`` and the
    escaped end marker, compiled with ``re.DOTALL``. A marker excluded from
    the result is wrapped in a lookbehind (start) or lookahead (end) so it
    is required but not returned. The pattern object is built on every
    call; nothing is kept between calls by this function itself.

    Args:
        text: The string to scan.
        start_marker: Literal substring that opens a block.
        end_marker: Literal substring that closes a block.
        include_start: Keep the start marker in each returned block.
        include_end: Keep the end marker in each returned block.

    Returns:
        The list of matches produced by the compiled pattern's ``findall``.
    """
    if include_start:
        head = re.escape(start_marker)
    else:
        head = "(?<=" + re.escape(start_marker) + ")"

    if include_end:
        tail = re.escape(end_marker)
    else:
        tail = "(?=" + re.escape(end_marker) + ")"

    matcher = re.compile(head + ".*?" + tail, re.DOTALL)
    return matcher.findall(text)


def test_correctness():
    """Run every extractor on a fixed set of cases and report the outcome.

    Each case is ``(text, start, end, include_start, include_end, expected)``.
    For every case the text preview and the expected list are printed,
    followed by one PASS / FAIL / ERROR line per implementation. An
    implementation that raises is reported and counted as a failure.

    Returns:
        True when every implementation returned the expected list for every
        case, False otherwise.
    """
    test_cases = [
        ("prefix *start content end* suffix", "*start", "end*", True, True,
         ["*start content end*"]),
        ("*start content end*", "*start", "end*", True, False, ["*start content "]),
        ("*start content end*", "*start", "end*", False, True, [" content end*"]),
        ("*start content end*", "*start", "end*", False, False, [" content "]),
        ("*start block1 end* middle *start block2 end*", "*start", "end*", True, True,
         ["*start block1 end*", "*start block2 end*"]),
        ("no markers here", "*start", "end*", True, True, []),
        ("*start no end", "*start", "end*", True, True, []),
        ("no start end*", "*start", "end*", True, True, []),
        ("*\tblock1\n\tresponse1\nend*", "*", "end*", True, True,
         ["*\tblock1\n\tresponse1\nend*"]),
        # Tricky case: the end marker shows up a second time after the block
        ("*start content end* and end* again", "*start", "end*", True, True,
         ["*start content end*"]),
    ]

    functions = [
        ("str.find()", extract_blocks_find),
        ("regex", extract_blocks_regex),
        # ("str.split() fixed", extract_blocks_split_fixed),
        ("manual scan", extract_blocks_manual_scan),
        ("compiled regex", extract_blocks_compiled_regex),
    ]

    print("=== CORRECTNESS TESTS ===")
    all_passed = True

    for i, case in enumerate(test_cases):
        text, start, end, inc_start, inc_end, expected = case
        print(f"\nTest {i + 1}: {repr(text[:50])}{'...' if len(text) > 50 else ''}")
        print(f"Expected: {expected}")

        for name, func in functions:
            try:
                result = func(text, start, end, inc_start, inc_end)
                if result == expected:
                    status = "✓ PASS"
                    print(f"  {name:15}: {status}")
                else:
                    status = "✗ FAIL"
                    all_passed = False
                    # Show the actual output only when it is wrong
                    print(f"  {name:15}: {status} -> {result}")
            except Exception as e:
                all_passed = False
                print(f"  {name:15}: ✗ ERROR -> {e}")

    print(f"\nOverall correctness: {'✓ ALL PASSED' if all_passed else '✗ SOME FAILED'}")
    return all_passed


def benchmark_performance():
    """Time every extractor on a repeated ink snippet and print the results.

    The sample text is a short ink dialogue repeated 200 times, scanned with
    ``"*"`` as both start and end marker (both included). Before timing,
    each implementation is run once; the first successful result is the
    reference and any implementation returning something different is
    flagged. Each implementation is then called ``iterations`` times and
    the average time per call is printed in milliseconds.

    Returns:
        None. All output goes to stdout.
    """
    print("\n=== PERFORMANCE TESTS ===")

    test_text = (
        """
    "What's that?" my master asked.
    *	"I am somewhat tired[."]," I repeated.
    	"Really," he responded. "How deleterious."
    *	"Nothing, Monsieur!"[] I replied.
    	"Very good, then."
    *  "I said, this journey is appalling[."] and I want no more of it."
    	"Ah," he replied, not unkindly. "I see you are feeling frustrated."
    """
        * 200
    )

    functions = [
        ("str.find()", extract_blocks_find),
        ("regex", extract_blocks_regex),
        # ("str.split() fixed", extract_blocks_split_fixed),
        ("manual scan", extract_blocks_manual_scan),
        ("compiled regex", extract_blocks_compiled_regex),
    ]

    test_params = ("*", "*", True, True)
    print(f"Test data: {len(test_text)} characters")

    # Verify correctness first: the first result obtained is the reference
    reference_result = None
    for name, func in functions:
        try:
            result = func(test_text, *test_params)
            if reference_result is None:
                reference_result = result
            elif result != reference_result:
                print(f"⚠️  {name} produces different result!")
            print(f"{name:15}: Found {len(result)} blocks")
        except Exception as e:
            print(f"{name:15}: ERROR - {e}")

    iterations = 100
    print(f"\nTiming ({iterations} iterations each):")
    for name, func in functions:
        try:
            # perf_counter is the clock meant for measuring short durations
            start_time = time.perf_counter()
            for _ in range(iterations):
                func(test_text, *test_params)
            elapsed = time.perf_counter() - start_time
        except Exception as e:
            # Only ordinary errors are reported; Ctrl-C still interrupts the run
            print(f"{name:15}: ERROR - {e}")
        else:
            avg_time = elapsed * 1000 / iterations  # ms per call
            print(f"{name:15}: {avg_time:.2f}ms avg")


if __name__ == "__main__":
    # Benchmark numbers are only meaningful when every implementation agrees
    # on the expected output, so the correctness run gates the timing run.
    correctness_ok = test_correctness()
    if not correctness_ok:
        print("\n⚠️  Skipping performance tests due to correctness failures")
    else:
        benchmark_performance()

In [None]:
import re
import time
import random


def generate_large_ink_text(num_blocks=5000, seed=None):
    """Generate a synthetic ink-style story with many choice blocks.

    The text starts with one narrative line. Each block then adds one
    choice line prefixed with ``"*\\t"``, one to three response lines
    prefixed with ``"\\t"`` and, with probability 0.3, one extra narrative
    line. Lines are built from templates whose ``{placeholder}`` fields are
    filled with random words; a placeholder repeated inside one template
    receives the same word everywhere.

    Args:
        num_blocks: Number of choice blocks to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used so the same seed always produces the same text. When
            ``None`` the shared ``random`` module functions are used.

    Returns:
        The story as a single string with lines joined by ``"\\n"``.
    """
    # The module itself exposes choice/randint/random, so it can stand in for
    # a Random instance when no seed is requested.
    rng = random if seed is None else random.Random(seed)

    # Base story components
    dialogue_templates = [
        '"What do you think about {topic}?" {character} asked.',
        '"{response}" I replied, feeling {emotion}.',
        "{character} looked at me with {expression}.",
        '"The situation with {situation} is troubling," {character} mentioned.',
        '"I must admit, {confession}," I said quietly.',
    ]

    choice_templates = [
        '"I {action} {object}[."]," I {verb}.',
        '"Nothing, {title}!"[] I replied.',
        '"I said, {statement}[."] and I want no more of it."',
        '"Perhaps we should {suggestion}[?"]"',
        '"The {noun} is {adjective}[!"]"',
    ]

    response_templates = [
        '"{response}," {character} responded. "{followup}."',
        '"Really," {character} said. "How {adjective}."',
        '"Very good, then."',
        '"Ah," {character} replied, not unkindly. "I see you are feeling {emotion}."',
        '"Indeed," {character} murmured. "{observation}."',
    ]

    # Word banks, keyed by placeholder name
    words = {
        "topic": [
            "the weather",
            "our journey",
            "the situation",
            "current events",
            "the mission",
        ],
        "character": ["my master", "he", "she", "the stranger", "the captain"],
        "emotion": ["tired", "frustrated", "hopeful", "concerned", "relieved"],
        "expression": [
            "curiosity",
            "concern",
            "amusement",
            "suspicion",
            "understanding",
        ],
        "situation": [
            "the rebellion",
            "the storm",
            "our supplies",
            "the route",
            "the delay",
        ],
        "confession": [
            "I am quite worried",
            "this troubles me deeply",
            "I have doubts",
            "I fear the worst",
        ],
        "action": ["disagree with", "question", "support", "oppose", "doubt"],
        "object": ["this plan", "your decision", "the strategy", "their approach"],
        "verb": ["stated", "declared", "announced", "proclaimed", "admitted"],
        "title": ["Monsieur", "Sir", "Captain", "my lord", "Doctor"],
        "statement": [
            "this journey is appalling",
            "I cannot continue",
            "we must turn back",
            "this is madness",
        ],
        "suggestion": [
            "reconsider",
            "take a break",
            "change course",
            "rest here",
            "seek shelter",
        ],
        "noun": ["weather", "path", "situation", "decision", "plan"],
        "adjective": [
            "concerning",
            "deleterious",
            "troubling",
            "unexpected",
            "remarkable",
        ],
        "response": ["Indeed", "Certainly", "Of course", "Naturally", "Quite so"],
        "followup": [
            "That is most unfortunate",
            "We shall persevere",
            "Things will improve",
            "All will be well",
        ],
        "observation": [
            "Tomorrow will be better",
            "These things happen",
            "Patience is required",
            "We must endure",
        ],
    }

    def fill_template(template, words_dict):
        """Replace each known ``{category}`` in *template* with a random word."""
        result = template
        for category, word_list in words_dict.items():
            placeholder = "{" + category + "}"
            # A word is drawn only for placeholders that are present, which
            # keeps the sequence of random draws tied to the template content.
            if placeholder in result:
                result = result.replace(placeholder, rng.choice(word_list))
        return result

    # Opening narrative
    story_parts = [fill_template(rng.choice(dialogue_templates), words)]

    for _ in range(num_blocks):
        # Choice line, introduced by "*"
        story_parts.append(
            "*\t" + fill_template(rng.choice(choice_templates), words)
        )

        # 1-3 indented response lines
        num_responses = rng.randint(1, 3)
        for _ in range(num_responses):
            story_parts.append(
                "\t" + fill_template(rng.choice(response_templates), words)
            )

        # Occasionally some narrative between choices
        if rng.random() < 0.3:
            story_parts.append(fill_template(rng.choice(dialogue_templates), words))

    return "\n".join(story_parts)


def benchmark_performance():
    """Benchmark every extractor on generated ink texts of growing size.

    For each text size a story is generated with
    ``generate_large_ink_text`` and each marker scenario is run through all
    implementations. A scenario is timed only when the implementations that
    ran without error agree on the number of blocks found; otherwise the
    per-implementation counts are printed and the scenario is skipped.
    Average time per call (ms) and speed relative to the fastest
    implementation are printed, followed by a fixed summary.

    Returns:
        None. All output goes to stdout.
    """
    print("=== LARGE SCALE PERFORMANCE BENCHMARK ===\n")

    # (number of choice blocks, label)
    test_sizes = [
        (1000, "Small"),
        (5000, "Medium"),
        (20000, "Large"),
        (50000, "Very Large"),
    ]

    functions = [
        ("str.find()", extract_blocks_find),
        ("regex", extract_blocks_regex),
        ("manual scan", extract_blocks_manual_scan),
        ("compiled regex", extract_blocks_compiled_regex),
    ]

    # (start marker, end marker, include_start, include_end, label)
    scenarios = [
        ("*", "\n*", True, False, "Ink choices (start only)"),
        ("*", "\n*", True, True, "Ink choices (both markers)"),
        ("<tag>", "</tag>", False, False, "XML-like content only"),
        ("[START]", "[END]", True, True, "Custom markers (both)"),
    ]

    for num_blocks, size_name in test_sizes:
        print(f"--- {size_name} Text ({num_blocks:,} choice blocks) ---")

        # Generate test text
        test_text = generate_large_ink_text(num_blocks)
        text_size = len(test_text)
        print(f"Generated text: {text_size:,} characters")

        for start_marker, end_marker, inc_start, inc_end, scenario_name in scenarios:
            print(f"\nScenario: {scenario_name}")
            print(f"Markers: {repr(start_marker)} -> {repr(end_marker)}")

            # Record the block count per implementation, or an error string
            results = {}
            for name, func in functions:
                try:
                    result = func(
                        test_text, start_marker, end_marker, inc_start, inc_end
                    )
                    results[name] = len(result)
                except Exception as e:
                    results[name] = f"ERROR: {e}"

            # Only block counts are compared, not the block contents
            valid_results = [r for r in results.values() if isinstance(r, int)]
            if len(set(valid_results)) > 1:
                print("⚠️  WARNING: Inconsistent results!")
                for name, result in results.items():
                    print(f"  {name}: {result}")
                continue
            else:
                blocks_found = valid_results[0] if valid_results else 0
                print(f"Blocks found: {blocks_found:,}")

            # Fewer iterations for larger texts
            iterations = max(1, 50 // (num_blocks // 1000 + 1))
            print(f"Timing ({iterations} iterations):")

            timings = []
            for name, func in functions:
                if isinstance(results[name], str):  # Errored during the check
                    print(f"  {name:15}: SKIPPED")
                    continue

                try:
                    # perf_counter is the clock meant for short durations;
                    # time.time() can be too coarse and report 0 elapsed.
                    start_time = time.perf_counter()
                    for _ in range(iterations):
                        func(test_text, start_marker, end_marker, inc_start, inc_end)
                    elapsed = time.perf_counter() - start_time
                except Exception as e:
                    print(f"  {name:15}: ERROR - {e}")
                    continue

                avg_time = elapsed * 1000 / iterations  # ms per call
                timings.append((name, avg_time))
                print(f"  {name:15}: {avg_time:6.2f}ms")

            # Show relative performance
            if timings:
                fastest_time = min(avg for _, avg in timings)
                print("  Relative speed:")
                for name, avg_time in timings:
                    if fastest_time > 0:
                        ratio = avg_time / fastest_time
                    else:
                        # A zero measurement must not crash the report
                        ratio = 1.0 if avg_time == 0 else float("inf")
                    print(
                        f"    {name:13}: {ratio:4.1f}x {'(fastest)' if ratio == 1.0 else ''}"
                    )

        print("\n" + "=" * 60)

    print("\n=== SUMMARY ===")
    print("For ink story parsing, recommended approach:")
    print("1. str.find() - Most reliable, good performance")
    print("2. compiled regex - Fast for repeated use with same markers")
    print("3. manual scan - Potentially fastest for simple cases")
    print("4. regex - Good balance, single-use scenarios")

In [None]:
# Run the benchmark; in a notebook session this calls whichever
# benchmark_performance definition was executed last.
benchmark_performance()

In [None]:
# Small sample (4 choice blocks) that is easy to inspect by eye.
test_text = generate_large_ink_text(4)

In [None]:
print(test_text)

In [None]:
# (label, implementation) pairs compared in the next cell.
functions = [
    ("str.find()", extract_blocks_find),
    ("regex", extract_blocks_regex),
    ("manual scan", extract_blocks_manual_scan),
    ("compiled regex", extract_blocks_compiled_regex),
]

In [None]:
# Run every implementation with "*" as start marker and "\n*" as end marker
# (start included, end excluded) and print the block count and the blocks.
for name, func in functions:
    result = func(test_text, "*", "\n*", True, False)
    print(name, len(result), result)

In [None]:
def extract_blocks_find(
    text,
    start_marker,
    end_marker,
    include_start=True,
    include_end=False,
    end_string_is_marker: bool = True,
):
    """Extract marker-delimited blocks with ``str.find()``, closing the last
    block at the end of the text when requested.

    The text is scanned left to right. Each start marker is closed by the
    nearest end marker after it. The search for the next start marker
    resumes *at* the previous end marker, so an end marker that contains the
    start marker (e.g. ``"\\n*"`` with start ``"*"``) also opens the next
    block.

    Args:
        text: The string to scan.
        start_marker: Substring that opens a block.
        end_marker: Substring that closes a block.
        include_start: Keep the start marker in each returned block.
        include_end: Keep the end marker in each returned block.
        end_string_is_marker: When True, a start marker with no end marker
            after it yields a final block running to the end of ``text``.
            When False, such an unterminated block is dropped.

    Returns:
        A list of the extracted substrings, in order of appearance.
    """
    blocks = []
    start_len = len(start_marker)
    end_len = len(end_marker)
    search_from = 0

    while True:
        start_idx = text.find(start_marker, search_from)
        if start_idx == -1:
            break

        extract_start = start_idx if include_start else start_idx + start_len

        end_idx = text.find(end_marker, start_idx + start_len)
        if end_idx == -1:
            if end_string_is_marker:
                blocks.append(text[extract_start:])
            # Nothing can follow an unterminated block. Stopping here also
            # avoids searching again from index -1, which would re-find a
            # start marker sitting on the last character forever.
            break

        extract_end = end_idx + end_len if include_end else end_idx
        blocks.append(text[extract_start:extract_end])

        # Resume at the end marker so it can double as the next start marker;
        # max() only matters for an empty start marker, where it forces the
        # scan forward.
        search_from = max(end_idx, start_idx + 1)

    return blocks

In [None]:
# Fresh small sample for trying the extract_blocks_find variant that takes
# end_string_is_marker.
test_text = generate_large_ink_text(4)

In [None]:
# Block count when a start marker without a following end marker is dropped.
len(
    extract_blocks_find(
        test_text,
        "*",
        "\n*",
        include_start=True,
        include_end=False,
        end_string_is_marker=False,
    )
)

In [None]:
# Same call, but the end of the text also closes the last block.
len(
    extract_blocks_find(
        test_text,
        "*",
        "\n*",
        include_start=True,
        include_end=False,
        end_string_is_marker=True,
    )
)

In [None]:
# Same again with a bare "*" as end marker instead of "\n*".
len(
    extract_blocks_find(
        test_text,
        "*",
        "*",
        include_start=True,
        include_end=False,
        end_string_is_marker=True,
    )
)

In [None]:
# Print each block found with "*" as both start and end marker.
for res in extract_blocks_find(
    test_text,
    "*",
    "*",
    include_start=True,
    include_end=False,
    end_string_is_marker=True,
):
    print(res)

# PARSER

In [None]:
from analink.parser.node import clean_lines
from analink.parser.graph_story import parse_story, graph_to_mermaid

In [None]:
# NOTE: this first sample is overwritten by the assignment right below it,
# so only the second (nested) sample reaches the following cells.
ink_code = """
- A A
*	AB
*	AC
- B B
"""
ink_code = """
- A
*	B
    C
	* * 	AA
    BB
			* * * 	AAA
			* * *  BBB
			- - - 	CCC
			* * *	DDD
					EEE
			* * *	FFF
			* * * 	GGG
	* * 	CC
	- - 	DD
*	C
-  D
"""

In [None]:
# clean_lines output is passed to parse_story; the names follow how both
# values are handed to graph_to_mermaid in the next cell.
nodes = clean_lines(ink_code)
edges = parse_story(nodes)

In [None]:
print(graph_to_mermaid(nodes,edges))

In [None]:
# Flatten each node into a plain dict holding its content, level and
# node_type, keyed like `nodes`.
app_lines = {}
for k in nodes:
    app_lines[k]={"text": nodes[k].content, "level": nodes[k].level, "node_type": nodes[k].node_type}

In [None]:
app_lines

In [None]:
# Flat sample: one narrative line followed by three choices with responses.
ink_code = """
"What's that?" my master asked.
*	"I am somewhat tired[."]," I repeated.
	"Really," he responded. "How deleterious."
*	"Nothing, Monsieur!"[] I replied.
	"Very good, then."
*  "I said, this journey is appalling[."] and I want no more of it."
	"Ah," he replied, not unkindly. "I see you are feeling frustrated. Tomorrow, things will improve."
"""
lines = clean_lines(ink_code)

In [None]:
lines

In [None]:
# Same parse_story call as above; here its result is named `container`.
container = parse_story(lines)

In [None]:
print(graph_to_mermaid(lines, container))

In [None]:
# Nested sample with choices and gathers on several levels.
# NOTE: no later cell in this section reads this value before ink_code is
# assigned again.
ink_code = """
- I looked at Monsieur Fogg
*	... and I could contain myself no longer.
	'What is the purpose of our journey, Monsieur?'
	'A wager,' he replied.
	* * 	'A wager!'[] I returned.
    He nodded.
			* * * 	'But surely that is foolishness!'
			* * *  'A most serious matter then!'
			- - - 	He nodded again.
			* * *	'But can we win?'
					'That is what we will endeavour to find out,' he answered.
			* * *	'A modest wager, I trust?'
					'Twenty thousand pounds,' he replied, quite flatly.
			* * * 	I asked nothing further of him then[.], and after a final, polite cough, he offered nothing more to me. <>
	* * 	'Ah[.'],' I replied, uncertain what I thought.
	- - 	After that, <>
*	... but I said nothing[] and <>
- we passed the day in silence.
// - -> END
"""

# TEXTUAL STUFF

In [None]:
from analink.parser.node import clean_lines, NodeType
from analink.parser.graph_story import parse_story, graph_to_mermaid
import networkx as nx

In [None]:
# Flat sample: one narrative line followed by three choices with responses.
ink_code = """
"What's that?" my master asked.
*	"I am somewhat tired[."]," I repeated.
	"Really," he responded. "How deleterious."
*	"Nothing, Monsieur!"[] I replied.
	"Very good, then."
*  "I said, this journey is appalling[."] and I want no more of it."
	"Ah," he replied, not unkindly. "I see you are feeling frustrated. Tomorrow, things will improve."
"""
nodes = clean_lines(ink_code)
edges = parse_story(nodes)

In [None]:
# Build a directed graph from the parse_story result.
# NOTE(review): assumes `edges` is in a form nx.DiGraph accepts as its
# initial data — confirm against parse_story.
graph = nx.DiGraph(edges)

In [None]:
graph

In [None]:
def get_choice_nodes(node_ids: list[int]):
    """Return the choice nodes among *node_ids*.

    Looks each id up in the module-level ``nodes`` mapping and keeps the
    node when its ``node_type`` is ``NodeType.CHOICE``. Ids missing from
    ``nodes`` are ignored. The input order is preserved.
    """
    return [
        nodes[candidate_id]
        for candidate_id in node_ids
        if candidate_id in nodes
        and nodes[candidate_id].node_type == NodeType.CHOICE
    ]

In [None]:
# For every node of the graph, print its id, the ids of its successors and
# the subset of those successors that are choice nodes.
for node_id in graph:
    print(node_id, list(graph.successors(node_id)), get_choice_nodes(list(graph.successors(node_id))))

# KNOT

In [None]:
from analink.parser.node import clean_lines, Node
from analink.parser.graph_story import parse_knot, parse_story, graph_to_mermaid
from analink.core.story_engine import StoryEngine

In [None]:
# Reset the Node id counter before parsing the sample.
# NOTE(review): presumably this makes node ids start over so they are
# predictable — confirm in analink.parser.node.
Node.reset_id_counter()
ink_code_1 ="""=== back_in_london ===

We arrived into London at 9.45pm exactly.

*	"There is not a moment to lose!"[] I declared.
	-> hurry_outside

*	"Monsieur, let us savour this moment!"[] I declared.
	My master clouted me firmly around the head and dragged me out of the door.
	-> dragged_outside

*	[We hurried home] -> hurry_outside


=== hurry_outside ===
We hurried home to Savile Row -> as_fast_as_we_could


=== dragged_outside ===
He insisted that we hurried home to Savile Row
-> as_fast_as_we_could


=== as_fast_as_we_could ===
<> as fast as we could."""

In [None]:
raw_story = clean_lines(ink_code_1)

In [None]:
parse_story(raw_story)

In [None]:
# Build the engine directly from the ink source text.
story_engine = StoryEngine(ink_code_1)

In [None]:
story_engine.nodes

In [None]:
story_engine.edges

In [None]:
# Calls a private helper of StoryEngine (leading underscore) with the
# engine's current node id.
next_node_ids = story_engine._get_next_nodes(story_engine.current_node_id)

In [None]:
story_engine.current_node_id

In [None]:
next_node_ids

In [None]:
story_engine.current_node_id

In [None]:
raw_story = clean_lines(ink_code_1)

In [None]:
# NOTE(review): parse_story_v2 is neither imported nor defined anywhere in
# the visible part of this file — confirm where it comes from, otherwise
# this cell raises NameError.
nodes, edges = parse_story_v2(raw_story)

In [None]:
print(graph_to_mermaid(nodes, edges))

In [None]:
# NOTE(review): assumes the object returned by clean_lines exposes
# get_node(); other cells index that result with [] instead — confirm.
raw_story.get_node(5)