## 1. Setup

Import the tokenizer and configure paths.

In [24]:
import sys
from pathlib import Path

# Find the project root (the directory that contains `cores/flow_core`) by
# walking up from the current working directory, so the notebook is not tied
# to one machine's checkout location. The original absolute path is kept only
# as a last-resort fallback so existing setups keep working.
_FALLBACK_ROOT = Path('/home/stellar/PublicRepo/YMHY/anime_streamer_2_0')
_cwd = Path.cwd()
project_root = next(
    (p for p in (_cwd, *_cwd.parents) if (p / 'cores' / 'flow_core').is_dir()),
    _FALLBACK_ROOT,
)

# Guard the insert so re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from cores.flow_core.tokenizer import Tokenizer
from cores.flow_core.models import TokenType, Token

# Create the tokenizer instance used by the example cells.
tokenizer = Tokenizer()

print("✓ Tokenizer imported successfully")
print(f"Available token types: {[t.name for t in TokenType]}")

✓ Tokenizer imported successfully
Available token types: ['COMMENT', 'IMPORT', 'NODE_DEF', 'NODE_REF', 'FORWARD_REF', 'FILE_REF', 'PIPE', 'DOT_END', 'ASSIGN', 'STRING_TRIM', 'STRING_PRESERVE', 'IDENTIFIER', 'PARAM_NAME', 'EOF']


## 2. Basic Tokenization

A simple example showing how source text becomes tokens.

In [25]:
# Rebind `tokenizer` to a new Tokenizer instance (the setup cell already
# created one); the example cells below all call this object.
tokenizer = Tokenizer()
print("✓ Tokenizer instance created successfully")

✓ Tokenizer instance created successfully


In [26]:
# Two-node program: `out` references `greeting`.
source = '''
@greeting |<<<Hello, World!>>>|.
@out |$greeting|.
'''

tokens = tokenizer.tokenize(source)

# Header with input/output sizes, then one row per token with its position.
char_count, token_count = len(source), len(tokens)
print(f"Source: {char_count} chars -> {token_count} tokens\n")
for tok in tokens:
    kind, text = tok.type.name, tok.value
    print(f"  {kind:15} | {text!r:30} | line {tok.line}, col {tok.column}")

Source: 52 chars -> 9 tokens

  NODE_DEF        | 'greeting'                     | line 2, col 1
  PIPE            | '|'                            | line 2, col 11
  STRING_TRIM     | 'Hello, World!'                | line 2, col 12
  DOT_END         | '|.'                           | line 2, col 31
  NODE_DEF        | 'out'                          | line 3, col 1
  PIPE            | '|'                            | line 3, col 6
  NODE_REF        | 'greeting'                     | line 3, col 7
  DOT_END         | '|.'                           | line 3, col 16
  EOF             | ''                             | line 4, col 1


## 3. Token Type Examples

Let's explore each token type individually.

### 3.1 Comments (`#`)

In [27]:
source = '''
# This is a comment
# Comments are ignored by the parser
@node |<<<Content>>>|.
'''

# Dump every token as "TYPE -> value" (type name padded to 15 columns).
tokens = tokenizer.tokenize(source)
for tok in tokens:
    type_name = tok.type.name
    print(f"{type_name:15} -> {tok.value!r}")

COMMENT         -> '# This is a comment'
COMMENT         -> '# Comments are ignored by the parser'
NODE_DEF        -> 'node'
PIPE            -> '|'
STRING_TRIM     -> 'Content'
DOT_END         -> '|.'
EOF             -> ''


### 3.2 Node Definitions and References

In [28]:
source = '''
@first |<<<First node>>>|.
@second |<<<Uses: >>>|$first|.
@third |<<<Forward ref: >>>|^later|.
@later |<<<Defined later>>>|.
'''

tokens = tokenizer.tokenize(source)

# Only definition / reference tokens are of interest in this example.
reference_kinds = (TokenType.NODE_DEF, TokenType.NODE_REF, TokenType.FORWARD_REF)

print("Reference tokens:")
for tok in tokens:
    if tok.type not in reference_kinds:
        continue
    print(f"  {tok.type.name:15} -> {tok.value!r}")

Reference tokens:
  NODE_DEF        -> 'first'
  NODE_DEF        -> 'second'
  NODE_REF        -> 'first'
  NODE_DEF        -> 'third'
  FORWARD_REF     -> 'later'
  NODE_DEF        -> 'later'


### 3.3 File References (`++path`)

In [29]:
source = '''
@docs |<<<Documentation: >>>|++./README.md|.
@config |<<<Config: >>>|++./config.yaml|.
'''

tokens = tokenizer.tokenize(source)

# Lazily select the FILE_REF tokens, then print one row for each.
file_refs = (tok for tok in tokens if tok.type == TokenType.FILE_REF)
print("File reference tokens:")
for ref in file_refs:
    print(f"  {ref.type.name:15} -> {ref.value!r}")

File reference tokens:
  FILE_REF        -> './README.md'
  FILE_REF        -> './config.yaml'


### 3.4 Import Statements (`+path`)

In [30]:
source = '''
+./common.flow |.
+./utils.flow |$helper|.
'''

tokens = tokenizer.tokenize(source)

# Show only the IMPORT tokens produced for the two `+path` lines.
print("Import tokens:")
for imp in filter(lambda t: t.type == TokenType.IMPORT, tokens):
    print(f"  {imp.type.name:15} -> {imp.value!r}")

Import tokens:
  IMPORT          -> './common.flow'
  IMPORT          -> './utils.flow'


### 3.5 String Blocks (Trim vs Preserve)

In [31]:
source = '''
@trimmed |<<<   Trimmed whitespace   >>>|.
@preserved |<<   Preserved whitespace   >>|.
'''

tokens = tokenizer.tokenize(source)

# Restrict the listing to the two string-block token types.
string_kinds = (TokenType.STRING_TRIM, TokenType.STRING_PRESERVE)

print("String block tokens:")
for tok in tokens:
    is_string_block = tok.type in string_kinds
    if is_string_block:
        print(f"  {tok.type.name:15} -> {tok.value!r}")

String block tokens:
  STRING_TRIM     -> 'Trimmed whitespace'
  STRING_PRESERVE -> '   Preserved whitespace   '


## 4. Tokenize Sample Files

Let's tokenize some of the sample `.flow` files.

In [32]:
from pathlib import Path

# Read and tokenize the hello.flow sample.
# The path is relative to the working directory, which is printed below so a
# "not found" message is easy to diagnose.
sample_path = Path('./playground/samples/hello.flow')
print(f"pwd: {Path.cwd()}")

if sample_path.exists():
    # Decode explicitly as UTF-8 so the result does not depend on the
    # machine's locale default encoding.
    source = sample_path.read_text(encoding='utf-8')
    print(f"=== {sample_path.name} ===")
    print(source)
    print("\n=== Tokens ===")
    tokens = tokenizer.tokenize(source)
    for token in tokens:
        print(f"  {token.type.name:15} -> {token.value!r}")
else:
    print(f"Sample file not found: {sample_path}")

pwd: /home/stellar/PublicRepo/YMHY/anime_streamer_2_0/cores/flow_core
=== hello.flow ===
# Simple greeting node
# This is the most basic FLOW file example

@greeting |style.title=<<Hello>>|<<<Welcome to FLOW!>>>|.

@out |$greeting|.


=== Tokens ===
  COMMENT         -> '# Simple greeting node'
  COMMENT         -> '# This is the most basic FLOW file example'
  NODE_DEF        -> 'greeting'
  PIPE            -> '|'
  IDENTIFIER      -> 'style.title'
  ASSIGN          -> '='
  STRING_PRESERVE -> 'Hello'
  PIPE            -> '|'
  STRING_TRIM     -> 'Welcome to FLOW!'
  DOT_END         -> '|.'
  NODE_DEF        -> 'out'
  PIPE            -> '|'
  NODE_REF        -> 'greeting'
  DOT_END         -> '|.'
  EOF             -> ''


In [33]:
# Read and tokenize the refs.flow sample (demonstrates all reference types)
from collections import Counter

PREVIEW_CHARS = 500  # show at most this many characters of the file

sample_path = Path('./playground/samples/refs.flow')
if sample_path.exists():
    # Decode explicitly as UTF-8 so the result does not depend on the
    # machine's locale default encoding.
    source = sample_path.read_text(encoding='utf-8')
    print(f"=== {sample_path.name} ===")
    # Parenthesised so it is obvious that "..." is appended only when the
    # source was actually truncated.
    preview = (source[:PREVIEW_CHARS] + "...") if len(source) > PREVIEW_CHARS else source
    print(preview)
    print("\n=== Token Summary ===")
    tokens = tokenizer.tokenize(source)

    # Count tokens by type name and list the most frequent first.
    type_counts = Counter(t.type.name for t in tokens)
    for token_type, count in type_counts.most_common():
        print(f"  {token_type:20} -> {count}")
else:
    print(f"Sample file not found: {sample_path}")

=== refs.flow ===
# Reference types demonstration
# Shows $ref, ^forward ref, and ++file ref

# Node reference ($) - references an existing node
@greeting |<<<Hello!>>>|.

@welcome |<<<Welcome message: >>>|$greeting|.

# Forward reference (^) - references a node defined later
@preview |<<<Coming up: >>>|^conclusion|.

# File reference (++) - embeds external file content
@resources |<<<Documentation: >>>|++./docs/guide.md|.

# Mixed references in one node
@combined |<<<Start>>>|$greeting|<<<Middle>>>|^ending|<<<Se...

=== Token Summary ===
  PIPE                 -> 16
  STRING_TRIM          -> 9
  COMMENT              -> 8
  NODE_DEF             -> 8
  DOT_END              -> 8
  NODE_REF             -> 3
  FORWARD_REF          -> 2
  FILE_REF             -> 2
  EOF                  -> 1


## 5. Interactive Tokenizer

Use this cell to experiment with your own FLOW source code.

In [34]:
# Your FLOW source here
source = '''
@greeting |<<<Hello!>>>|.
@out |$greeting|.
'''

# Everything stays inside the try so that any failure while tokenizing or
# printing is reported as a one-line message rather than a traceback.
try:
    tokens = tokenizer.tokenize(source)
    total = len(tokens)
    print(f"Tokenized: {total} tokens\n")
    for tok in tokens:
        kind = tok.type.name
        print(f"  {kind:15} | {tok.value!r}")
except Exception as err:
    print(f"Error: {err}")

Tokenized: 9 tokens

  NODE_DEF        | 'greeting'
  PIPE            | '|'
  STRING_TRIM     | 'Hello!'
  DOT_END         | '|.'
  NODE_DEF        | 'out'
  PIPE            | '|'
  NODE_REF        | 'greeting'
  DOT_END         | '|.'
  EOF             | ''
