# Smart Parser

Ziel ist ein Parser, der einfache Sätze besser versteht. Eingaben sind gewöhnliche Text-Adventure-Sätze, Ausgaben sind die Commands.

Der Parser benötigt mehrere Stufen, um die Syntax zu verstehen, diese mit den erlaubten Verben zu matchen und die Objekte zu identifizieren.

Verwendet werden:
- spaCy: Modell zur Analyse und Annotation der eingegebenen Sätze. Vermutlich "de_dep_news_trf".
- SentenceTransformer: Zum Matching der Commands, vermutlich mit "paraphrase-multilingual-MiniLM-L12-v2".
- Neo4j: Zum Identifizieren der Objekte.

In [None]:
import os
import spacy 
from dotenv import load_dotenv
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer, util

# Load environment variables from the .env file one directory up
load_dotenv(dotenv_path='../.env')

# spaCy pipeline used to parse the input sentences
parsing_model_trf = spacy.load("de_dep_news_trf")
# Sentence embedding model used to match verbs against the known commands
matching_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

# Neo4j connection; URI and credentials come from the environment
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_auth = (os.getenv('NEO4J_USER'), os.getenv('NEO4J_PASSWORD'))
driver = GraphDatabase.driver(uri=neo4j_uri, auth=neo4j_auth)

# Reminder to start Docker, followed by the configured URI and a local UI address
print('Docker starten ;-)')
print(f'URI: {neo4j_uri}')
print('UI:  http://localhost:7474')

In [None]:
# Inspect the model response for an example sentence with two verbs
example_sentence = "Nimm den goldenen Schlüssel und öffne die verzauberte Truhe"
doc_trf = parsing_model_trf(example_sentence)

# Render the dependency parse inline in the notebook
spacy.displacy.render(doc_trf, style='dep', jupyter=True)

## Testsätze

KI-generierte Sätze verschiedener Kategorien inklusive expected outcome.

In [None]:
# Category 'basic': each row is (sentence, expected command, expected objects)
# and is expanded into the test-item dict used by the rest of the notebook.
basic = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Nimm den Kristall", 'take', ['kristall']),
        ("Lege die Laterne ab", 'drop', ['laterne']),
        ("Geh zur Höhle", 'go', ['hoehle']),
        ("Untersuche den Hammer", 'examine', ['hammer']),
        ("Lies die Gravur", 'read', ['gravur']),
        ("Benutze den Dolch", 'use', ['dolch']),
    )
]

# Category 'trennbar': each row is (sentence, expected command, expected objects)
# and is expanded into the test-item dict used by the rest of the notebook.
# The first sentence uses the masculine article ("den Edelstein"); the wrong
# article would hand the parser an ungrammatical input unrelated to what this
# category is meant to probe.
trennbar = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Nimm den Edelstein auf", 'take', ['kristall']),
        ("Wirf die Lampe weg", 'drop', ['laterne']),
        ("Lauf zur Grotte", 'go', ['hoehle']),
        ("Sieh dir das Werkzeug an", 'examine', ['hammer']),
        ("Les die Inschrift vor", 'read', ['gravur']),
        ("Wende das Messer an", 'use', ['dolch']),
    )
]

# Category 'komplex': each row is (sentence, expected command, expected objects)
# and is expanded into the test-item dict used by the rest of the notebook.
komplex = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Nimm den leuchtenden Kristall", 'take', ['kristall']),
        ("Lege die flackernde alte Laterne ab", 'drop', ['laterne']),
        ("Geh in die dunkle Höhle", 'go', ['hoehle']),
        ("Untersuche den rostigen Hammer", 'examine', ['hammer']),
        ("Lies die alte Gravur", 'read', ['gravur']),
        ("Öffne die Kiste mit dem scharfen Dolch", 'use', ['dolch']),
    )
]

# Category 'praepositionen': each row is (sentence, expected command, expected
# objects) and is expanded into the test-item dict used by the rest of the notebook.
praepositionen = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Hole den Edelstein aus der Spalte", 'take', ['kristall']),
        ("Lege die Lampe auf den Boden", 'drop', ['laterne']),
        ("Gehe in die Grotte", 'go', ['hoehle']),
        ("Sieh dir das Werkzeug an der Wand an", 'examine', ['hammer']),
        ("Lies die Inschrift auf dem Stein", 'read', ['gravur']),
        ("Öffne die Truhe mit dem Messer", 'use', ['dolch']),
    )
]

# Category 'synonyme': each row is (sentence, expected command, expected objects)
# and is expanded into the test-item dict used by the rest of the notebook.
synonyme = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Greif nach dem glänzenden Stein", 'take', ['kristall']),
        ("Lass die Leuchte fallen", 'drop', ['laterne']),
        ("Besuche die Kaverne", 'go', ['hoehle']),
        ("Betrachte den Schmiedehammer", 'examine', ['hammer']),
        ("Durchlese die Runen", 'read', ['gravur']),
        ("Verwende die Klinge", 'use', ['dolch']),
    )
]

# Category 'schwierig': each row is (sentence, expected command, expected objects)
# and is expanded into the test-item dict used by the rest of the notebook.
# One row expects an empty object list.
schwierig = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Schnapp dir den funkelnden Brocken", 'take', ['kristall']),
        ("Schmeiß das Licht weg", 'drop', ['laterne']),
        ("Mach dass du zur Felsenhöhle kommst", 'go', ['hoehle']),
        ("Guck dir das schwere Ding mal genauer an", 'examine', ['hammer']),
        ("Was steht da drauf?", 'read', []),
        ("Probier mal die Waffe aus", 'use', ['dolch']),
    )
]

# Category 'edge_cases': the regular rows are (sentence, expected command,
# expected objects); the final item is the empty input, which carries no
# expectation and an extra 'setup' key.
edge_cases = [
    {'sentence': sentence, 'expected': {'command': command, 'objects': objects}}
    for sentence, command, objects in (
        ("Nimm Kristall", 'take', ['kristall']),
        ("Laterne ablegen", 'drop', ['laterne']),
        ("Ich möchte zur Höhle gehen", 'go', ['hoehle']),
        ("Kannst du dir das ansehen?", 'examine', []),
    )
] + [
    {'sentence': "", 'expected': None, 'setup': None},
]

# All test categories keyed by name; the order here is the iteration order
# of every loop over all_tests.
all_tests = dict(
    basic=basic,
    trennbar=trennbar,
    komplex=komplex,
    praepositionen=praepositionen,
    synonyme=synonyme,
    schwierig=schwierig,
    edge_cases=edge_cases,
)

## NLP Processing

In [None]:
# Parser for a single item of the test data
def add_trf_parsing(items):
    """Annotate one test item in place with the parse of its sentence.

    Runs ``parsing_model_trf`` on ``items['sentence']`` and writes two keys:

    - ``items['rootverb']``: lemma of the token whose dependency label is
      ``ROOT``, or ``None`` if no token carries that label.
    - ``items['objects']``: list of the tokens whose dependency label is one
      of ``obj``, ``dobj``, ``oa`` or ``pobj`` (the token objects themselves,
      not strings).

    Returns nothing.
    """
    object_labels = ('obj', 'dobj', 'oa', 'pobj')
    parsed = parsing_model_trf(items['sentence'])

    root_lemma = None
    object_tokens = []
    for tok in parsed:
        label = tok.dep_
        if label == "ROOT":
            # If several tokens are labelled ROOT, the last one wins
            root_lemma = tok.lemma_
        if label in object_labels:
            object_tokens.append(tok)

    items['rootverb'] = root_lemma
    items['objects'] = object_tokens

# Command Embedding

In [None]:
# Command structure:
# Meta (hard-coded): inventory, quit, help, look
# In-world (parsed): the commands below
#
# Per command: first the list of verbs, then the list of imperative forms.
command_verbs = dict(
    # Observation
    examine=(
        ['untersuchen', 'betrachten', 'ansehen', 'anschauen', 'inspizieren', 'prüfen', 'mustern']
        + ['untersuch', 'betracht', 'sieh an', 'schau an', 'guck an']
    ),
    read=(
        ['lesen', 'durchlesen', 'vorlesen']
        + ['lies', 'les']
    ),
    # Movement
    go=(
        ['gehen', 'laufen', 'bewegen', 'besuchen', 'kommen']
        + ['geh', 'lauf', 'beweg', 'besuch', 'komm']
    ),
    # Object interaction
    take=(
        ['nehmen', 'holen', 'packen', 'greifen', 'schnappen', 'aufheben', 'raffen']
        + ['nimm', 'hol', 'pack', 'greif', 'schnapp']
    ),
    drop=(
        ['ablegen', 'werfen', 'lassen', 'fallenlassen', 'wegwerfen', 'schmeißen']
        + ['leg ab', 'wirf', 'lass', 'schmeiß']
    ),
    use=(
        ['benutzen', 'verwenden', 'anwenden', 'öffnen', 'betätigen', 'probieren', 'aktivieren']
        + ['benutz', 'verwend', 'wend an', 'öffne', 'probier']
    ),
)

# Embed the verb list of every command once, keyed by command name
command_verb_embeddings = {
    cmd: matching_model.encode(verbs)
    for cmd, verbs in command_verbs.items()
}

In [None]:
# Embed the verb and compare it against the commands
def verb_to_command(items):
    """Store the best-matching command for ``items['rootverb']`` in ``items``.

    Writes ``items['best_command']`` and ``items['best_sim']`` in place and
    returns nothing. Without a root verb the result is ``None`` with a
    similarity of ``0.0``. Otherwise the verb is embedded with
    ``matching_model`` and compared by cosine similarity against every entry
    of ``command_verb_embeddings``; the command holding the single highest
    similarity is stored together with that similarity.
    """
    root = items['rootverb']

    # Nothing to match when no verb was found
    if root is None:
        items.update(best_command=None, best_sim=0.0)
        return

    # Start below any possible cosine similarity so the first command always wins
    items.update(best_command=None, best_sim=-1)

    verb_vec = matching_model.encode(root)

    for cmd, cmd_vecs in command_verb_embeddings.items():
        # Best similarity between the verb and any verb listed for this command
        score = util.cos_sim(verb_vec, cmd_vecs).max().item()

        # Strict comparison: on a tie the earlier command is kept
        if score > items['best_sim']:
            items.update(best_command=cmd, best_sim=score)

# Smart Parsing

"Einmal mit alles"

In [None]:
# Run both pipeline stages over every test item of every category:
# parse the sentence first, then map the root verb to a command.
for tests in all_tests.values():
    for items in tests:
        add_trf_parsing(items)
        verb_to_command(items)

In [None]:
# Print the complete test data structure for inspection
print(all_tests)

## AI Analyse

Danke Claude :)

In [None]:
# Error analysis: expected vs. predicted command
from collections import defaultdict

# errors[expected][predicted] = number of test sentences with that outcome
errors = defaultdict(lambda: defaultdict(int))

for category, tests in all_tests.items():
    for item in tests:
        # Items without an expectation are not scored
        if item['expected'] is None:
            continue

        expected = item['expected']['command']
        # str() turns a missing prediction (None) into the label 'None' so
        # that every key is a string; a None key next to str keys would make
        # sorted() and the width format specs below raise TypeError.
        predicted = str(item.get('best_command'))

        # Count every combination (including the correct ones)
        errors[expected][predicted] += 1

# Show only the errors
print("\n" + "="*80)
print("FEHLERANALYSE: Expected → Predicted (nur Fehler)")
print("="*80)

total_errors = 0
for expected in sorted(errors):
    outcomes = errors[expected]

    # Wrong predictions for this expected command, sorted by predicted name
    wrong = [
        (predicted, count)
        for predicted, count in sorted(outcomes.items())
        if predicted != expected
    ]
    total_errors += sum(count for _, count in wrong)

    if wrong:
        correct = outcomes.get(expected, 0)
        total = sum(outcomes.values())
        error_list = [f"{predicted}({count}x)" for predicted, count in wrong]
        print(f"\n{expected.upper():12} ({correct}/{total} korrekt)")
        print(f"  Verwechselt mit: {', '.join(error_list)}")

print(f"\n{'='*80}")
print(f"Gesamt-Fehler: {total_errors}")

# Confusion matrix (optional, more detailed)
print("\n" + "="*80)
print("CONFUSION MATRIX")
print("="*80)

# Every command that occurs as an expectation or as a prediction
all_commands = sorted(set(errors) | {pred for preds in errors.values() for pred in preds})

# Header
print(f"{'Expected':12}", end='')
for cmd in all_commands:
    print(f"{cmd:>10}", end='')
print()
print("-"*80)

# Rows
for expected in all_commands:
    print(f"{expected:12}", end='')
    # .get() instead of indexing: indexing the defaultdict would insert an
    # empty row for commands that only ever occur as a prediction
    row = errors.get(expected, {})
    for predicted in all_commands:
        count = row.get(predicted, 0)
        if expected == predicted and count > 0:
            print(f"{count:>10}", end='')  # correct predictions
        elif count > 0:
            print(f"\033[91m{count:>10}\033[0m", end='')  # errors in red
        else:
            print(f"{'':>10}", end='')
    print()

print("\n" + "="*80)

In [None]:
# Inspect the hits
print("\n" + "="*80)
print(" "*25 + "SMART PARSER TEST REPORT")
print("="*80)

# Per-command accuracy
from collections import defaultdict

# command_stats[expected command] = {'total': scored sentences, 'correct': hits}
command_stats = defaultdict(lambda: {'total': 0, 'correct': 0})

for category, tests in all_tests.items():
    for item in tests:
        # Items without an expectation are not scored
        if item['expected'] is None:
            continue

        expected_cmd = item['expected']['command']
        predicted_cmd = item.get('best_command')

        command_stats[expected_cmd]['total'] += 1
        if expected_cmd == predicted_cmd:
            command_stats[expected_cmd]['correct'] += 1

print("\nACCURACY PRO COMMAND:")
print('-'*80)
for cmd, stats in sorted(command_stats.items()):
    acc = 100 * stats['correct'] / stats['total'] if stats['total'] > 0 else 0
    bar = '█' * int(acc / 5)  # visual bar, one block per 5 percentage points
    print(f"{cmd:<10} {stats['correct']:>2}/{stats['total']:<2} ({acc:>5.1f}%) {bar}")

print("\n" + "="*80)
print(" "*25 + "ACCURACY PRO SENTENCE")
print("="*80)

for category, tests in all_tests.items():

    print(f"\nKATEGORIE: {category.upper()}")
    print('-'*80)

    for i, item in enumerate(tests, 1):
        print(f"\n[{i}] {item['sentence']}")
        print(f"    Expected:  {item['expected']}")

        # .get() throughout, so items that have not been through the parsing
        # and matching steps are still reported instead of raising KeyError
        print(f"    Parsed:    verb='{item.get('rootverb')}', objects={item.get('objects', [])}")

        predicted_cmd = item.get('best_command', 'N/A')
        score = item.get('best_sim', 0)

        if item['expected'] is not None:
            # Scored item: mark whether the prediction matches the expectation
            suffix = '✓' if item['expected']['command'] == predicted_cmd else '✗'
        else:
            # No expectation: print the prediction only
            suffix = '[no expected]'

        print(f"    Predicted: {predicted_cmd} (score: {score:.3f}) {suffix}")



## Objekt Matching

Bezeichnung, Beschreibung und Synonyme werden/wurden embedded und sollen genutzt werden.

In [None]:

# Query helper
def run_query(query, params=None):
    """Run one query in a fresh session and return all of its records.

    Args:
        query: Query text passed to ``session.run``.
        params: Optional query parameters; ``None`` (or any falsy value) is
            replaced by an empty dict.

    Returns:
        A list with ``record.data()`` for every record of the result, read
        while the session is still open.
    """
    with driver.session() as session:
        return [record.data() for record in session.run(query, params or {})]

# Objektsuche
def find_object_in_db(verb, object):

    emb_object = matching_model.encode(object)

    query = """
        MATCH (p:Player {id: 'testplayer})-[:IST_IN]->(loc:Location)
        CALL db.index.vector.queryNodes('item_name_index', 5, $object_embedding)
        YIELD node AS item, score
        WHERE (item)-[:IST_IN]->(loc)
        RETURN item.id, item.name, score
        LIMIT 1
        """
    
    params = {'object_embedding', emb_object.tolist()}

    run_query(query, params)

In [None]:
# Look up every parsed object of every test item in the graph database.
# find_object_in_db takes (verb, object); it is called once per object with
# the parsed root verb and the object's text, and the results are kept on the
# item under 'db_matches'.
for category, tests in all_tests.items():

    for items in tests:

        items['db_matches'] = [
            # Parsed objects may be token objects (with a .text attribute) or
            # plain strings; pass the text in either case.
            find_object_in_db(items.get('rootverb'), getattr(obj, 'text', obj))
            for obj in items.get('objects', [])
        ]
