From b2ba512d4cd906292ec166f082802b0ee31159e7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 Aug 2025 22:05:57 +0000 Subject: [PATCH 1/2] Initial plan From c6434a2758a862d3d4c52df281f2f7a7f85a0405 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 Aug 2025 22:28:40 +0000 Subject: [PATCH 2/2] Implement backend parser for UML/SYSML diagrams with database storage Co-authored-by: vinod0m <221896197+vinod0m@users.noreply.github.com> --- examples/parser_demo.py | 186 ++++++++++ src/parsers/.gitignore | 7 + src/parsers/README.md | 120 ++++++ src/parsers/__init__.py | 24 ++ src/parsers/base_parser.py | 121 ++++++ src/parsers/database/__init__.py | 41 ++ src/parsers/database/models.py | 350 +++++++++++++++++ src/parsers/database/utils.py | 342 +++++++++++++++++ src/parsers/drawio_parser.py | 314 ++++++++++++++++ src/parsers/mermaid_parser.py | 434 ++++++++++++++++++++++ src/parsers/plantuml_parser.py | 256 +++++++++++++ test/unit/parsers/test_base_parser.py | 175 +++++++++ test/unit/parsers/test_database.py | 334 +++++++++++++++++ test/unit/parsers/test_drawio_parser.py | 259 +++++++++++++ test/unit/parsers/test_mermaid_parser.py | 290 +++++++++++++++ test/unit/parsers/test_plantuml_parser.py | 271 ++++++++++++++ 16 files changed, 3524 insertions(+) create mode 100644 examples/parser_demo.py create mode 100644 src/parsers/.gitignore create mode 100644 src/parsers/README.md create mode 100644 src/parsers/__init__.py create mode 100644 src/parsers/base_parser.py create mode 100644 src/parsers/database/__init__.py create mode 100644 src/parsers/database/models.py create mode 100644 src/parsers/database/utils.py create mode 100644 src/parsers/drawio_parser.py create mode 100644 src/parsers/mermaid_parser.py create mode 100644 src/parsers/plantuml_parser.py create mode 100644 test/unit/parsers/test_base_parser.py create mode 100644 
test/unit/parsers/test_database.py create mode 100644 test/unit/parsers/test_drawio_parser.py create mode 100644 test/unit/parsers/test_mermaid_parser.py create mode 100644 test/unit/parsers/test_plantuml_parser.py diff --git a/examples/parser_demo.py b/examples/parser_demo.py new file mode 100644 index 00000000..951cce25 --- /dev/null +++ b/examples/parser_demo.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +""" +Example script demonstrating the parser functionality. + +This script shows how to use the different parsers to extract information +from diagram files and store them in the database. +""" + +import sys +from pathlib import Path + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from parsers import PlantUMLParser, MermaidParser, DrawIOParser +from parsers.database import DiagramDatabase, get_diagram_statistics +from parsers.base_parser import DiagramType + + +def create_sample_diagrams(): + """Create sample diagram content for testing.""" + + plantuml_content = """ + @startuml + title Sample Class Diagram + + class User { + +id: String + +name: String + +email: String + +login(): boolean + +logout(): void + } + + class Admin { + +permissions: List + +manageUsers(): void + } + + interface Authenticatable { + +authenticate(): boolean + } + + User <|-- Admin + User ..|> Authenticatable + @enduml + """ + + mermaid_content = """ + classDiagram + class Animal { + +String name + +int age + +makeSound() void + } + class Dog { + +String breed + +bark() void + } + class Cat { + +int lives + +meow() void + } + Animal <|-- Dog + Animal <|-- Cat + """ + + drawio_content = """ + + + + + + + + + + + + + + + """ + + return { + 'plantuml': plantuml_content, + 'mermaid': mermaid_content, + 'drawio': drawio_content + } + + +def main(): + """Main demonstration function.""" + print("šŸ” Diagram Parser Demo") + print("=" * 50) + + # Initialize database + db = DiagramDatabase("demo_diagrams.db") + print("āœ… Database initialized") 
+ + # Create parsers + parsers = { + 'PlantUML': PlantUMLParser(), + 'Mermaid': MermaidParser(), + 'DrawIO': DrawIOParser() + } + + # Get sample diagrams + samples = create_sample_diagrams() + + diagram_ids = [] + + # Parse and store each diagram type + for parser_name, parser in parsers.items(): + print(f"\nšŸ“Š Testing {parser_name} Parser") + print("-" * 30) + + if parser_name == 'PlantUML': + content = samples['plantuml'] + filename = "sample.puml" + elif parser_name == 'Mermaid': + content = samples['mermaid'] + filename = "sample.mmd" + else: # DrawIO + content = samples['drawio'] + filename = "sample.drawio" + + try: + # Parse the content + parsed = parser.parse(content, filename) + + print(f" šŸ“„ Source: {parsed.source_file}") + print(f" šŸ”¢ Elements: {len(parsed.elements)}") + print(f" šŸ”— Relationships: {len(parsed.relationships)}") + print(f" šŸ·ļø Tags: {len(parsed.tags)}") + + # Store in database + diagram_id = db.store_diagram(parsed) + diagram_ids.append(diagram_id) + print(f" šŸ’¾ Stored with ID: {diagram_id}") + + # Show element details + for element in parsed.elements[:3]: # Show first 3 elements + print(f" - {element.element_type.value}: {element.name}") + + if len(parsed.elements) > 3: + print(f" ... 
and {len(parsed.elements) - 3} more") + + except Exception as e: + print(f" āŒ Error: {e}") + + # Show database statistics + print(f"\nšŸ“ˆ Database Statistics") + print("-" * 30) + + all_diagrams = db.get_all_diagrams() + print(f" šŸ“Š Total diagrams: {len(all_diagrams)}") + + for diagram_id in diagram_ids: + stats = get_diagram_statistics(db, diagram_id) + diagram = db.get_diagram(diagram_id) + if diagram: + print(f" \nšŸ” {diagram.source_file} ({diagram.diagram_type}):") + print(f" - Elements: {stats['total_elements']}") + print(f" - Relationships: {stats['total_relationships']}") + print(f" - Element types: {list(stats['element_type_counts'].keys())}") + + # Demonstrate search functionality + print(f"\nšŸ” Search Examples") + print("-" * 30) + + # Search by element type + classes = db.search_elements_by_type("class") + print(f" šŸ“‹ Found {len(classes)} class elements") + + # Search by tags (if any were created) + tag_results = db.search_by_tags(["important", "core", "api"]) + total_tagged = sum(len(results) for results in tag_results.values()) + print(f" šŸ·ļø Found {total_tagged} tagged items") + + print(f"\nāœ… Demo completed successfully!") + print(f"šŸ’¾ Database saved as: demo_diagrams.db") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/parsers/.gitignore b/src/parsers/.gitignore new file mode 100644 index 00000000..4dc3184c --- /dev/null +++ b/src/parsers/.gitignore @@ -0,0 +1,7 @@ +# Ignore database files created during testing +*.db + +# Ignore Python cache +__pycache__/ +*.pyc +*.pyo \ No newline at end of file diff --git a/src/parsers/README.md b/src/parsers/README.md new file mode 100644 index 00000000..701622a8 --- /dev/null +++ b/src/parsers/README.md @@ -0,0 +1,120 @@ +# Parser Module Documentation + +The parser module provides backend parsing capabilities for different diagram input formats (UML/SYSML). 
It extracts information from diagram sources and creates a relational database with relevant tags for downstream tool implementations. + +## Supported Formats + +### PlantUML (.puml, .plantuml, .pu) +- Class diagrams with attributes and methods +- Interface definitions +- Actors and components +- Inheritance, composition, aggregation, association, and dependency relationships +- Comments and metadata extraction + +### Mermaid (.mmd, .mermaid) +- Class diagrams +- Flowcharts and graphs +- Sequence diagrams +- Entity-relationship diagrams +- Various node shapes and connection types + +### DrawIO (.drawio, .xml) +- XML-based diagram formats +- Shape and connector extraction +- Style property parsing +- Position and geometry information + +## Architecture + +### Base Classes +- `BaseParser`: Abstract base class defining the parser interface +- `ParsedDiagram`: Container for parsed diagram data +- `DiagramElement`: Represents individual diagram elements +- `DiagramRelationship`: Represents relationships between elements + +### Database Layer +- `DiagramDatabase`: SQLite-based storage for parsed diagrams +- Database models for diagrams, elements, and relationships +- Search and query capabilities +- Export functionality (JSON, CSV) + +## Usage Example + +```python +from parsers import PlantUMLParser, MermaidParser, DrawIOParser +from parsers.database import DiagramDatabase + +# Initialize parser and database +parser = PlantUMLParser() +db = DiagramDatabase("diagrams.db") + +# Parse diagram content +with open("diagram.puml", "r") as f: + content = f.read() + +parsed_diagram = parser.parse(content, "diagram.puml") + +# Store in database +diagram_id = db.store_diagram(parsed_diagram) + +# Query elements +elements = db.get_elements(diagram_id) +for element in elements: + print(f"{element.element_type}: {element.name}") +``` + +## Testing + +The module includes comprehensive unit tests for: +- Base parser functionality +- Format-specific parsing +- Database operations +- 
Search and export features + +Run tests with: `python -m pytest test/unit/parsers/` + +## Database Schema + +### Diagrams Table +- `id`: Unique identifier +- `source_file`: Original file path +- `diagram_type`: Format type (plantuml, mermaid, drawio) +- `metadata`: JSON metadata +- `tags`: JSON tag array +- `created_at`, `updated_at`: Timestamps + +### Elements Table +- `id`: Unique identifier +- `diagram_id`: Foreign key to diagrams +- `element_id`: Element identifier within diagram +- `element_type`: Type (class, interface, component, etc.) +- `name`: Element name +- `properties`: JSON properties +- `position`: JSON position data +- `tags`: JSON tag array + +### Relationships Table +- `id`: Unique identifier +- `diagram_id`: Foreign key to diagrams +- `relationship_id`: Relationship identifier +- `source_element_id`: Source element +- `target_element_id`: Target element +- `relationship_type`: Type (inheritance, composition, etc.) +- `properties`: JSON properties +- `tags`: JSON tag array + +## Extensibility + +The modular design allows for easy addition of new diagram formats: + +1. Inherit from `BaseParser` +2. Implement required abstract methods +3. Add format-specific parsing logic +4. Register in the main module + +## Error Handling + +- `ParseError`: Raised when diagram parsing fails +- Graceful handling of malformed content +- Validation of diagram integrity +- Database transaction safety \ No newline at end of file diff --git a/src/parsers/__init__.py b/src/parsers/__init__.py new file mode 100644 index 00000000..371b5165 --- /dev/null +++ b/src/parsers/__init__.py @@ -0,0 +1,24 @@ +""" +Parser module for processing different diagram formats (UML/SYSML). + +This module provides parsers for: +- PlantUML (.puml, .plantuml) +- Mermaid (.mmd, .mermaid) +- DrawIO (.drawio, .xml) + +The parsers extract information from diagram sources and create structured data +with relevant tags for downstream tool implementations. 
class DiagramType(Enum):
    """Diagram source formats handled by the parser package."""
    PLANTUML = "plantuml"
    MERMAID = "mermaid"
    DRAWIO = "drawio"


class ElementType(Enum):
    """Kinds of elements that can appear in a parsed diagram."""
    CLASS = "class"
    INTERFACE = "interface"
    COMPONENT = "component"
    ACTOR = "actor"
    USE_CASE = "use_case"
    RELATIONSHIP = "relationship"
    PACKAGE = "package"
    NOTE = "note"
    BOUNDARY = "boundary"
    CONTROL = "control"
    ENTITY = "entity"


@dataclass
class DiagramElement:
    """A single element (node) extracted from a diagram."""
    id: str
    element_type: ElementType
    name: str
    # Mutable defaults go through default_factory so instances never share them.
    properties: Dict[str, Any] = field(default_factory=dict)
    position: Optional[Dict[str, float]] = None
    tags: List[str] = field(default_factory=list)


@dataclass
class DiagramRelationship:
    """A directed relationship between two diagram elements."""
    id: str
    source_id: str
    target_id: str
    relationship_type: str
    properties: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)


@dataclass
class ParsedDiagram:
    """Container for everything a parser extracted from one diagram."""
    diagram_type: DiagramType
    source_file: str
    elements: List[DiagramElement] = field(default_factory=list)
    relationships: List[DiagramRelationship] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)


class BaseParser(ABC):
    """Abstract base class for all diagram parsers.

    Subclasses provide ``supported_extensions``, ``diagram_type`` and
    ``parse``; ``parse_file`` and ``validate_extension`` are built on those.
    """

    @property
    @abstractmethod
    def supported_extensions(self) -> List[str]:
        """Return list of supported file extensions."""

    @property
    @abstractmethod
    def diagram_type(self) -> DiagramType:
        """Return the diagram type this parser handles."""

    @abstractmethod
    def parse(self, content: str, source_file: str = "") -> ParsedDiagram:
        """
        Parse diagram content and return structured data.

        Args:
            content: Raw diagram content (string)
            source_file: Optional source file path for context

        Returns:
            ParsedDiagram object containing extracted information

        Raises:
            ParseError: If content cannot be parsed
        """

    def parse_file(self, file_path: str) -> ParsedDiagram:
        """
        Parse diagram from file.

        The file is read as UTF-8 and handed to ``parse`` together with its
        path.

        Args:
            file_path: Path to diagram file

        Returns:
            ParsedDiagram object containing extracted information
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        return self.parse(content, file_path)

    def validate_extension(self, file_path: str) -> bool:
        """Check if file extension is supported by this parser.

        The comparison is case-insensitive on both sides: the path *and* the
        declared extensions are lower-cased, so a parser that declares
        ``.PUML`` still accepts ``diagram.puml`` and vice versa.

        Args:
            file_path: Path (or path-like object) to check.

        Returns:
            True if the path ends with one of ``supported_extensions``.
        """
        # str.endswith takes a tuple of candidates; an empty tuple yields False.
        suffixes = tuple(ext.lower() for ext in self.supported_extensions)
        return str(file_path).lower().endswith(suffixes)


class ParseError(Exception):
    """Exception raised when diagram parsing fails."""
+""" + +from .models import ( + DiagramRecord, + ElementRecord, + RelationshipRecord, + DiagramDatabase +) + +from .utils import ( + DiagramQueryBuilder, + export_diagram_to_json, + export_elements_to_csv, + get_diagram_statistics, + find_orphaned_elements, + find_circular_dependencies, + get_element_dependencies, + merge_diagrams, + validate_diagram_integrity +) + +__all__ = [ + 'DiagramRecord', + 'ElementRecord', + 'RelationshipRecord', + 'DiagramDatabase', + 'DiagramQueryBuilder', + 'export_diagram_to_json', + 'export_elements_to_csv', + 'get_diagram_statistics', + 'find_orphaned_elements', + 'find_circular_dependencies', + 'get_element_dependencies', + 'merge_diagrams', + 'validate_diagram_integrity' +] \ No newline at end of file diff --git a/src/parsers/database/models.py b/src/parsers/database/models.py new file mode 100644 index 00000000..7dd96051 --- /dev/null +++ b/src/parsers/database/models.py @@ -0,0 +1,350 @@ +""" +Database models for storing parsed diagram information. + +This module defines data models and database schema for storing +extracted diagram elements, relationships, and metadata. 
@dataclass
class DiagramRecord:
    """Database record for a parsed diagram."""
    id: Optional[int] = None
    source_file: str = ""
    diagram_type: str = ""
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)


@dataclass
class ElementRecord:
    """Database record for a diagram element."""
    id: Optional[int] = None
    diagram_id: int = 0
    element_id: str = ""
    element_type: str = ""
    name: str = ""
    properties: Dict[str, Any] = field(default_factory=dict)
    position: Optional[Dict[str, float]] = None
    tags: List[str] = field(default_factory=list)


@dataclass
class RelationshipRecord:
    """Database record for a diagram relationship."""
    id: Optional[int] = None
    diagram_id: int = 0
    relationship_id: str = ""
    source_element_id: str = ""
    target_element_id: str = ""
    relationship_type: str = ""
    properties: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)


class _ManagedConnection:
    """Context manager around one short-lived SQLite connection.

    Using ``sqlite3.Connection`` itself as a context manager only commits or
    rolls back; it never closes the connection.  This wrapper commits on
    success, rolls back on error, and always closes.  It also switches on
    foreign-key enforcement, which SQLite leaves off by default, so the
    schema's ``ON DELETE CASCADE`` clauses take effect.
    """

    def __init__(self, db_path: str):
        self._db_path = db_path
        self._conn: Optional[sqlite3.Connection] = None

    def __enter__(self) -> sqlite3.Connection:
        conn = sqlite3.connect(self._db_path)
        try:
            # Must run outside a transaction; a fresh connection has none open.
            conn.execute('PRAGMA foreign_keys = ON')
        except sqlite3.Error:
            conn.close()
            raise
        self._conn = conn
        return conn

    def __exit__(self, exc_type, exc, tb) -> bool:
        conn, self._conn = self._conn, None
        try:
            if exc_type is None:
                conn.commit()
            else:
                conn.rollback()
        finally:
            conn.close()
        return False  # never swallow the caller's exception


class DiagramDatabase:
    """SQLite database for storing parsed diagram information.

    Every public method opens its own connection, runs inside a single
    transaction, and closes the connection before returning.
    """

    def __init__(self, db_path: Union[str, Path] = "diagrams.db"):
        """Initialize database connection and create tables if needed."""
        self.db_path = str(db_path)
        self._init_database()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _connect(self) -> _ManagedConnection:
        """Return a context manager yielding a ready-to-use connection."""
        return _ManagedConnection(self.db_path)

    @staticmethod
    def _loads(raw: Optional[str], default: Any) -> Any:
        """Decode a JSON column, falling back to *default* for NULL/empty."""
        return json.loads(raw) if raw else default

    @staticmethod
    def _timestamp(raw: Optional[str]) -> Optional[datetime]:
        """Decode a timestamp column, keeping NULL/empty as ``None``."""
        return datetime.fromisoformat(raw) if raw else None

    @classmethod
    def _diagram_from_row(cls, row: sqlite3.Row) -> DiagramRecord:
        """Build a DiagramRecord from a ``diagrams`` row."""
        return DiagramRecord(
            id=row['id'],
            source_file=row['source_file'],
            diagram_type=row['diagram_type'],
            created_at=cls._timestamp(row['created_at']),
            updated_at=cls._timestamp(row['updated_at']),
            metadata=cls._loads(row['metadata'], {}),
            tags=cls._loads(row['tags'], []),
        )

    @classmethod
    def _element_from_row(cls, row: sqlite3.Row) -> ElementRecord:
        """Build an ElementRecord from an ``elements`` row."""
        return ElementRecord(
            id=row['id'],
            diagram_id=row['diagram_id'],
            element_id=row['element_id'],
            element_type=row['element_type'],
            name=row['name'],
            properties=cls._loads(row['properties'], {}),
            position=cls._loads(row['position'], None),
            tags=cls._loads(row['tags'], []),
        )

    @classmethod
    def _relationship_from_row(cls, row: sqlite3.Row) -> RelationshipRecord:
        """Build a RelationshipRecord from a ``relationships`` row."""
        return RelationshipRecord(
            id=row['id'],
            diagram_id=row['diagram_id'],
            relationship_id=row['relationship_id'],
            source_element_id=row['source_element_id'],
            target_element_id=row['target_element_id'],
            relationship_type=row['relationship_type'],
            properties=cls._loads(row['properties'], {}),
            tags=cls._loads(row['tags'], []),
        )

    def _init_database(self):
        """Initialize database schema."""
        statements = (
            '''
                CREATE TABLE IF NOT EXISTS diagrams (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_file TEXT NOT NULL,
                    diagram_type TEXT NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    metadata TEXT,
                    tags TEXT
                )
            ''',
            '''
                CREATE TABLE IF NOT EXISTS elements (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    diagram_id INTEGER NOT NULL,
                    element_id TEXT NOT NULL,
                    element_type TEXT NOT NULL,
                    name TEXT NOT NULL,
                    properties TEXT,
                    position TEXT,
                    tags TEXT,
                    FOREIGN KEY (diagram_id) REFERENCES diagrams (id) ON DELETE CASCADE
                )
            ''',
            '''
                CREATE TABLE IF NOT EXISTS relationships (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    diagram_id INTEGER NOT NULL,
                    relationship_id TEXT NOT NULL,
                    source_element_id TEXT NOT NULL,
                    target_element_id TEXT NOT NULL,
                    relationship_type TEXT NOT NULL,
                    properties TEXT,
                    tags TEXT,
                    FOREIGN KEY (diagram_id) REFERENCES diagrams (id) ON DELETE CASCADE
                )
            ''',
            # Indexes for better query performance
            'CREATE INDEX IF NOT EXISTS idx_elements_diagram_id ON elements (diagram_id)',
            'CREATE INDEX IF NOT EXISTS idx_elements_type ON elements (element_type)',
            'CREATE INDEX IF NOT EXISTS idx_relationships_diagram_id ON relationships (diagram_id)',
            'CREATE INDEX IF NOT EXISTS idx_relationships_type ON relationships (relationship_type)',
            'CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships (source_element_id)',
            'CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships (target_element_id)',
        )
        with self._connect() as conn:
            for statement in statements:
                conn.execute(statement)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def store_diagram(self, parsed_diagram) -> int:
        """
        Store a parsed diagram in the database.

        The diagram row, its elements and its relationships are written in
        one transaction; if any insert fails nothing is persisted.

        Args:
            parsed_diagram: ParsedDiagram object to store

        Returns:
            Database ID of the stored diagram
        """
        with self._connect() as conn:
            cursor = conn.execute('''
                INSERT INTO diagrams (source_file, diagram_type, metadata, tags)
                VALUES (?, ?, ?, ?)
            ''', (
                parsed_diagram.source_file,
                parsed_diagram.diagram_type.value,
                json.dumps(parsed_diagram.metadata),
                json.dumps(parsed_diagram.tags)
            ))
            diagram_id = cursor.lastrowid

            for element in parsed_diagram.elements:
                conn.execute('''
                    INSERT INTO elements (
                        diagram_id, element_id, element_type, name,
                        properties, position, tags
                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                ''', (
                    diagram_id,
                    element.id,
                    element.element_type.value,
                    element.name,
                    json.dumps(element.properties),
                    json.dumps(element.position) if element.position else None,
                    json.dumps(element.tags)
                ))

            for relationship in parsed_diagram.relationships:
                conn.execute('''
                    INSERT INTO relationships (
                        diagram_id, relationship_id, source_element_id,
                        target_element_id, relationship_type, properties, tags
                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                ''', (
                    diagram_id,
                    relationship.id,
                    relationship.source_id,
                    relationship.target_id,
                    relationship.relationship_type,
                    json.dumps(relationship.properties),
                    json.dumps(relationship.tags)
                ))

            return diagram_id

    def get_diagram(self, diagram_id: int) -> Optional[DiagramRecord]:
        """Retrieve a diagram record by ID, or ``None`` if it does not exist."""
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                'SELECT * FROM diagrams WHERE id = ?', (diagram_id,)
            ).fetchone()
            return self._diagram_from_row(row) if row else None

    def get_elements(self, diagram_id: int) -> List[ElementRecord]:
        """Retrieve all elements for a diagram."""
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                'SELECT * FROM elements WHERE diagram_id = ?', (diagram_id,)
            )
            return [self._element_from_row(row) for row in cursor]

    def get_relationships(self, diagram_id: int) -> List[RelationshipRecord]:
        """Retrieve all relationships for a diagram."""
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                'SELECT * FROM relationships WHERE diagram_id = ?', (diagram_id,)
            )
            return [self._relationship_from_row(row) for row in cursor]

    def search_elements_by_type(self, element_type: str) -> List[ElementRecord]:
        """Search elements by type across all diagrams."""
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                'SELECT * FROM elements WHERE element_type = ?', (element_type,)
            )
            return [self._element_from_row(row) for row in cursor]

    def search_by_tags(self, tags: List[str]) -> Dict[str, List[Dict[str, Any]]]:
        """Search diagrams, elements, and relationships by tags.

        Args:
            tags: Tags to look for; a row is reported once per matching tag.

        Returns:
            Dict with the keys ``'diagrams'``, ``'elements'`` and
            ``'relationships'``, each holding a list of summary dicts that
            carry the matched tag under ``'matching_tag'``.
        """
        # Table name -> columns copied into each hit.  Table names are fixed
        # constants, never user input, so formatting them into SQL is safe.
        searches = (
            ('diagrams', ('id', 'source_file', 'diagram_type')),
            ('elements', ('id', 'diagram_id', 'element_id', 'name', 'element_type')),
            ('relationships', ('id', 'diagram_id', 'relationship_id', 'relationship_type')),
        )
        results: Dict[str, List[Dict[str, Any]]] = {table: [] for table, _ in searches}

        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            for table, columns in searches:
                for tag in tags:
                    # Tags are stored JSON-encoded (non-ASCII becomes \uXXXX),
                    # so the LIKE pre-filter must use the same encoding or
                    # such tags would never be found.  LIKE may over-match;
                    # the exact membership test below decides.
                    pattern = f'%{json.dumps(tag)}%'
                    cursor = conn.execute(
                        f'SELECT * FROM {table} WHERE tags LIKE ?', (pattern,)
                    )
                    for row in cursor:
                        if tag in self._loads(row['tags'], []):
                            hit = {column: row[column] for column in columns}
                            hit['matching_tag'] = tag
                            results[table].append(hit)

        return results

    def get_all_diagrams(self) -> List[DiagramRecord]:
        """Retrieve all diagram records, newest first."""
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute('SELECT * FROM diagrams ORDER BY created_at DESC')
            return [self._diagram_from_row(row) for row in cursor]

    def delete_diagram(self, diagram_id: int) -> bool:
        """Delete a diagram and all its related records.

        Child rows are removed explicitly as well as through the cascading
        foreign keys, so the result does not depend on foreign-key support
        being available in the SQLite build.

        Returns:
            True if a diagram row was deleted, False if the ID was unknown.
        """
        with self._connect() as conn:
            conn.execute('DELETE FROM relationships WHERE diagram_id = ?', (diagram_id,))
            conn.execute('DELETE FROM elements WHERE diagram_id = ?', (diagram_id,))
            cursor = conn.execute('DELETE FROM diagrams WHERE id = ?', (diagram_id,))
            return cursor.rowcount > 0
def export_diagram_to_json(db: DiagramDatabase, diagram_id: int) -> Dict[str, Any]:
    """Build a JSON-serialisable dict describing one stored diagram.

    Args:
        db: Database to read the diagram, its elements and relationships from.
        diagram_id: Database ID of the diagram to export.

    Returns:
        Dict with the keys ``'diagram'``, ``'elements'`` and
        ``'relationships'``; timestamps are rendered as ISO-8601 strings
        (or ``None`` when absent).

    Raises:
        ValueError: If no diagram with ``diagram_id`` exists.
    """
    record = db.get_diagram(diagram_id)
    if not record:
        raise ValueError(f"Diagram with ID {diagram_id} not found")

    element_rows = db.get_elements(diagram_id)
    relationship_rows = db.get_relationships(diagram_id)

    # datetimes are not JSON-serialisable, so convert them up front.
    created = record.created_at.isoformat() if record.created_at else None
    updated = record.updated_at.isoformat() if record.updated_at else None

    exported_elements = []
    for row in element_rows:
        exported_elements.append({
            'id': row.id,
            'element_id': row.element_id,
            'element_type': row.element_type,
            'name': row.name,
            'properties': row.properties,
            'position': row.position,
            'tags': row.tags,
        })

    exported_relationships = []
    for row in relationship_rows:
        exported_relationships.append({
            'id': row.id,
            'relationship_id': row.relationship_id,
            'source_element_id': row.source_element_id,
            'target_element_id': row.target_element_id,
            'relationship_type': row.relationship_type,
            'properties': row.properties,
            'tags': row.tags,
        })

    return {
        'diagram': {
            'id': record.id,
            'source_file': record.source_file,
            'diagram_type': record.diagram_type,
            'created_at': created,
            'updated_at': updated,
            'metadata': record.metadata,
            'tags': record.tags,
        },
        'elements': exported_elements,
        'relationships': exported_relationships,
    }
def get_diagram_statistics(db: DiagramDatabase, diagram_id: int) -> Dict[str, Any]:
    """Summarise one diagram: totals plus per-type and per-tag counts.

    Args:
        db: Database to read elements and relationships from.
        diagram_id: Database ID of the diagram to summarise.

    Returns:
        Dict with ``total_elements``, ``total_relationships``,
        ``element_type_counts``, ``relationship_type_counts``,
        ``tag_counts`` (element and relationship tags combined) and
        ``unique_tags``.
    """
    element_rows = db.get_elements(diagram_id)
    relationship_rows = db.get_relationships(diagram_id)

    def tally(values):
        # Frequency table that keeps first-seen order of the keys.
        counts = {}
        for value in values:
            counts[value] = counts.get(value, 0) + 1
        return counts

    by_element_type = tally(row.element_type for row in element_rows)
    by_relationship_type = tally(row.relationship_type for row in relationship_rows)
    # Element tags are counted before relationship tags.
    tag_frequency = tally(
        tag
        for rows in (element_rows, relationship_rows)
        for row in rows
        for tag in row.tags
    )

    return {
        'total_elements': len(element_rows),
        'total_relationships': len(relationship_rows),
        'element_type_counts': by_element_type,
        'relationship_type_counts': by_relationship_type,
        'tag_counts': tag_frequency,
        'unique_tags': len(tag_frequency)
    }


def find_orphaned_elements(db: DiagramDatabase, diagram_id: int) -> List[ElementRecord]:
    """Return the elements of a diagram that take part in no relationship.

    An element counts as connected when its ``element_id`` appears as the
    source or the target of at least one relationship of the same diagram.
    """
    element_rows = db.get_elements(diagram_id)
    relationship_rows = db.get_relationships(diagram_id)

    linked_ids = {
        endpoint
        for rel in relationship_rows
        for endpoint in (rel.source_element_id, rel.target_element_id)
    }

    return [row for row in element_rows if row.element_id not in linked_ids]
relationships.""" + relationships = db.get_relationships(diagram_id) + + # Build adjacency list + graph = {} + for rel in relationships: + if rel.source_element_id not in graph: + graph[rel.source_element_id] = [] + graph[rel.source_element_id].append(rel.target_element_id) + + # Find cycles using DFS + def find_cycles_dfs(node, path, visited, cycles): + if node in path: + # Found a cycle + cycle_start = path.index(node) + cycle = path[cycle_start:] + [node] + cycles.append(cycle) + return + + if node in visited: + return + + visited.add(node) + path.append(node) + + if node in graph: + for neighbor in graph[node]: + find_cycles_dfs(neighbor, path, visited, cycles) + + path.pop() + + cycles = [] + visited = set() + + for node in graph: + if node not in visited: + find_cycles_dfs(node, [], visited, cycles) + + return cycles + + +def get_element_dependencies(db: DiagramDatabase, diagram_id: int, element_id: str) -> Dict[str, List[str]]: + """Get dependencies for a specific element.""" + relationships = db.get_relationships(diagram_id) + + dependencies = { + 'depends_on': [], # Elements this element depends on + 'depended_by': [] # Elements that depend on this element + } + + for rel in relationships: + if rel.source_element_id == element_id: + dependencies['depends_on'].append(rel.target_element_id) + elif rel.target_element_id == element_id: + dependencies['depended_by'].append(rel.source_element_id) + + return dependencies + + +def merge_diagrams(db: DiagramDatabase, diagram_ids: List[int], new_source_file: str) -> int: + """Merge multiple diagrams into a new diagram.""" + from ..base_parser import ParsedDiagram, DiagramType + + # Create new merged diagram + merged_diagram = ParsedDiagram( + diagram_type=DiagramType.PLANTUML, # Default type + source_file=new_source_file, + metadata={'merged_from': diagram_ids}, + tags=['merged'] + ) + + element_id_mapping = {} # Map old IDs to new IDs to avoid conflicts + id_counter = 1 + + # Collect all elements + for diagram_id 
in diagram_ids: + elements = db.get_elements(diagram_id) + for element in elements: + # Create unique ID for merged diagram + new_id = f"elem_{id_counter}" + element_id_mapping[f"{diagram_id}_{element.element_id}"] = new_id + id_counter += 1 + + # Add element to merged diagram + from ..base_parser import DiagramElement, ElementType + merged_element = DiagramElement( + id=new_id, + element_type=ElementType(element.element_type), + name=element.name, + properties=element.properties, + position=element.position, + tags=element.tags + [f"from_diagram_{diagram_id}"] + ) + merged_diagram.elements.append(merged_element) + + # Collect all relationships + for diagram_id in diagram_ids: + relationships = db.get_relationships(diagram_id) + for relationship in relationships: + source_key = f"{diagram_id}_{relationship.source_element_id}" + target_key = f"{diagram_id}_{relationship.target_element_id}" + + # Only add relationship if both elements exist in the merged diagram + if source_key in element_id_mapping and target_key in element_id_mapping: + from ..base_parser import DiagramRelationship + merged_rel = DiagramRelationship( + id=f"rel_{len(merged_diagram.relationships) + 1}", + source_id=element_id_mapping[source_key], + target_id=element_id_mapping[target_key], + relationship_type=relationship.relationship_type, + properties=relationship.properties, + tags=relationship.tags + [f"from_diagram_{diagram_id}"] + ) + merged_diagram.relationships.append(merged_rel) + + # Store merged diagram + return db.store_diagram(merged_diagram) + + +def validate_diagram_integrity(db: DiagramDatabase, diagram_id: int) -> Dict[str, Any]: + """Validate diagram integrity and return issues found.""" + elements = db.get_elements(diagram_id) + relationships = db.get_relationships(diagram_id) + + issues = { + 'missing_elements': [], + 'duplicate_element_ids': [], + 'orphaned_elements': [], + 'circular_dependencies': [], + 'invalid_relationships': [] + } + + # Check for missing elements referenced 
in relationships + element_ids = {elem.element_id for elem in elements} + for rel in relationships: + if rel.source_element_id not in element_ids: + issues['missing_elements'].append(rel.source_element_id) + if rel.target_element_id not in element_ids: + issues['missing_elements'].append(rel.target_element_id) + + # Check for duplicate element IDs + seen_ids = set() + for elem in elements: + if elem.element_id in seen_ids: + issues['duplicate_element_ids'].append(elem.element_id) + else: + seen_ids.add(elem.element_id) + + # Find orphaned elements + issues['orphaned_elements'] = [elem.element_id for elem in find_orphaned_elements(db, diagram_id)] + + # Find circular dependencies + issues['circular_dependencies'] = find_circular_dependencies(db, diagram_id) + + # Check for self-referencing relationships + for rel in relationships: + if rel.source_element_id == rel.target_element_id: + issues['invalid_relationships'].append({ + 'relationship_id': rel.relationship_id, + 'issue': 'self_reference' + }) + + return issues \ No newline at end of file diff --git a/src/parsers/drawio_parser.py b/src/parsers/drawio_parser.py new file mode 100644 index 00000000..47bd47e0 --- /dev/null +++ b/src/parsers/drawio_parser.py @@ -0,0 +1,314 @@ +""" +DrawIO parser for extracting diagram information. + +This parser handles DrawIO format files (.drawio, .xml) and extracts +shapes, connectors, and other diagram elements from the XML structure. 
class DrawIOParser(BaseParser):
    """Parser for DrawIO diagram files.

    Vertices (``mxCell`` without ``edge="1"``) become diagram elements and
    edges with both ``source`` and ``target`` become relationships. Only the
    first ``<diagram>`` page of a file is processed.
    """

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions handled by this parser."""
        return ['.drawio', '.xml']

    @property
    def diagram_type(self) -> DiagramType:
        """Diagram type reported on parsed results."""
        return DiagramType.DRAWIO

    def parse(self, content: str, source_file: str = "") -> ParsedDiagram:
        """Parse DrawIO content and extract diagram information.

        Args:
            content: XML text of the file.
            source_file: Name recorded on the returned diagram.

        Returns:
            The populated ``ParsedDiagram``.

        Raises:
            ParseError: If the content is not well-formed XML or any other
                error occurs while extracting the diagram.
        """
        try:
            root = ET.fromstring(content)
        except ET.ParseError as e:
            raise ParseError(f"Invalid XML format: {str(e)}") from e

        try:
            diagram = ParsedDiagram(
                diagram_type=self.diagram_type,
                source_file=source_file
            )
            diagram.metadata = self._extract_metadata(root)

            pages = root.findall('.//diagram')
            if pages:
                # Process first diagram page (multi-page support can be added later)
                page = pages[0]
                if len(page):
                    # Uncompressed page: the graph model is a child element.
                    self._parse_direct_xml(page, diagram)
                else:
                    diagram_data = self._decode_diagram_data(page.text)
                    if diagram_data:
                        self._parse_diagram_data(diagram_data, diagram)
            else:
                # Bare graph model without an <mxfile>/<diagram> wrapper
                self._parse_direct_xml(root, diagram)

            return diagram
        except ParseError:
            raise
        except Exception as e:
            raise ParseError(f"Failed to parse DrawIO content: {str(e)}") from e

    def _extract_metadata(self, root: ET.Element) -> Dict[str, Any]:
        """Return the ``mxfile`` attributes (host, modified, agent, version).

        An empty dict is returned when the root element is not ``mxfile``.
        """
        metadata = {}

        if root.tag == 'mxfile':
            for attribute in ('host', 'modified', 'agent', 'version'):
                metadata[attribute] = root.get(attribute, '')

        return metadata

    def _decode_diagram_data(self, encoded_data: str) -> Optional[str]:
        """Decode the text payload of a ``<diagram>`` element.

        Compressed pages are URL-encoded XML that was raw-deflated and then
        base64-encoded, so decoding runs base64 -> inflate -> URL-decode.
        Payloads that are plain XML text, or base64 text that was not
        deflated, are also accepted.

        Returns:
            The decoded XML text, or ``None`` when the payload is empty or
            cannot be decoded.
        """
        if not encoded_data or not encoded_data.strip():
            return None

        payload = encoded_data.strip()
        if payload.startswith('<'):
            # Already plain XML text.
            return payload

        try:
            # Unquoting first is harmless for plain base64 and accepts
            # payloads whose base64 text itself was URL-encoded.
            raw = base64.b64decode(urllib.parse.unquote(payload))
        except ValueError:  # binascii.Error is a ValueError subclass
            return None

        try:
            inflated = zlib.decompress(raw, -zlib.MAX_WBITS)
        except zlib.error:
            # Not deflated: treat the payload as base64-encoded text.
            try:
                return raw.decode('utf-8')
            except UnicodeDecodeError:
                return None

        try:
            return urllib.parse.unquote(inflated.decode('utf-8'))
        except UnicodeDecodeError:
            return None

    def _parse_diagram_data(self, diagram_xml: str, diagram: ParsedDiagram):
        """Parse decoded page XML into ``diagram``.

        XML that is not well-formed is ignored on purpose: the page then
        simply contributes no elements.
        """
        try:
            root = ET.fromstring(diagram_xml)
        except ET.ParseError:
            return
        self._parse_direct_xml(root, diagram)

    def _parse_direct_xml(self, root: ET.Element, diagram: ParsedDiagram):
        """Collect elements and relationships from every ``mxCell`` under ``root``."""
        for cell in root.findall('.//mxCell'):
            cell_id = cell.get('id', '')
            if not cell_id or cell_id in ['0', '1']:  # Skip root cells
                continue

            if cell.get('edge') == '1':
                self._parse_connector(cell, diagram)
            else:
                element = self._parse_element(cell)
                if element:
                    diagram.elements.append(element)

    def _parse_element(self, cell: ET.Element) -> Optional[DiagramElement]:
        """Build a ``DiagramElement`` from a vertex cell.

        Position and size are read from the ``mxGeometry`` child when it
        carries both coordinates / both dimensions.
        """
        cell_id = cell.get('id', '')
        value = cell.get('value', '')
        style = cell.get('style', '')

        position = None
        size = None
        geometry = cell.find('mxGeometry')
        if geometry is not None:
            x, y = geometry.get('x'), geometry.get('y')
            width, height = geometry.get('width'), geometry.get('height')

            if x is not None and y is not None:
                position = {'x': float(x), 'y': float(y)}
            if width is not None and height is not None:
                size = {'width': float(width), 'height': float(height)}

        properties = {
            'style': self._parse_style(style),
            'original_style': style
        }
        if size:
            properties['size'] = size

        return DiagramElement(
            id=cell_id,
            element_type=self._determine_element_type(style, value),
            name=self._extract_text_content(value),
            properties=properties,
            position=position,
            tags=self._extract_element_tags(style, value)
        )

    def _parse_connector(self, cell: ET.Element, diagram: ParsedDiagram):
        """Append a relationship for an edge cell.

        Edges lacking a ``source`` or ``target`` attribute are skipped.
        """
        source = cell.get('source', '')
        target = cell.get('target', '')
        if not source or not target:
            return

        value = cell.get('value', '')
        style = cell.get('style', '')

        properties = {
            'style': self._parse_style(style),
            'original_style': style
        }
        if value:
            properties['label'] = self._extract_text_content(value)

        diagram.relationships.append(DiagramRelationship(
            id=cell.get('id', ''),
            source_id=source,
            target_id=target,
            relationship_type=self._determine_relationship_type(style, value),
            properties=properties,
            tags=self._extract_element_tags(style, value)
        ))

    def _determine_element_type(self, style: str, value: str) -> ElementType:
        """Guess the element type from keywords in the style and label.

        The checks run in order; the first match wins and ``COMPONENT`` is
        the fallback.
        """
        style_lower = style.lower()
        value_lower = value.lower() if value else ''

        if 'umlactor' in style_lower or 'actor' in value_lower:
            return ElementType.ACTOR
        if 'rhombus' in style_lower or 'diamond' in style_lower:
            return ElementType.BOUNDARY
        if 'cylinder' in style_lower or 'database' in style_lower:
            return ElementType.ENTITY
        if 'ellipse' in style_lower and ('interface' in value_lower or 'i:' in value_lower):
            return ElementType.INTERFACE
        if 'rectangle' in style_lower or 'class' in value_lower:
            return ElementType.CLASS
        if 'note' in style_lower:
            return ElementType.NOTE
        return ElementType.COMPONENT

    def _determine_relationship_type(self, style: str, value: str) -> str:
        """Guess the relationship type from keywords in the style and label.

        The checks run in order; the first match wins and ``'association'``
        is the fallback.
        """
        style_lower = style.lower()
        value_lower = value.lower() if value else ''

        if 'inheritance' in style_lower or 'extends' in value_lower:
            return 'inheritance'
        if 'composition' in style_lower or 'filled' in style_lower:
            return 'composition'
        if 'aggregation' in style_lower:
            return 'aggregation'
        if 'dashed' in style_lower or 'dotted' in style_lower:
            return 'dependency'
        if 'implements' in value_lower:
            return 'realization'
        return 'association'

    def _parse_style(self, style: str) -> Dict[str, str]:
        """Split a ``key=value;flag;...`` style string into a dict.

        Entries without ``=`` (such as shape names) map to ``'true'``. Empty
        segments, e.g. from a trailing ``;``, are ignored.
        """
        properties = {}

        if not style:
            return properties

        for part in style.split(';'):
            if not part:
                continue
            key, separator, value = part.partition('=')
            properties[key] = value if separator else 'true'

        return properties

    def _extract_text_content(self, value: str) -> str:
        """Return the plain text of an HTML-like cell label.

        Markup tags are removed first and character references are decoded
        afterwards, so an escaped ``<`` in the label survives as text.
        """
        if not value:
            return ''

        import html
        import re

        clean_text = re.sub(r'<[^>]+>', '', value)
        clean_text = html.unescape(clean_text)

        return clean_text.strip()

    def _extract_element_tags(self, style: str, value: str) -> List[str]:
        """Derive tags from selected style properties and label keywords."""
        tags = []

        style_props = self._parse_style(style)

        # Significant style properties become "<property>:<value>" tags.
        for prop in ('shape', 'fillColor', 'strokeColor', 'fontFamily'):
            if prop in style_props:
                tags.append(f"{prop}:{style_props[prop]}")

        if value:
            value_lower = value.lower()
            for keyword in ('class', 'interface', 'abstract'):
                if keyword in value_lower:
                    tags.append(keyword)

        return tags
class MermaidParser(BaseParser):
    """Parser for Mermaid diagram files.

    Class, flowchart/graph, sequence and ER diagrams have dedicated parsing;
    any other diagram type falls back to a generic first-word scan.
    """

    # A flowchart node with a label: id((x)), id[x], id(x) or id{x}.
    # The double-parenthesis form must come first or id(x) would shadow it.
    _FLOW_NODE_RE = re.compile(
        r'(\w+)(?:\(\(([^)]+)\)\)|\[([^\]]+)\]|\(([^)]+)\)|\{([^}]+)\})'
    )
    _FLOW_NODE_SHAPES = ('circle', 'rectangular', 'rounded', 'diamond')
    # The target is matched in a lookahead so chained edges (A --> B --> C)
    # can reuse it as the next source.
    _FLOW_EDGE_RE = re.compile(r'(\w+)\s*(-->|---|-\.->|==>)\s*(?=(\w+))')
    _FLOW_EDGE_STYLES = {
        '-->': 'directed',
        '---': 'undirected',
        '-.->': 'dotted',
        '==>': 'thick',
    }

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions handled by this parser."""
        return ['.mmd', '.mermaid']

    @property
    def diagram_type(self) -> DiagramType:
        """Diagram type reported on parsed results."""
        return DiagramType.MERMAID

    def parse(self, content: str, source_file: str = "") -> ParsedDiagram:
        """Parse Mermaid content and extract diagram information.

        Args:
            content: Mermaid source text.
            source_file: Name recorded on the returned diagram.

        Returns:
            The populated ``ParsedDiagram``; ``metadata['mermaid_type']``
            holds the detected diagram type.

        Raises:
            ParseError: If any error occurs while parsing.
        """
        try:
            diagram = ParsedDiagram(
                diagram_type=self.diagram_type,
                source_file=source_file
            )

            cleaned_content = self._clean_content(content)

            mermaid_type = self._detect_mermaid_type(cleaned_content)
            diagram.metadata['mermaid_type'] = mermaid_type

            if mermaid_type == 'classDiagram':
                self._parse_class_diagram(cleaned_content, diagram)
            elif mermaid_type in ('flowchart', 'graph'):
                self._parse_flowchart(cleaned_content, diagram)
            elif mermaid_type == 'sequenceDiagram':
                self._parse_sequence_diagram(cleaned_content, diagram)
            elif mermaid_type == 'erDiagram':
                self._parse_er_diagram(cleaned_content, diagram)
            else:
                # Generic parsing for unknown types
                self._parse_generic(cleaned_content, diagram)

            diagram.tags = self._extract_global_tags(cleaned_content)
            diagram.metadata.update(self._extract_metadata(cleaned_content))

            return diagram

        except Exception as e:
            raise ParseError(f"Failed to parse Mermaid content: {str(e)}") from e

    def _clean_content(self, content: str) -> str:
        """Strip ``%%`` comments, surrounding whitespace and blank lines."""
        content = re.sub(r'%%.*', '', content)

        lines = [line.strip() for line in content.split('\n') if line.strip()]
        return '\n'.join(lines)

    def _detect_mermaid_type(self, content: str) -> str:
        """Return the diagram keyword on the first line, or ``'unknown'``."""
        first_line = content.split('\n')[0].strip()

        for keyword in ('classDiagram', 'sequenceDiagram', 'erDiagram'):
            if first_line.startswith(keyword):
                return keyword
        if first_line.startswith(('flowchart', 'graph')):
            return first_line.split()[0]
        return 'unknown'

    def _build_class_element(self, class_name: str, members: List[str]) -> DiagramElement:
        """Create the CLASS element for ``class_name`` from its member lines."""
        return DiagramElement(
            id=class_name,
            element_type=ElementType.CLASS,
            name=class_name,
            properties=self._parse_mermaid_class_body('\n'.join(members)),
            tags=[]
        )

    def _parse_class_diagram(self, content: str, diagram: ParsedDiagram):
        """Parse class declarations and relationships of a class diagram.

        ``class Name { ... }`` bodies may sit on one line or span several
        lines; member lines are collected until the closing ``}``.
        """
        lines = content.split('\n')[1:]  # Skip diagram type line

        open_class = None    # name of the class whose body is being read
        open_members = []

        for line in lines:
            line = line.strip()
            if not line:
                continue

            if open_class is not None:
                text, closed, _ = line.partition('}')
                if text.strip():
                    open_members.append(text.strip())
                if closed:
                    diagram.elements.append(self._build_class_element(open_class, open_members))
                    open_class = None
                continue

            class_match = re.match(r'class\s+(\w+)\s*(\{)?', line)
            if class_match:
                class_name = class_match.group(1)
                if not class_match.group(2):
                    diagram.elements.append(self._build_class_element(class_name, []))
                    continue

                text, closed, _ = line[class_match.end():].partition('}')
                members = [text.strip()] if text.strip() else []
                if closed:
                    diagram.elements.append(self._build_class_element(class_name, members))
                else:
                    open_class, open_members = class_name, members
                continue

            self._parse_class_relationships(line, diagram)

        if open_class is not None:
            # Body never closed: keep what was collected.
            diagram.elements.append(self._build_class_element(open_class, open_members))

    def _parse_mermaid_class_body(self, body: str) -> Dict[str, Any]:
        """Sort the lines of a class body into methods and attributes.

        A line containing both ``(`` and ``)`` is treated as a method.
        """
        properties = {'methods': [], 'attributes': []}

        if not body:
            return properties

        for line in (raw.strip() for raw in body.split('\n')):
            if not line:
                continue
            kind = 'methods' if '(' in line and ')' in line else 'attributes'
            properties[kind].append(line)

        return properties

    def _parse_class_relationships(self, line: str, diagram: ParsedDiagram):
        """Append the relationship described by ``line``, if it has one.

        The left-hand name becomes the source and the right-hand name the
        target, whatever the arrow direction. Specific arrows are tried
        before the plain ``--`` link.
        """
        arrows = [
            (r'<\|--', 'inheritance'),
            (r'--\|>', 'inheritance'),
            (r'<\|\.\.', 'realization'),
            (r'\.\.\|>', 'realization'),
            (r'\*--', 'composition'),
            (r'--\*', 'composition'),
            # The "o" marker must stand alone, otherwise "Zoo--Cage" would
            # be read as "Zo o-- Cage".
            (r'(?<=\s)o--', 'aggregation'),
            (r'--o(?=\s)', 'aggregation'),
            (r'-->', 'association'),
            (r'\.\.>', 'dependency'),
            (r'--', 'association'),
        ]

        for arrow, rel_type in arrows:
            match = re.match(rf'(\w+)\s*{arrow}\s*(\w+)', line)
            if match:
                diagram.relationships.append(DiagramRelationship(
                    id=f"rel_{len(diagram.relationships) + 1}",
                    source_id=match.group(1),
                    target_id=match.group(2),
                    relationship_type=rel_type,
                    properties={},
                    tags=[]
                ))
                break

    def _parse_flowchart(self, content: str, diagram: ParsedDiagram):
        """Parse nodes and edges of a flowchart/graph diagram.

        Every labelled node on a line is registered. Edges are then matched
        on the line with node decorations and ``|edge labels|`` removed, so
        ``A[Start] -->|go| B[End]`` and chains like ``A --> B --> C`` are
        recognised.
        """
        lines = content.split('\n')[1:]  # Skip diagram type line

        created_nodes = set()

        def ensure_node(node_id, name, shape):
            # First definition wins; later mentions do not overwrite it.
            if node_id in created_nodes:
                return
            diagram.elements.append(DiagramElement(
                id=node_id,
                element_type=ElementType.COMPONENT,
                name=name,
                properties={'shape': shape},
                tags=[]
            ))
            created_nodes.add(node_id)

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Edge labels may contain brackets; drop them before node scanning.
            line = re.sub(r'\|[^|]*\|', ' ', line)

            for match in self._FLOW_NODE_RE.finditer(line):
                for shape, label in zip(self._FLOW_NODE_SHAPES, match.groups()[1:]):
                    if label is not None:
                        ensure_node(match.group(1), label, shape)
                        break

            # Reduce "A[Label]" to "A" so the edge pattern sees bare ids.
            skeleton = self._FLOW_NODE_RE.sub(r'\1', line)

            for match in self._FLOW_EDGE_RE.finditer(skeleton):
                source, arrow, target = match.groups()

                # Nodes mentioned only in edges get a simple unlabeled element.
                ensure_node(source, source, 'simple')
                ensure_node(target, target, 'simple')

                diagram.relationships.append(DiagramRelationship(
                    id=f"rel_{len(diagram.relationships) + 1}",
                    source_id=source,
                    target_id=target,
                    relationship_type='connection',
                    properties={'style': self._FLOW_EDGE_STYLES[arrow]},
                    tags=[]
                ))

    def _parse_sequence_diagram(self, content: str, diagram: ParsedDiagram):
        """Parse participants and messages of a sequence diagram.

        Participants used in messages without prior declaration are created
        on first use.
        """
        lines = content.split('\n')[1:]  # Skip diagram type line

        participants = set()

        def ensure_participant(participant_id, name):
            if participant_id in participants:
                return
            participants.add(participant_id)
            diagram.elements.append(DiagramElement(
                id=participant_id,
                element_type=ElementType.ACTOR,
                name=name,
                properties={},
                tags=[]
            ))

        message_patterns = [
            (r'(\w+)\s*->>\s*(\w+)\s*:\s*(.+)', 'async_message'),
            (r'(\w+)\s*->\s*(\w+)\s*:\s*(.+)', 'sync_message'),
            (r'(\w+)\s*-->>\s*(\w+)\s*:\s*(.+)', 'return_message'),
            (r'(\w+)\s*-->\s*(\w+)\s*:\s*(.+)', 'return_message'),
        ]

        for line in lines:
            line = line.strip()
            if not line:
                continue

            declaration = re.match(r'(?:participant|actor)\s+(\w+)(?:\s+as\s+(.+))?', line)
            if declaration:
                participant_id = declaration.group(1)
                participants.add(participant_id)
                diagram.elements.append(DiagramElement(
                    id=participant_id,
                    element_type=ElementType.ACTOR,
                    name=declaration.group(2) or participant_id,
                    properties={},
                    tags=[]
                ))
                continue

            for pattern, msg_type in message_patterns:
                match = re.match(pattern, line)
                if not match:
                    continue

                source, target, message = match.groups()
                ensure_participant(source, source)
                ensure_participant(target, target)

                diagram.relationships.append(DiagramRelationship(
                    id=f"rel_{len(diagram.relationships) + 1}",
                    source_id=source,
                    target_id=target,
                    relationship_type=msg_type,
                    properties={'message': message},
                    tags=[]
                ))
                break

    def _parse_er_diagram(self, content: str, diagram: ParsedDiagram):
        """Parse entities and relationships of an ER diagram.

        ``ENTITY { ... }`` bodies may sit on one line or span several lines;
        each body line becomes one entry of the ``attributes`` property.
        """
        lines = content.split('\n')[1:]  # Skip diagram type line

        def add_entity(entity_name, attributes):
            diagram.elements.append(DiagramElement(
                id=entity_name,
                element_type=ElementType.ENTITY,
                name=entity_name,
                properties={'attributes': attributes},
                tags=[]
            ))

        rel_patterns = [
            (r'(\w+)\s*\|\|--o\{\s*(\w+)', 'one_to_many'),
            (r'(\w+)\s*\}o--\|\|\s*(\w+)', 'many_to_one'),
            (r'(\w+)\s*\|\|--\|\|\s*(\w+)', 'one_to_one'),
            (r'(\w+)\s*\}o--o\{\s*(\w+)', 'many_to_many'),
        ]

        open_entity = None   # name of the entity whose body is being read
        open_attributes = []

        for line in lines:
            line = line.strip()
            if not line:
                continue

            if open_entity is not None:
                text, closed, _ = line.partition('}')
                if text.strip():
                    open_attributes.append(text.strip())
                if closed:
                    add_entity(open_entity, open_attributes)
                    open_entity = None
                continue

            # Entity with a body. Relationship lines never match here because
            # their first brace does not directly follow the entity name.
            entity_match = re.match(r'(\w+)\s*\{(.*)$', line)
            if entity_match:
                text, closed, _ = entity_match.group(2).partition('}')
                attributes = [text.strip()] if text.strip() else []
                if closed:
                    add_entity(entity_match.group(1), attributes)
                else:
                    open_entity, open_attributes = entity_match.group(1), attributes
                continue

            # Entity without attributes: a line that is a single word.
            simple_entity_match = re.match(r'^(\w+)$', line)
            if simple_entity_match:
                entity_name = simple_entity_match.group(1)
                if not any(elem.id == entity_name for elem in diagram.elements):
                    add_entity(entity_name, [])
                continue

            for pattern, rel_type in rel_patterns:
                match = re.match(pattern, line)
                if match:
                    diagram.relationships.append(DiagramRelationship(
                        id=f"rel_{len(diagram.relationships) + 1}",
                        source_id=match.group(1),
                        target_id=match.group(2),
                        relationship_type=rel_type,
                        properties={},
                        tags=[]
                    ))
                    break

        if open_entity is not None:
            # Body never closed: keep what was collected.
            add_entity(open_entity, open_attributes)

    def _parse_generic(self, content: str, diagram: ParsedDiagram):
        """Fallback: register the first word of every line as a component."""
        known_ids = {elem.id for elem in diagram.elements}

        for line in content.split('\n'):
            line = line.strip()
            if not line:
                continue

            node_match = re.search(r'(\w+)', line)
            if not node_match:
                continue

            node_id = node_match.group(1)
            if node_id in known_ids:
                continue

            known_ids.add(node_id)
            diagram.elements.append(DiagramElement(
                id=node_id,
                element_type=ElementType.COMPONENT,
                name=node_id,
                properties={},
                tags=[]
            ))

    def _extract_metadata(self, content: str) -> Dict[str, Any]:
        """Return ``{'title': ...}`` when a line starts with ``title``.

        The keyword must begin a line so that the word "title" inside a node
        label is not mistaken for a diagram title.
        """
        metadata = {}

        title_match = re.search(r'^\s*title[ \t]+(.+)$', content, re.IGNORECASE | re.MULTILINE)
        if title_match:
            metadata['title'] = title_match.group(1).strip()

        return metadata

    def _extract_global_tags(self, content: str) -> List[str]:
        """Return the distinct names used in ``class <node> <name>`` statements."""
        class_matches = re.findall(r'class\s+\w+\s+(\w+)', content)
        return list(set(class_matches))  # Remove duplicates
class PlantUMLParser(BaseParser):
    """Parser for PlantUML diagram files.

    Extracts classes, interfaces, actors and components as elements and the
    arrows between names as relationships.
    """

    # Arrow token -> (relationship type, direction). With 'reverse' the
    # right-hand name is the source and the left-hand name the target.
    _ARROWS = {
        '<|--': ('inheritance', 'reverse'),
        '--|>': ('inheritance', 'normal'),
        '<|..': ('realization', 'reverse'),
        '..|>': ('realization', 'normal'),
        '*--': ('composition', 'normal'),
        '--*': ('composition', 'reverse'),
        'o--': ('aggregation', 'normal'),
        '--o': ('aggregation', 'reverse'),
        '-->': ('association', 'normal'),
        '<--': ('association', 'reverse'),
        '..>': ('dependency', 'normal'),
        '<..': ('dependency', 'reverse'),
        '--': ('association', 'normal'),
    }

    # One pattern for all arrows, longest/most specific alternatives first.
    # The aggregation "o" must stand alone so names starting or ending with
    # "o" are not split. Spaces and tabs only: an arrow never spans lines.
    _RELATIONSHIP_RE = re.compile(
        r'(\w+)[ \t]*('
        r'<\|--|--\|>|<\|\.\.|\.\.\|>|\*--|--\*|(?<!\w)o--|--o(?!\w)'
        r'|-->|<--|\.\.>|<\.\.|--'
        r')[ \t]*(\w+)'
    )

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions handled by this parser."""
        return ['.puml', '.plantuml', '.pu']

    @property
    def diagram_type(self) -> DiagramType:
        """Diagram type reported on parsed results."""
        return DiagramType.PLANTUML

    def parse(self, content: str, source_file: str = "") -> ParsedDiagram:
        """Parse PlantUML content and extract diagram information.

        Args:
            content: PlantUML source text.
            source_file: Name recorded on the returned diagram.

        Returns:
            The populated ``ParsedDiagram``.

        Raises:
            ParseError: If any error occurs while parsing.
        """
        try:
            diagram = ParsedDiagram(
                diagram_type=self.diagram_type,
                source_file=source_file
            )

            cleaned_content = self._clean_content(content)

            diagram.metadata = self._extract_metadata(cleaned_content)
            diagram.elements = self._extract_elements(cleaned_content)
            diagram.relationships = self._extract_relationships(cleaned_content)
            diagram.tags = self._extract_global_tags(cleaned_content)

            return diagram

        except Exception as e:
            raise ParseError(f"Failed to parse PlantUML content: {str(e)}") from e

    def _clean_content(self, content: str) -> str:
        """Remove comments and normalise whitespace, keeping line structure.

        Line breaks are preserved because the title, skinparam, note and
        class-body extraction all work line by line.
        """
        # Remove multi-line comments first (PlantUML uses /' comment '/ format)
        content = re.sub(r"/\'.*?'/", "", content, flags=re.DOTALL)

        # Remove single-line comments
        content = re.sub(r"'.*$", "", content, flags=re.MULTILINE)

        # Collapse runs of blanks inside each line and drop empty lines.
        lines = (re.sub(r'[ \t\f\v]+', ' ', line).strip() for line in content.splitlines())
        return '\n'.join(line for line in lines if line)

    def _extract_metadata(self, content: str) -> Dict[str, Any]:
        """Extract title, skinparam settings and one-line notes."""
        metadata = {}
        flags = re.IGNORECASE | re.MULTILINE

        title_match = re.search(r'^[ \t]*title[ \t]+([^\n\r]+)', content, flags)
        if title_match:
            metadata['title'] = title_match.group(1).strip()

        skinparams = re.findall(r'^[ \t]*skinparam[ \t]+(\w+)[ \t]+([^\n\r]+)', content, flags)
        if skinparams:
            metadata['skinparams'] = {param: value.strip() for param, value in skinparams}

        notes = re.findall(r'note\s+(?:left|right|top|bottom|as\s+\w+)\s*:\s*([^\n\r]+)',
                           content, re.IGNORECASE)
        if notes:
            metadata['notes'] = [note.strip() for note in notes]

        return metadata

    def _extract_elements(self, content: str) -> List[DiagramElement]:
        """Extract class, interface, actor and component declarations.

        Declarations must start a line (an ``abstract`` prefix is allowed for
        classes and interfaces), so keywords occurring inside other text such
        as a title are not treated as declarations. Elements are returned
        grouped by kind in the order class, interface, actor, component.
        """
        elements = []
        typed_flags = re.IGNORECASE | re.DOTALL | re.MULTILINE

        # Classes and interfaces share name / <<stereotype>> / { body } syntax.
        for keyword, element_type in (('class', ElementType.CLASS),
                                      ('interface', ElementType.INTERFACE)):
            pattern = (rf'^[ \t]*(?:abstract[ \t]+)?{keyword}\s+(\w+)'
                       r'(?:\s*<<(.+?)>>)?\s*(?:\{(.*?)\})?')
            for match in re.finditer(pattern, content, typed_flags):
                name, stereotype, body = match.groups()

                properties = self._parse_class_body(body or "")
                if stereotype:
                    properties['stereotype'] = stereotype.strip()

                elements.append(DiagramElement(
                    id=name,
                    element_type=element_type,
                    name=name,
                    properties=properties,
                    tags=self._extract_element_tags(name, content)
                ))

        # Actors and components share the "<keyword> Name [as Alias]" syntax.
        for keyword, element_type in (('actor', ElementType.ACTOR),
                                      ('component', ElementType.COMPONENT)):
            pattern = rf'^[ \t]*{keyword}[ \t]+(\w+)(?:[ \t]+as[ \t]+(\w+))?'
            for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE):
                name = match.group(1)
                alias = match.group(2) or name

                elements.append(DiagramElement(
                    id=alias,
                    element_type=element_type,
                    name=name,
                    properties={'alias': alias} if alias != name else {},
                    tags=self._extract_element_tags(name, content)
                ))

        return elements

    def _parse_class_body(self, body: str) -> Dict[str, Any]:
        """Sort the lines of a class body into methods and attributes.

        Separator lines (``--``, ``..``, ``==``) are skipped; a line
        containing both ``(`` and ``)`` is treated as a method.
        """
        properties = {'methods': [], 'attributes': []}

        if not body:
            return properties

        for line in (raw.strip() for raw in body.split('\n')):
            if not line or line in ('--', '..', '=='):
                continue
            kind = 'methods' if '(' in line and ')' in line else 'attributes'
            properties[kind].append(line)

        return properties

    def _extract_relationships(self, content: str) -> List[DiagramRelationship]:
        """Extract relationships between elements in document order.

        Each ``Name <arrow> Name`` occurrence yields exactly one
        relationship; its ``direction`` property records whether the arrow
        was written source-first (``'normal'``) or target-first
        (``'reverse'``).
        """
        relationships = []

        for match in self._RELATIONSHIP_RE.finditer(content):
            left, arrow, right = match.groups()
            rel_type, direction = self._ARROWS[arrow]
            source, target = (left, right) if direction == 'normal' else (right, left)

            relationships.append(DiagramRelationship(
                id=f"rel_{len(relationships) + 1}",
                source_id=source,
                target_id=target,
                relationship_type=rel_type,
                properties={'direction': direction},
                tags=[]
            ))

        return relationships

    def _extract_element_tags(self, element_name: str, content: str) -> List[str]:
        """Return tags written as ``<element_name> : #tag`` in the content."""
        tag_pattern = rf'{re.escape(element_name)}\s*:\s*#(\w+)'
        return [match.group(1) for match in re.finditer(tag_pattern, content, re.IGNORECASE)]

    def _extract_global_tags(self, content: str) -> List[str]:
        """Return every distinct ``#word`` in the content, in first-seen order."""
        tags = []

        for match in re.finditer(r'#(\w+)', content):
            tag = match.group(1)
            if tag not in tags:
                tags.append(tag)

        return tags
class TestDiagramElement:
    """Checks for the DiagramElement dataclass."""

    def test_element_creation(self):
        """Every field passed to the constructor is readable afterwards."""
        supplied = dict(
            id="test_id",
            element_type=ElementType.CLASS,
            name="TestClass",
            properties={"visibility": "public"},
            position={"x": 10.0, "y": 20.0},
            tags=["important"],
        )
        built = DiagramElement(**supplied)

        assert built.id == "test_id"
        assert built.element_type == ElementType.CLASS
        assert built.name == "TestClass"
        assert built.properties["visibility"] == "public"
        assert built.position["x"] == 10.0
        assert built.tags == ["important"]

    def test_element_default_values(self):
        """Optional fields left out of the constructor take their defaults."""
        minimal = DiagramElement(
            id="simple",
            element_type=ElementType.COMPONENT,
            name="Simple",
        )

        assert minimal.properties == {}
        assert minimal.position is None
        assert minimal.tags == []
class TestBaseParser:
    """Checks for behaviour provided by the BaseParser base class."""

    def test_validate_extension(self):
        """'.mock' is accepted in either case; an unrelated suffix is not."""
        mock_parser = MockParser()
        expectations = (
            ("test.mock", True),
            ("test.MOCK", True),
            ("test.txt", False),
        )

        for filename, expected in expectations:
            assert mock_parser.validate_extension(filename) is expected

    def test_parse_file(self, tmp_path):
        """parse_file reads a file on disk and reports its path as the source."""
        mock_parser = MockParser()

        # Write a small file for the parser to read.
        source = tmp_path / "test.mock"
        source.write_text("test content")
        source_path = str(source)

        parsed = mock_parser.parse_file(source_path)

        assert parsed.diagram_type == DiagramType.PLANTUML
        assert parsed.source_file == source_path

    def test_abstract_methods(self):
        """Instantiating BaseParser directly raises TypeError."""
        with pytest.raises(TypeError):
            BaseParser()
class TestParseError:
    """Checks for the ParseError exception type."""

    def test_parse_error_creation(self):
        """The message round-trips through str() and the type is an Exception."""
        raised = ParseError("Test error message")

        assert isinstance(raised, Exception)
        assert str(raised) == "Test error message"
def test_store_and_retrieve_diagram(self):
    """Store one diagram and read its record, elements and relationships back."""
    class_element = DiagramElement(
        id="test_element",
        element_type=ElementType.CLASS,
        name="TestClass",
        properties={"visibility": "public"},
        position={"x": 10.0, "y": 20.0},
        tags=["important"],
    )
    link = DiagramRelationship(
        id="test_rel",
        source_id="test_element",
        target_id="other_element",
        relationship_type="association",
        properties={"multiplicity": "1..*"},
        tags=["key_relation"],
    )
    parsed = ParsedDiagram(
        diagram_type=DiagramType.PLANTUML,
        source_file="test.puml",
        elements=[class_element],
        relationships=[link],
        metadata={"title": "Test Diagram"},
        tags=["test", "example"],
    )

    # Storing hands back a positive row id.
    stored_id = self.db.store_diagram(parsed)
    assert stored_id > 0

    # Diagram-level record.
    record = self.db.get_diagram(stored_id)
    assert record is not None
    assert record.source_file == "test.puml"
    assert record.diagram_type == "plantuml"
    assert record.metadata["title"] == "Test Diagram"
    assert "test" in record.tags

    # Element rows.
    element_rows = self.db.get_elements(stored_id)
    assert len(element_rows) == 1
    first_element = element_rows[0]
    assert first_element.element_id == "test_element"
    assert first_element.name == "TestClass"
    assert first_element.properties["visibility"] == "public"

    # Relationship rows.
    relationship_rows = self.db.get_relationships(stored_id)
    assert len(relationship_rows) == 1
    first_relationship = relationship_rows[0]
    assert first_relationship.relationship_id == "test_rel"
    assert first_relationship.source_element_id == "test_element"
    assert first_relationship.relationship_type == "association"
def test_search_by_tags(self):
    """Tag search returns matching elements, relationships and diagrams."""
    tagged_element = DiagramElement(
        id="tagged_element",
        element_type=ElementType.CLASS,
        name="TaggedClass",
        tags=["important", "api"],
    )
    tagged_relationship = DiagramRelationship(
        id="tagged_rel",
        source_id="a",
        target_id="b",
        relationship_type="association",
        tags=["core", "api"],
    )
    self.db.store_diagram(
        ParsedDiagram(
            diagram_type=DiagramType.PLANTUML,
            source_file="test.puml",
            elements=[tagged_element],
            relationships=[tagged_relationship],
            tags=["system", "core"],
        )
    )

    # "api" is carried by the element and by the relationship.
    api_hits = self.db.search_by_tags(["api"])
    assert len(api_hits["elements"]) == 1
    assert len(api_hits["relationships"]) == 1
    assert api_hits["elements"][0]["name"] == "TaggedClass"

    # "core" is carried by the diagram and by the relationship.
    core_hits = self.db.search_by_tags(["core"])
    assert len(core_hits["diagrams"]) == 1
    assert len(core_hits["relationships"]) == 1
def setup_method(self):
    """Create a temporary database file and store one sample diagram in it."""
    self.temp_db = tempfile.NamedTemporaryFile(delete=False, suffix='.db')
    self.temp_db.close()
    self.db = DiagramDatabase(self.temp_db.name)

    # ClassA is tagged and linked to ClassB; OrphanClass appears in no
    # relationship.
    class_a = DiagramElement(
        id="class1",
        element_type=ElementType.CLASS,
        name="ClassA",
        tags=["important"],
    )
    class_b = DiagramElement(
        id="class2",
        element_type=ElementType.CLASS,
        name="ClassB",
    )
    orphan = DiagramElement(
        id="orphan",
        element_type=ElementType.CLASS,
        name="OrphanClass",
    )
    inherits = DiagramRelationship(
        id="rel1",
        source_id="class1",
        target_id="class2",
        relationship_type="inheritance",
    )

    sample = ParsedDiagram(
        diagram_type=DiagramType.PLANTUML,
        source_file="test.puml",
        elements=[class_a, class_b, orphan],
        relationships=[inherits],
        tags=["test"],
    )
    self.diagram_id = self.db.store_diagram(sample)
def test_get_diagram_statistics(self):
    """Statistics for the sample diagram report the expected counts."""
    summary = get_diagram_statistics(self.db, self.diagram_id)

    # Totals.
    assert summary["total_elements"] == 3
    assert summary["total_relationships"] == 1

    # Per-type breakdowns.
    element_counts = summary["element_type_counts"]
    relationship_counts = summary["relationship_type_counts"]
    assert element_counts["class"] == 3
    assert relationship_counts["inheritance"] == 1

    # Tag figures.
    tag_counts = summary["tag_counts"]
    assert tag_counts["important"] == 1
    assert summary["unique_tags"] == 1
def test_parse_style_properties(self):
    """A semicolon-separated style string is split into key/value pairs."""
    style = "rounded=0;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf"
    expected = {
        "rounded": "0",
        "whiteSpace": "wrap",
        "html": "1",
        "fillColor": "#dae8fc",
        "strokeColor": "#6c8ebf",
    }

    parsed = self.parser._parse_style(style)

    for key, value in expected.items():
        assert parsed[key] == value
def test_determine_relationship_type(self):
    """Connector style and label are mapped to the expected relationship type."""
    # (style, label, expected type); the last row is the default case.
    cases = (
        ("endArrow=block;endFill=0", "extends", "inheritance"),
        ("endArrow=diamond;endFill=1", "", "composition"),
        ("dashed=1;endArrow=open", "", "dependency"),
        ("endArrow=none", "", "association"),
    )

    for style, label, expected in cases:
        assert self.parser._determine_relationship_type(style, label) == expected

TestClass

" + assert self.parser._extract_text_content(tagged_text) == "TestClass" + + def test_extract_element_tags(self): + """Test element tag extraction.""" + style = "shape=rectangle;fillColor=#dae8fc;strokeColor=#6c8ebf" + value = "class TestClass" + + tags = self.parser._extract_element_tags(style, value) + + # Should extract style-based tags + assert any("shape:rectangle" in tag for tag in tags) + assert any("fillColor:#dae8fc" in tag for tag in tags) + + # Should extract content-based tags + assert "class" in tags + + def test_parse_direct_xml_elements(self): + """Test parsing XML elements directly.""" + content = """ + + + + + + + + + + + + + + + """ + + result = self.parser.parse(content) + + # Should parse elements + assert len(result.elements) == 2 + + # Check element properties + test_class = next((e for e in result.elements if e.name == "TestClass"), None) + assert test_class is not None + assert test_class.id == "class1" + assert test_class.position is not None + assert test_class.position["x"] == 160.0 + assert test_class.position["y"] == 80.0 + + # Should parse relationships + assert len(result.relationships) == 1 + relationship = result.relationships[0] + assert relationship.source_id == "class1" + assert relationship.target_id == "class2" + + def test_parse_empty_content(self): + """Test parsing empty or minimal content.""" + content = """ + + + + """ + + result = self.parser.parse(content) + + assert result.diagram_type == DiagramType.DRAWIO + assert len(result.elements) == 0 + assert len(result.relationships) == 0 + + def test_parse_invalid_xml(self): + """Test error handling for invalid XML.""" + invalid_content = "This is not valid XML content" + + with pytest.raises(ParseError) as exc_info: + self.parser.parse(invalid_content) + + assert "Invalid XML format" in str(exc_info.value) + + def test_parse_with_geometry(self): + """Test parsing elements with geometry information.""" + content = """ + + + + + + + + + """ + + result = 
def test_decode_diagram_data(self):
    """Empty input decodes to None; malformed input must not raise."""
    decode = self.parser._decode_diagram_data

    # Nothing to decode.
    assert decode("") is None
    assert decode(None) is None

    # Malformed payload: either outcome is fine as long as no exception escapes.
    decoded = decode("invalid_base64_data")
    assert decoded is None or isinstance(decoded, str)
def test_detect_mermaid_type(self):
    """The leading keyword of the source determines the detected type."""
    samples = (
        ("classDiagram\nclass TestClass", "classDiagram"),
        ("sequenceDiagram\nA->>B: Message", "sequenceDiagram"),
        ("flowchart TD\nA --> B", "flowchart"),
        ("graph LR\nA --> B", "graph"),
    )

    for text, expected in samples:
        assert self.parser._detect_mermaid_type(text) == expected
len(inheritance_rels) >= 1 + + def test_parse_flowchart(self): + """Test parsing Mermaid flowchart.""" + content = """ + flowchart TD + A[Start] --> B{Decision} + B -->|Yes| C[Process 1] + B -->|No| D[Process 2] + C --> E[End] + D --> E + """ + + result = self.parser.parse(content) + + assert result.metadata["mermaid_type"] == "flowchart" + + # Should have parsed nodes + assert len(result.elements) >= 4 + + # Check for different node shapes + rect_nodes = [e for e in result.elements if e.properties.get("shape") == "rectangular"] + diamond_nodes = [e for e in result.elements if e.properties.get("shape") == "diamond"] + + assert len(rect_nodes) >= 1 + assert len(diamond_nodes) >= 1 + + # Should have parsed connections + assert len(result.relationships) >= 4 + + def test_parse_sequence_diagram(self): + """Test parsing Mermaid sequence diagram.""" + content = """ + sequenceDiagram + participant A as Alice + participant B as Bob + A->>B: Hello Bob, how are you? + B-->>A: Great! + A->>B: See you later! 
+ """ + + result = self.parser.parse(content) + + assert result.metadata["mermaid_type"] == "sequenceDiagram" + + # Should have parsed participants + actors = [e for e in result.elements if e.element_type == ElementType.ACTOR] + assert len(actors) == 2 + + # Check participant names + alice = next((a for a in actors if a.name == "Alice"), None) + bob = next((a for a in actors if a.name == "Bob"), None) + assert alice is not None + assert bob is not None + + # Should have parsed messages + assert len(result.relationships) >= 3 + + # Check message types + async_msgs = [r for r in result.relationships if r.relationship_type == "async_message"] + return_msgs = [r for r in result.relationships if r.relationship_type == "return_message"] + + assert len(async_msgs) >= 2 + assert len(return_msgs) >= 1 + + def test_parse_er_diagram(self): + """Test parsing Mermaid ER diagram.""" + content = """ + erDiagram + CUSTOMER { + string name + string email + int age + } + ORDER { + int order_id + date order_date + float total + } + CUSTOMER ||--o{ ORDER : places + """ + + result = self.parser.parse(content) + + assert result.metadata["mermaid_type"] == "erDiagram" + + # Should have parsed entities + entities = [e for e in result.elements if e.element_type == ElementType.ENTITY] + assert len(entities) == 2 + + # Check entity attributes + customer = next((e for e in entities if e.name == "CUSTOMER"), None) + assert customer is not None + assert len(customer.properties.get("attributes", [])) == 3 + + # Should have parsed relationship + assert len(result.relationships) == 1 + relationship = result.relationships[0] + assert relationship.relationship_type == "one_to_many" + + def test_parse_graph_diagram(self): + """Test parsing Mermaid graph diagram.""" + content = """ + graph LR + A --> B + A --> C + B --> D + C --> D + """ + + result = self.parser.parse(content) + + assert result.metadata["mermaid_type"] == "graph" + + # Should have parsed nodes and connections + assert 
def test_parse_empty_content(self):
    """An empty string parses to a Mermaid diagram with nothing in it."""
    parsed = self.parser.parse("")

    assert parsed.diagram_type == DiagramType.MERMAID
    for collection in (parsed.elements, parsed.relationships):
        assert len(collection) == 0
def test_supported_extensions(self):
    """The PlantUML parser advertises all three of its file suffixes."""
    supported = self.parser.supported_extensions

    for suffix in ('.puml', '.plantuml', '.pu'):
        assert suffix in supported
len(result.elements) == 1 + + element = result.elements[0] + assert element.id == "TestClass" + assert element.element_type == ElementType.CLASS + assert element.name == "TestClass" + assert "attributes" in element.properties + assert "methods" in element.properties + + def test_parse_interface(self): + """Test parsing interface definition.""" + content = """ + @startuml + interface ITestInterface { + +method1() : void + +method2(param : String) : int + } + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.elements) == 1 + element = result.elements[0] + assert element.element_type == ElementType.INTERFACE + assert element.name == "ITestInterface" + + def test_parse_actor(self): + """Test parsing actor definition.""" + content = """ + @startuml + actor User + actor Administrator as Admin + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.elements) == 2 + + user_element = next(e for e in result.elements if e.name == "User") + assert user_element.element_type == ElementType.ACTOR + assert user_element.id == "User" + + admin_element = next(e for e in result.elements if e.name == "Administrator") + assert admin_element.element_type == ElementType.ACTOR + assert admin_element.id == "Admin" + + def test_parse_inheritance_relationship(self): + """Test parsing inheritance relationships.""" + content = """ + @startuml + class Parent + class Child + Parent <|-- Child + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.elements) == 2 + assert len(result.relationships) == 1 + + relationship = result.relationships[0] + assert relationship.relationship_type == "inheritance" + assert relationship.source_id == "Parent" + assert relationship.target_id == "Child" + + def test_parse_composition_relationship(self): + """Test parsing composition relationships.""" + content = """ + @startuml + class Car + class Engine + Car *-- Engine + @enduml + """ + + result = self.parser.parse(content) + + assert 
len(result.relationships) == 1 + relationship = result.relationships[0] + assert relationship.relationship_type == "composition" + assert relationship.source_id == "Car" + assert relationship.target_id == "Engine" + + def test_parse_association_relationship(self): + """Test parsing association relationships.""" + content = """ + @startuml + class Person + class Company + Person --> Company : works for + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.relationships) == 1 + relationship = result.relationships[0] + assert relationship.relationship_type == "association" + assert relationship.source_id == "Person" + assert relationship.target_id == "Company" + + def test_parse_metadata(self): + """Test parsing diagram metadata.""" + content = """ + @startuml + title Test Diagram Title + skinparam classBackgroundColor lightblue + skinparam classBorderColor black + + class TestClass + + note right : This is a test note + @enduml + """ + + result = self.parser.parse(content) + + assert "title" in result.metadata + assert result.metadata["title"] == "Test Diagram Title" + assert "skinparams" in result.metadata + assert "notes" in result.metadata + + def test_clean_content(self): + """Test content cleaning functionality.""" + content = """ + @startuml + ' This is a comment + class TestClass { + +method() : void + } + /' This is a + multi-line comment '/ + class AnotherClass + @enduml + """ + + cleaned = self.parser._clean_content(content) + + # Comments should be removed + assert "This is a comment" not in cleaned + assert "multi-line comment" not in cleaned + # Important content should remain + assert "TestClass" in cleaned + assert "AnotherClass" in cleaned + + def test_parse_class_with_stereotype(self): + """Test parsing class with stereotype.""" + content = """ + @startuml + class TestClass <> { + +id : Long + } + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.elements) == 1 + element = result.elements[0] + assert 
element.properties.get("stereotype") == "Entity" + + def test_parse_component(self): + """Test parsing component definition.""" + content = """ + @startuml + component WebServer + component Database as DB + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.elements) == 2 + + web_server = next(e for e in result.elements if e.name == "WebServer") + assert web_server.element_type == ElementType.COMPONENT + + database = next(e for e in result.elements if e.name == "Database") + assert database.element_type == ElementType.COMPONENT + assert database.id == "DB" + + def test_parse_empty_content(self): + """Test parsing empty content.""" + result = self.parser.parse("") + + assert result.diagram_type == DiagramType.PLANTUML + assert len(result.elements) == 0 + assert len(result.relationships) == 0 + + def test_parse_invalid_content(self): + """Test error handling for invalid content.""" + # This should not raise an exception, but should handle gracefully + content = "invalid plantuml content" + result = self.parser.parse(content) + + assert result.diagram_type == DiagramType.PLANTUML + # Should still return a result, even if empty + assert isinstance(result.elements, list) + assert isinstance(result.relationships, list) + + def test_parse_multiple_relationships(self): + """Test parsing multiple relationship types.""" + content = """ + @startuml + class A + class B + class C + class D + + A --|> B : inheritance + A *-- C : composition + A o-- D : aggregation + A ..> B : dependency + @enduml + """ + + result = self.parser.parse(content) + + assert len(result.relationships) == 4 + + rel_types = [rel.relationship_type for rel in result.relationships] + assert "inheritance" in rel_types + assert "composition" in rel_types + assert "aggregation" in rel_types + assert "dependency" in rel_types \ No newline at end of file