BruinGrowly · BruinGrowly · Nov 3, 2025 · Nov 3, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -38,12 +38,12 @@ jobs:
 
       - name: Test with pytest
         run: |
-          pytest
+          python -m pytest
 
       - name: Check Code Harmony
         run: |
           # v1.2+: Harmony check with automatic exit codes
           # Note: Currently informational as source has some disharmony
           # (This demonstrates the tool working - it found semantic issues!)
-          find src -name "*.py" -type f | xargs harmonizer || echo "⚠️ Disharmony found (tool is working correctly!)"
+          find harmonizer -name "*.py" -type f | xargs harmonizer || echo "⚠️ Disharmony found (tool is working correctly!)"
         continue-on-error: true
diff --git a/.harmonizer.yml.template b/.harmonizer.yml.template
@@ -1,125 +1,55 @@
-# Python Code Harmonizer Configuration Template
+# Python Code Harmonizer Configuration File
+# ------------------------------------------
+# This file allows you to customize the behavior of the Harmonizer to
+# better suit your project's specific needs.
 #
-# NOTE: Configuration file support is planned for future release
-# This template shows what configuration will look like when implemented
+# You can save this file as '.harmonizer.yml' in your project's root
+# directory.
+
+# File and Directory Exclusion
+# -----------------------------
+# Specify a list of file or directory patterns to exclude from analysis.
+# This is useful for ignoring virtual environments, test suites, or
+# generated code.
 #
-# Copy this file to .harmonizer.yml in your project root
-# The harmonizer will read this configuration automatically
-
-# Disharmony threshold (functions above this are flagged)
-# Default: 0.5
-# Range: 0.0 (very strict) to 2.0 (very lenient)
-threshold: 0.5
-
-# Output format
-# Options: table, json, csv
-# Default: table
-output_format: table
-
-# Severity level definitions
-severity_levels:
-  critical: 1.2    # Score >= 1.2
-  high: 0.8        # Score >= 0.8
-  medium: 0.5      # Score >= 0.5
-  low: 0.3         # Score >= 0.3
-  excellent: 0.0   # Score < 0.3
-
-# Files and patterns to ignore
-ignore_patterns:
-  - "**/test_*.py"           # Test files
-  - "**/tests/*.py"          # Test directories
-  - "**/migrations/*.py"     # Database migrations
-  - "**/*_test.py"           # Alternative test naming
-  - "**/conftest.py"         # Pytest configuration
-  - "**/__pycache__/**"      # Python cache
-  - "**/.venv/**"            # Virtual environments
-
-# Files and patterns to include (overrides ignore if specified)
-include_patterns:
-  - "src/**/*.py"            # Source files
-  - "app/**/*.py"            # Application files
-  # - "scripts/**/*.py"      # Uncomment to include scripts
-
-# Fail build in CI/CD if any function exceeds this threshold
-# Set to null to never fail builds
-# Default: null (warnings only)
-fail_threshold: null
-# fail_threshold: 1.0       # Uncomment to fail on critical disharmony
-
-# Enable verbose output
-# Default: false
-verbose: false
-
-# Show function details in output
-# Default: true
-show_function_details: true
-
-# Sort results by score (descending)
-# Default: true
-sort_by_score: true
-
-# Color output (for terminal)
-# Default: true
-color_output: true
-
-# Custom vocabulary extensions
-# Add domain-specific semantic mappings
-# (Advanced: requires understanding of DIVE-V2 engine)
+# The patterns use standard glob syntax.
+exclude:
+  - 'venv/'          # Exclude a virtual environment directory
+  - 'tests/'         # Exclude the main test directory
+  - '**/test_*.py'   # Exclude any Python file whose name starts with 'test_'
+  - 'docs/'          # Exclude the documentation directory
+  - 'build/'         # Exclude build artifacts
+  - '*.md'           # Exclude Markdown files
+
+# Custom Semantic Vocabulary
+# --------------------------
+# Extend the Harmonizer's built-in vocabulary with your own domain-specific
+# terms. This is a powerful feature that allows you to teach the Harmonizer
+# the unique language of your project.
+#
+# Map each custom keyword to one of the four core dimensions (case-insensitive):
+#   - love:    Connection, communication, sharing, community
+#   - justice: Order, rules, validation, enforcement, structure
+#   - power:   Action, execution, modification, creation, deletion
+#   - wisdom:  Analysis, calculation, information retrieval, knowledge
+#
+# This is especially useful for business logic or scientific applications.
 custom_vocabulary:
-  # Example: Map domain-specific terms
-  # "authenticate": "justice"
-  # "authorize": "power"
-  # "notify": "love"
-
-# Report options
-report:
-  # Show summary statistics
-  show_summary: true
-
-  # Show only disharmonious functions
-  only_show_disharmony: false
-
-  # Include harmonious functions in output
-  include_harmonious: true
-
-  # Maximum functions to display (0 = unlimited)
-  max_display: 0
-
-# Future enhancement placeholders
-# These will be implemented in upcoming versions
-
-# auto_fix:
-#   enabled: false
-#   suggestions: true
-
-# metrics:
-#   track_over_time: false
-#   output_file: "harmony_metrics.json"
-
-# integrations:
-#   github:
-#     create_review_comments: false
-#   jira:
-#     create_tickets_for_critical: false
-
----
-
-# Example configurations for different use cases:
-
-# STRICT MODE (for new projects)
-# threshold: 0.3
-# fail_threshold: 0.5
-
-# LENIENT MODE (for legacy code cleanup)
-# threshold: 0.8
-# fail_threshold: 1.2
-
-# CI/CD MODE (fail on critical only)
-# threshold: 0.5
-# fail_threshold: 1.0
-# only_show_disharmony: true
-
-# DEVELOPMENT MODE (show everything)
-# threshold: 0.5
-# verbose: true
-# show_function_details: true
+  # Example for a financial application
+  invoice: justice
+  payment: power
+  ledger: justice
+  audit: wisdom
+  receipt: love  # Represents a communication/connection
+
+  # Example for a data science application
+  dataset: wisdom
+  train_model: power
+  predict: wisdom
+  visualize: love  # Represents communication of results
+
+  # Example for a web application
+  user_profile: wisdom
+  session: love
+  database_query: justice
+  render_template: power
diff --git a/README.md b/README.md
@@ -218,6 +218,18 @@ def pop_cache_value(key):
 
 ---
 
+## Configuration
+
+The Harmonizer can be customized to fit your project's needs using a `.harmonizer.yml` file in your project's root directory.
+
+This allows you to:
+- **Exclude files and directories** from analysis (e.g., `tests/`, `venv/`).
+- **Define a custom vocabulary** to teach the Harmonizer about your project's specific domain language.
+
+For a complete guide to all available options, see the **[Configuration Documentation](docs/CONFIGURATION.md)**.
+
+---
+
 ## Integration Into Your Workflow
 
 ### GitHub Actions (CI/CD)

diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
@@ -0,0 +1,70 @@
+# Configuration
+
+The Python Code Harmonizer can be configured to better suit your project's needs using a `.harmonizer.yml` file placed in your project's root directory.
+
+This file allows you to customize file exclusion patterns and extend the Harmonizer's semantic vocabulary with your own domain-specific terms.
+
+## Configuration File Structure
+
+Here is an example of a `.harmonizer.yml` file with all available options:
+
+```yaml
+# .harmonizer.yml
+
+# File and Directory Exclusion
+exclude:
+  - 'venv/'
+  - 'tests/'
+  - '**/test_*.py'
+  - 'docs/'
+  - 'build/'
+  - '*.md'
+
+# Custom Semantic Vocabulary
+custom_vocabulary:
+  invoice: justice
+  payment: power
+  ledger: justice
+  audit: wisdom
+  receipt: love
+```
+
+## `exclude`
+
+The `exclude` key takes a list of glob patterns. Any file or directory matching these patterns will be ignored during analysis. This is useful for excluding virtual environments, test suites, documentation, or generated code.
+
+**Common Patterns:**
+
+-   `'venv/'`: Excludes a virtual environment directory.
+-   `'tests/'`: Excludes the main test directory.
+-   `'**/test_*.py'`: Excludes any Python file whose name starts with `test_`, in any directory.
+-   `'build/'`: Excludes build artifacts.
+-   `'*.md'`: Excludes all Markdown files.
+
+## `custom_vocabulary`
+
+The `custom_vocabulary` key allows you to extend the Harmonizer's built-in vocabulary with your own domain-specific terms. This is a powerful feature that lets you teach the Harmonizer the unique language of your project, making its analysis more accurate and relevant.
+
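+Map each custom keyword to one of the four core dimensions. Dimension names are case-insensitive; an entry with an unrecognized dimension is skipped with a warning on stderr: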
+
+-   **`love`**: Connection, communication, sharing, community.
+-   **`justice`**: Order, rules, validation, enforcement, structure.
+-   **`power`**: Action, execution, modification, creation, deletion.
+-   **`wisdom`**: Analysis, calculation, information retrieval, knowledge.
+
+This is especially useful for business logic or scientific applications.
+
+**Examples:**
+
+-   **Financial Application:**
+    -   `invoice: justice`
+    -   `payment: power`
+    -   `ledger: justice`
+-   **Data Science Application:**
+    -   `dataset: wisdom`
+    -   `train_model: power`
+    -   `predict: wisdom`
+-   **Web Application:**
+    -   `user_profile: wisdom`
+    -   `session: love`
+    -   `render_template: power`
diff --git a/src/ast_semantic_parser.py → harmonizer/ast_semantic_parser.py b/src/ast_semantic_parser.py → harmonizer/ast_semantic_parser.py
diff --git a/src/divine_invitation_engine_V2.py → harmonizer/divine_invitation_engine_V2.py b/src/divine_invitation_engine_V2.py → harmonizer/divine_invitation_engine_V2.py
@@ -60,11 +60,34 @@ class SemanticResult:
 class VocabularyManager:
     """Optimized vocabulary management with caching"""
 
-    def __init__(self):
+    def __init__(self, custom_vocabulary: Optional[Dict[str, str]] = None):
         self._keyword_map: Dict[str, Dimension] = {}
         self._word_cache: Dict[str, Tuple[Coordinates, int]] = {}
         self._ice_dimension_map: Dict[Dimension, Dimension] = {}
         self._build_complete_vocabulary()
+        if custom_vocabulary:
+            self._apply_custom_vocabulary(custom_vocabulary)
+
+    def _apply_custom_vocabulary(self, custom_vocabulary: Dict[str, str]) -> None:
+        """Apply config vocabulary; str-coerce YAML scalars, warn on bad dimensions."""
+        import sys
+
+        applied_count = 0
+        for word, dimension_str in custom_vocabulary.items():
+            try:
+                dimension = Dimension[str(dimension_str).strip().upper()]
+                self._keyword_map[str(word).lower()] = dimension
+                applied_count += 1
+            except KeyError:
+                print(
+                    f"WARNING: Invalid dimension '{dimension_str}' for word '{word}' in config.",
+                    file=sys.stderr,
+                )
+        if applied_count > 0:
+            print(
+                f"INFO: Applied {applied_count} custom vocabulary entries.",
+                file=sys.stderr,
+            )
 
     def _build_complete_vocabulary(self) -> None:
         """Build optimized vocabulary from all components"""
@@ -719,13 +742,15 @@ class DivineInvitationSemanticEngine:
     High-performance facade integrating all specialized sub-engines.
     """
 
-    def __init__(self):
+    def __init__(self, config: Optional[Dict] = None):
         """Initialize optimized system"""
+        self.config = config if config else {}
         self.ENGINE_VERSION = "DIVE-V2 (Optimized Production)"
         self.ANCHOR_POINT = Coordinates(1.0, 1.0, 1.0, 1.0)
 
         # Build core components
-        self.vocabulary = VocabularyManager()
+        custom_vocabulary = self.config.get("custom_vocabulary", {})
+        self.vocabulary = VocabularyManager(custom_vocabulary=custom_vocabulary)
         self.semantic_analyzer = SemanticAnalyzer(self.vocabulary, self.ANCHOR_POINT)
 
         # Build specialized sub-engines