In [1]:
"""
@file sortMessages.py
@brief Python script to sort messageReference.ts entries alphabetically for each language
@author Henry Letellier
@date 2025-10-05

This script reads the messageReference.ts file and sorts all message entries
alphabetically within each language section while preserving the exact
function signatures and implementations.
"""

'\n@file sortMessages.py\n@brief Python script to sort messageReference.ts entries alphabetically for each language\n@author Henry Letellier\n@date 2025-10-05\n\nThis script reads the messageReference.ts file and sorts all message entries\nalphabetically within each language section while preserving the exact\nfunction signatures and implementations.\n'

In [2]:
# Import dependencies
import re
import os
import sys
from typing import List, Dict, Optional, NamedTuple, Union
import shutil
from pathlib import Path

In [3]:
# The file to process
FILE_PATH: str = os.path.abspath(
    os.path.join(
        os.getcwd(),
        "..",
        "..",
        "..",
        "vscode",
        "asperheader",
        "src",
        "modules",
        "messageReference.ts"
    )
)
# This is just debug info to provide a bit of data for you if you wish
print(f"Input file: {FILE_PATH}")
print(f"Path exists: {os.path.exists(FILE_PATH)}")
print(f"Path points to a file: {os.path.isfile(FILE_PATH)}")

Input file: /home/eletellier/Documents/001_github/Asperguide/asper-header/vscode/asperheader/src/modules/messageReference.ts
Path exists: True
Path points to a file: True


In [4]:
# The output name
OUTPUT_NAME: str = os.path.abspath(
    os.path.join(
        os.getcwd(),
        "..",
        "..",
        "..",
        "vscode",
        "asperheader",
        "src",
        "modules",
        "messageReference_sorted.ts"
    )
)
# This is just debug info to provide a bit of data for you if you wish
print(f"Output file: {OUTPUT_NAME}")
print(f"Path exists: {os.path.exists(OUTPUT_NAME)}")
print(f"Path points to a file: {os.path.isfile(OUTPUT_NAME)}")

Output file: /home/eletellier/Documents/001_github/Asperguide/asper-header/vscode/asperheader/src/modules/messageReference_sorted.ts
Path exists: False
Path points to a file: False


In [5]:
# Define structures
## MessageEntry
class MessageEntry(NamedTuple):
    key: str
    full_content: str

In [6]:
## LanguageSection
class LanguageSection(NamedTuple):
    language: str
    start_line: int
    end_line: int
    entries: List[MessageEntry]

In [7]:
# Class in charge of sorting the messages in the nodes

class MessageSorter:
    def __init__(self, file_path: str, output_file: str):
        self.output_file = output_file
        self.file_path = file_path
        with open(file_path, 'r', encoding='utf-8') as f:
            self.content = f.read()
        self.lines = self.content.split('\n')

    def extract_language_section(self, language_code: str) -> Optional[LanguageSection]:
        """Extract a complete language section with all its message entries"""
        lang_pattern = f'"{language_code}": {{'
        start_line = -1

        # Find the start of the language section
        for i, line in enumerate(self.lines):
            if lang_pattern in line:
                start_line = i
                break

        if start_line == -1:
            print(f"Warning: Language {language_code} not found")
            return None

        # Find the end of the language section
        end_line = -1
        for i in range(start_line + 1, len(self.lines)):
            line = self.lines[i].strip()
            # Look for next language or closing brace
            if ((line.startswith('"') and '": {' in line and not line.startswith('        ')) or line == '},'):
                end_line = i
                break

        if end_line == -1:
            print(f"Warning: End of language {language_code} not found")
            return None

        # Extract message entries
        entries = []
        i = start_line + 1

        while i < end_line:
            line = self.lines[i]

            # Check if this line starts a message entry
            if line.strip() and line.startswith('        ') and ':' in line:
                key_match = re.match(r'^\s+([a-zA-Z][a-zA-Z0-9]*): ', line)

                if key_match:
                    key = key_match.group(1)
                    entry_lines = [line]
                    j = i + 1

                    # Collect all lines belonging to this entry (multi-line support)
                    while j < end_line:
                        next_line = self.lines[j]

                        # Check if we've reached the next entry
                        if (next_line.strip() and next_line.startswith('        ') and re.match(r'^\s+[a-zA-Z][a-zA-Z0-9]*: ', next_line)):
                            break

                        entry_lines.append(next_line)
                        j += 1

                    # Join the entry and clean it up
                    full_content = '\n'.join(entry_lines).rstrip()

                    # Ensure proper comma handling
                    if not full_content.endswith(','):
                        full_content += ','

                    entries.append(MessageEntry(key, full_content))
                    i = j
                else:
                    i += 1
            else:
                i += 1

        return LanguageSection(language_code, start_line, end_line, entries)

    def create_sorted_section(self, section: LanguageSection) -> List[str]:
        """Sort entries alphabetically and create new section content"""
        # Sort entries by key
        sorted_entries = sorted(section.entries, key=lambda x: x.key.lower())

        # Create new section lines
        new_lines = []
        new_lines.append(self.lines[section.start_line])  # Opening line: "lang": {

        # Add sorted entries
        for i, entry in enumerate(sorted_entries):
            entry_content = entry.full_content

            # Remove comma from last entry
            if i == len(sorted_entries) - 1:
                entry_content = re.sub(r',\s*$', '', entry_content)

            new_lines.append(entry_content)

        new_lines.append('    },')  # Closing line

        return new_lines

    def sort_all_languages(self) -> None:
        """Sort all language sections in the file"""
        languages = [
            'en', 'fr', 'it', 'es', 'de', 'ja', 'ko', 'ru', 
            'pt-br', 'tr', 'pl', 'cs', 'hu', 'zh-cn', 'zh-tw'
        ]

        print('🔄 Starting to sort message entries...\n')

        # Extract all sections
        sections = []
        for lang in languages:
            section = self.extract_language_section(lang)
            if section:
                sections.append(section)
                print(f'✅ Extracted {len(section.entries)} entries for {lang}')

        if not sections:
            print('❌ No language sections found!')
            return

        # Sort sections and rebuild file
        new_lines = self.lines.copy()
        offset = 0

        for section in sections:
            print(f'\n🔄 Sorting {section.language}...')

            # Check if already sorted
            current_keys = [entry.key for entry in section.entries]
            sorted_keys = sorted(current_keys, key=str.lower)
            is_already_sorted = current_keys == sorted_keys

            if is_already_sorted:
                print(f'✅ {section.language} is already sorted!')
                continue

            print(f'📝 Sorting {len(section.entries)} entries for {section.language}')
            print(f'   First key was: {current_keys[0]} -> now: {sorted_keys[0]}')

            # Create sorted content
            sorted_section_lines = self.create_sorted_section(section)

            # Calculate adjusted positions with offset
            adjusted_start_line = section.start_line + offset
            adjusted_end_line = section.end_line + offset
            original_section_length = adjusted_end_line - adjusted_start_line + 1

            # Replace the section in new_lines
            new_lines[adjusted_start_line:adjusted_start_line + original_section_length] = sorted_section_lines

            # Update offset for next sections
            offset += len(sorted_section_lines) - original_section_length

            print(f'✅ Sorted {section.language} successfully')

        # Write the new content
        new_content = '\n'.join(new_lines)
        backup_path = self.file_path + '.backup'

        # Create backup
        shutil.copy2(self.file_path, backup_path)
        print(f'\n💾 Created backup: {backup_path}')

        # Write sorted file
        with open(self.output_file, 'w', encoding='utf-8', newline='\n') as f:
            f.write(new_content)
        print(f'✅ File sorted successfully: {self.output_file}')

        # Summary
        print('\n📊 Summary:')
        print(f'   Languages processed: {len(sections)}')
        print(f'   Total entries per language: ~{len(sections[0].entries) if sections else 0}')
        print(f'   Backup created: {os.path.basename(backup_path)}')
        print('\n🎉 All message entries are now sorted alphabetically!')

    def verify_sorting(self) -> bool:
        """Verify the sorting was successful"""
        print('\n🔍 Verifying sorting...')

        languages = [
            'en', 'fr', 'it', 'es', 'de', 'ja', 'ko', 'ru', 
            'pt-br', 'tr', 'pl', 'cs', 'hu', 'zh-cn', 'zh-tw'
        ]
        all_sorted = True

        # Re-read the file
        with open(self.output_file, 'r', encoding='utf-8') as f:
            self.content = f.read()
        self.lines = self.content.split('\n')

        for lang in languages:
            section = self.extract_language_section(lang)
            if section:
                keys = [entry.key for entry in section.entries]
                sorted_keys = sorted(keys, key=str.lower)
                is_sorted = keys == sorted_keys

                print(f'   {lang}: {"✅ Sorted" if is_sorted else "❌ Not sorted"}')

                if not is_sorted:
                    all_sorted = False
                    print(f'      Expected: {sorted_keys[0]} ... {sorted_keys[-1]}')
                    print(f'      Got:      {keys[0]} ... {keys[-1]}')

        if all_sorted:
            print('\n🎉 All languages are properly sorted alphabetically!')
        else:
            print('\n⚠️  Some languages are not properly sorted.')
        return all_sorted
    
    def get_missing_sentences(self, src_lang:str, target_lang: str, sorted: bool = True) -> Union[List[MessageEntry], None]:
        """Extract the sentences that are not present in the target language"""
        missing_sentences: List[MessageEntry] = []
        # Re-read the file
        with open(self.output_file, 'r', encoding='utf-8') as f:
            self.content = f.read()
        self.lines = self.content.split('\n')

        src_lang_content: Union[LanguageSection, None] = self.extract_language_section(src_lang)
        dest_lang_content: Union[LanguageSection, None] = self.extract_language_section(target_lang)
        if not src_lang_content or not dest_lang_content:
            print("The src_lang_content or dest_lang_content is not present.")
            return
        print(f"Gathered src_lang_content: {src_lang_content}")
        print(f"Gathered dest_lang_content: {dest_lang_content}")
        iterA = None
        iterB = None
        if len(src_lang_content.entries) >= len(dest_lang_content.entries):
            iterA = src_lang_content
            iterB = dest_lang_content
        else:
            iterA = dest_lang_content
            iterB = src_lang_content
        for sentence_src in iterA.entries:
            sentence_found:bool = False
            for sentence_dest in iterB.entries:
                if sentence_src.key == sentence_dest.key:
                    sentence_found = True
                    break
            if sentence_found is False:
                print(f"sentence {sentence_src.key} not found in {target_lang}")
                missing_sentences.append(sentence_src)
        print(f"Final sentences not present: {missing_sentences}")
        return missing_sentences

In [8]:
# The main function of the program
message_file_path = Path(FILE_PATH)

if not message_file_path.exists():
    print(f'❌ File not found: {message_file_path}')
    sys.exit(1)


In [9]:
print(f'📁 Processing file: {message_file_path}\n')
loaded=False

try:
    sorter = MessageSorter(str(message_file_path), OUTPUT_NAME)
    loaded=True
except Exception as error:
    print(f'❌ Error occurred: {error}')

📁 Processing file: /home/eletellier/Documents/001_github/Asperguide/asper-header/vscode/asperheader/src/modules/messageReference.ts



In [10]:
if loaded:
    # Sort all languages
    try:
        sorter.sort_all_languages()
    except Exception as error:
        print(f'❌ Error occurred: {error}')

🔄 Starting to sort message entries...

✅ Extracted 139 entries for en
✅ Extracted 139 entries for fr
✅ Extracted 139 entries for it
✅ Extracted 139 entries for es
✅ Extracted 139 entries for de
✅ Extracted 139 entries for ja
✅ Extracted 139 entries for ko
✅ Extracted 139 entries for ru
✅ Extracted 139 entries for pt-br
✅ Extracted 139 entries for tr
✅ Extracted 139 entries for pl
✅ Extracted 139 entries for cs
✅ Extracted 139 entries for hu
✅ Extracted 139 entries for zh-cn
✅ Extracted 139 entries for zh-tw

🔄 Sorting en...
📝 Sorting 139 entries for en
   First key was: alternateFilePathUpdated -> now: alternateFilePathUpdated
✅ Sorted en successfully

🔄 Sorting fr...
✅ fr is already sorted!

🔄 Sorting it...
📝 Sorting 139 entries for it
   First key was: alternateFilePathUpdated -> now: alternateFilePathUpdated
✅ Sorted it successfully

🔄 Sorting es...
✅ es is already sorted!

🔄 Sorting de...
📝 Sorting 139 entries for de
   First key was: alternateFilePathUpdated -> now: alternateFileP

In [11]:
if loaded:
    # Verify the result
    try:
        sorter.verify_sorting()
    except Exception as error:
        print(f'❌ Error occurred: {error}')


🔍 Verifying sorting...
   en: ✅ Sorted
   fr: ✅ Sorted
   it: ✅ Sorted
   es: ✅ Sorted
   de: ✅ Sorted
   ja: ✅ Sorted
   ko: ✅ Sorted
   ru: ✅ Sorted
   pt-br: ✅ Sorted
   tr: ✅ Sorted
   pl: ✅ Sorted
   cs: ✅ Sorted
   hu: ✅ Sorted
   zh-cn: ✅ Sorted
   zh-tw: ✅ Sorted

🎉 All languages are properly sorted alphabetically!


In [12]:
languages = [
            'fr', 'it', 'es', 'de', 'ja', 'ko', 'ru', 
            'pt-br', 'tr', 'pl', 'cs', 'hu', 'zh-cn', 'zh-tw'
        ]
missing_sentences: Dict[str, Union[None, List[MessageEntry]]] = {}
if loaded:
    source_language: str = "en"
    for lang in languages:
        try:
            print(f"Checking language '{source_language}' against '{lang}'")
            missing_sentences[lang] = sorter.get_missing_sentences(source_language,lang)
        except Exception as error:
            print(f'❌ Error occurred: {error}')

Checking language 'en' against 'fr'
Gathered src_lang_content: LanguageSection(language='en', start_line=332, end_line=472, entries=[MessageEntry(key='alternateFilePathUpdated', full_content='        alternateFilePathUpdated: (oldFilePath: string, newFilePath: string): string => `The alternate path has been updated from ${oldFilePath} to ${newFilePath}.`,'), MessageEntry(key='alternateLogoDirectoryNotFound', full_content='        alternateLogoDirectoryNotFound: (alternateRootDirectory: string, error: string = "Not Provided"): string => `The alternate logo root directory \'${alternateRootDirectory}\' was not found, error: \'${error}\'.`,'), MessageEntry(key='alternateLogoDirectoryNotProvided', full_content='        alternateLogoDirectoryNotProvided: (): string => `No alternate logo directory provided.`,'), MessageEntry(key='arrayNodeContent', full_content='        arrayNodeContent: (arrayName: string, arrayIndex: number, arrayNode: any[]): string => `${arrayName}[${arrayIndex}] = ${JSON

In [13]:
# Displaying missing sentences per language
for key, value in missing_sentences.items():
    if not value:
        print(f"{key}: {value}")
    else:
        print(f"{key}:")
        for i in value:
            print(f"\t{i.full_content}")

fr: []
it: []
es: []
de: []
ja: []
ko: []
ru: []
pt-br: []
tr: []
pl: []
cs: []
hu: []
zh-cn: []
zh-tw: []
