In [None]:
import os
import openai
from pathlib import Path
import shutil

# API key removed; set openai.api_key (or the OPENAI_API_KEY environment variable) before running.

class DocumentProcessor:
    """Cross-link .mdx articles by asking a chat model to insert hyperlinks.

    The processor scans ``docs_path`` for ``.mdx`` files, builds a map from
    each article's ``heading:`` value to the link of the folder containing it,
    and then sends every article to the model together with the titles of all
    *other* articles. Files whose content changes are rewritten in place after
    the original text has been copied under ``backup_path``.
    """

    def __init__(self, docs_path: str, backup_path: str):
        """Store the docs and backup roots; no file system access happens here."""
        self.docs_path = Path(docs_path)
        self.backup_path = Path(backup_path)
        # Maps an article heading to the "/"-rooted link of its folder.
        self.article_titles = {}

    @staticmethod
    def _extract_heading(content: str) -> str:
        """Return the cleaned value of the first ``heading:`` line, or ``""``.

        Surrounding whitespace, a trailing comma (as left by object-literal
        style metadata such as ``heading: "Title",``), double quotes and a
        leading ``◆`` marker are removed.
        """
        for line in content.split('\n'):
            if 'heading:' in line:
                value = line.split('heading:')[1].strip()
                # Without this a line ending in `",` keeps its closing quote.
                value = value.rstrip(',').strip()
                return value.strip('"').lstrip('◆').strip()
        return ""

    @staticmethod
    def _clean_response(raw: str, original: str) -> str:
        """Undo the wrapping a chat model tends to add around returned content.

        A code fence is removed only when the reply starts with one and the
        original document does not, so backticks that belong to the article
        are never touched. The original's leading and trailing whitespace is
        then re-attached so an otherwise unchanged article compares equal to
        its source.
        """
        text = raw.strip()
        original_core = original.strip()

        if text.startswith("```") and not original_core.startswith("```"):
            # Drop the whole opener line ("```", "```markdown", "```mdx", ...).
            _, _, text = text.partition("\n")
            if text.rstrip().endswith("```"):
                text = text.rstrip()[:-3]
            text = text.strip()

        if not original_core:
            # Nothing but whitespace in the source: no padding to restore.
            return text

        leading = original[:len(original) - len(original.lstrip())]
        trailing = original[len(original.rstrip()):]
        return f"{leading}{text}{trailing}"

    def get_mdx(self):
        """Returns list of all .mdx files in the docs directory"""
        return list(self.docs_path.rglob("*.mdx"))

    def find_titles(self):
        """Maps article headings to their paths

        Entries are added to ``self.article_titles``; files without a usable
        heading are skipped. A file located directly in ``docs_path`` maps to
        ``"/"``.
        """
        for file_path in self.get_mdx():
            content = file_path.read_text(encoding='utf-8')
            heading = self._extract_heading(content)
            if not heading:
                # An empty title would match nothing meaningful in the prompt.
                continue

            relative_path = file_path.parent.relative_to(self.docs_path).as_posix()
            # relative_to() yields "." for the root folder; "/." is not a link.
            hyperlink_path = "/" if relative_path == "." else f"/{relative_path}"
            self.article_titles[heading] = hyperlink_path

    def process_article(self, file_path: Path):
        """Ask the model to link one article and persist the result.

        Args:
            file_path: Path of the ``.mdx`` file to process.

        Returns:
            True when the file was rewritten (and backed up), False when it
            was left untouched.
        """
        content = file_path.read_text(encoding='utf-8')
        current_heading = self._extract_heading(content)

        # An article must not link to itself.
        filtered_titles = {
            title: path
            for title, path in self.article_titles.items()
            if title != current_heading
        }

        # Nothing to link, or nothing to link in: skip the model call.
        if not filtered_titles or not content.strip():
            return False

        updated_content = self.call_chat(
            content=content,
            titles=filtered_titles,
            current_folder=str(file_path.parent)
        )

        # An empty reply is a failed generation, never a valid edit.
        if not updated_content.strip():
            return False

        if content != updated_content:
            self.save_results(file_path, content, updated_content)
            return True
        return False

    def call_chat(self, content: str, titles: dict, current_folder: str) -> str:
        """Send the article and the title list to the chat model.

        Args:
            content: Full text of the article.
            titles: Mapping of article title to link path.
            current_folder: Accepted for interface compatibility; not used in
                the prompt.

        Returns:
            The model's version of the article with any code-fence wrapper
            removed and the original outer whitespace restored.
        """
        title_list = "\n".join([f"- {title}: {path}" for title, path in titles.items()])

        prompt = f"""
        You are a Markdown editor. Here is a list of article titles and their paths:
        {title_list}

        Identify where these titles appear in the following Markdown content and 
        insert hyperlinks pointing to the corresponding paths. Return the updated content.

        Content:
        {content}

        Rules:
        1. **You cannot delete anything**, not even whitespace or extra lines.
        2. Do not modify existing hyperlinks [text](path)
        3. Do not modify or delete 'metadata.heading' anywhere
        4. Do not hyperlink 'introduction' 
        5. Matching should be case-insensitive
        6. Only hyperlink the first instance of each term
        7. Only hyperlink standalone terms (with spaces on both sides)
        8. Do not add or replace any '`' and do not write 'Markdown' anywhere
        9. Do not delete any citations that appear after terms, for example 'term (@citation)'

        """

        # NOTE(review): ChatCompletion.create is the pre-1.0 openai interface;
        # confirm the installed openai version before upgrading the package.
        response = openai.ChatCompletion.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an expert Markdown content editor. Return only the raw content without any markdown formatting markers."},
                {"role": "user", "content": prompt}
            ]
        )

        raw_reply = response['choices'][0]['message']['content']
        return self._clean_response(raw_reply, content)

    def save_results(self, file_path: Path, original: str, updated: str):
        """Back up the original text, overwrite the article, and log the change.

        The backup mirrors the article's location relative to ``docs_path`` so
        that files sharing a name (for example many ``page.mdx``) do not
        overwrite each other's backups. A path outside ``docs_path`` falls
        back to a flat backup named after the file.
        """
        try:
            relative_target = file_path.relative_to(self.docs_path)
        except ValueError:
            relative_target = Path(file_path.name)

        backup_file = self.backup_path / relative_target
        backup_file.parent.mkdir(parents=True, exist_ok=True)

        # Write the backup first so the original survives a failed rewrite.
        with open(backup_file, 'w', encoding='utf-8') as f:
            f.write(original)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(updated)

        with open(self.backup_path / "change_log.txt", "a", encoding='utf-8') as log:
            log.write(f"Updated and backed up: {file_path}\n")

    def process_all_documents(self):
        """Main processing function

        Builds the title map, processes every ``.mdx`` file and prints a
        summary of how many files were visited and how many were rewritten.
        """
        self.find_titles()

        processed_count = 0
        modified_count = 0

        for file_path in self.get_mdx():
            print(f"Processing: {file_path}")
            processed_count += 1
            if self.process_article(file_path):
                modified_count += 1

        print("\nProcessing complete!")
        print(f"Total files processed: {processed_count}")
        print(f"Files modified: {modified_count}")

def main():
    """Create a DocumentProcessor for the configured folders and run the full batch."""
    backup_path = r"" # removed for privacy
    docs_path = r"" # removed for privacy

    DocumentProcessor(docs_path, backup_path).process_all_documents()

# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()





Processing: C:\Users\cpola\Learn_Prompting_nextjs\app\(docs)\docs\agents\introduction\page.mdx
Processing: C:\Users\cpola\Learn_Prompting_nextjs\app\(docs)\docs\agents\mrkl\page.mdx
Processing: C:\Users\cpola\Learn_Prompting_nextjs\app\(docs)\docs\agents\pal\page.mdx
Processing: C:\Users\cpola\Learn_Prompting_nextjs\app\(docs)\docs\agents\react\page.mdx

Processing complete!
Total files processed: 4
Files modified: 4
