In [1]:
import ollama
from pathlib import Path
from typing import List, Dict
import re
from collections import defaultdict
import yaml
from datetime import datetime

def generate_metadata(content: str, model: str) -> Dict:
    """Generate metadata using local LLM"""
    try:
        if not content.strip():
            return {}
            
        prompt = f"""
        Analyze this text and extract key concepts following these rules:
        1. Identify primary concept (PascalCase)
        2. List 2-5 related concepts (PascalCase)
        3. Generate 1-3 tags (lowercase-with-dashes)
        4. Create a 1-sentence summary
        
        Text: {content[:2000]}
        """
        
        response = ollama.generate(
            model=model,
            prompt=prompt,
            format="json",
            options={"temperature": 0.2}
        )
        return response.json().get("response", {})
    except:
        return {}

def generate_ai_content(title: str, concepts: List[str], folder_hierarchy: List[str], model: str) -> str:
    """Generate content using AI with folder context"""
    try:
        context_path = " > ".join(folder_hierarchy)
        prompt = f"""
        Generate comprehensive content for: {title}
        Context Hierarchy: {context_path}
        Include:
        - Core definitions
        - Practical applications
        - Relationships to parent concepts
        - Simple examples
        Use academic tone with Markdown sections
        """
        
        response = ollama.generate(
            model=model,
            prompt=prompt,
            options={"temperature": 0.5}
        )
        return f"> **AI Generated Content**\n{response['response']}"
    except:
        return "> **AI Generation Failed** - Content placeholder"

def process_note_with_metadata(note_path: Path, output_dir: Path, model: str, notes_root: Path):
    """Process notes with proper folder structure preservation"""
    rel_path = note_path.relative_to(notes_root)
    print(f"\n📁 Processing: {rel_path}")
    
    with open(note_path, 'r', encoding='utf-8') as f:
        content = f.read()
    
    sections = re.split(r'\n## ', content)
    print(f"  🔍 Found {len(sections)} sections")
    
    folder_hierarchy = list(rel_path.parent.parts)
    note_stem = note_path.stem
    output_folder = output_dir.joinpath(*folder_hierarchy, note_stem)
    output_folder.mkdir(parents=True, exist_ok=True)
    
    for i, section in enumerate(sections, 1):
        if not section.strip():
            continue
            
        lines = section.split('\n')
        original_title = lines[0].strip('#').strip()
        body = '\n'.join(lines[1:])  # Initialize body here
        print(f"  📝 Section {i}: {original_title}")
        
        # Generate metadata
        print("    ⚙️  Generating metadata...")
        metadata = generate_metadata(f"{original_title}\n\n{body}", model)
        concepts = metadata.get('concepts', [])
        ai_generated = False
        
        # Generate content if empty or lacks subsections
        if not body.strip() or not re.search(r'^#+ ', body, flags=re.MULTILINE):
            print("    🤖 Generating AI content...")
            body = generate_ai_content(original_title, concepts, folder_hierarchy, model)
            ai_generated = True
        
        # Create sanitized filename
        sanitized_name = re.sub(r'[^\w\-_]', '', original_title.replace(' ', '-')).lower()
        output_path = output_folder / f"{sanitized_name}.md"
        
        # Handle duplicates
        counter = 1
        while output_path.exists():
            output_path = output_folder / f"{sanitized_name}-{counter}.md"
            counter += 1
        
        # Create frontmatter
        frontmatter = {
            'created': datetime.now().isoformat(),
            'modified': datetime.now().isoformat(),
            'source': f"[[{note_stem}]]",
            'hierarchy': folder_hierarchy,
            'tags': metadata.get('tags', []),
            'summary': metadata.get('summary', ''),
            'concepts': concepts,
            'ai_generated': ai_generated
        }
        yaml_front = yaml.safe_dump(frontmatter, sort_keys=False, allow_unicode=True)
        
        # Build note content
        note_content = f"---\n{yaml_front}\n---\n\n"
        note_content += f"# {original_title}\n\n"
        note_content += f"## Context Path\n{' > '.join(folder_hierarchy)}\n\n" if folder_hierarchy else ""
        note_content += "## Content\n" + body + "\n\n"
        note_content += "## Related Concepts\n" + '\n'.join(f"[[{c}]]" for c in concepts[1:])
        
        output_path.write_text(note_content, encoding='utf-8')
        print(f"    💾 Saved to: {output_path.relative_to(output_dir)}")

def create_global_indices(output_dir: Path):
    """Create hierarchical indices based on folder structure"""
    print("\n📚 Building global indices...")
    index_content = "# Knowledge Hierarchy Index\n\n"
    
    # Walk through the output directory
    for path in sorted(output_dir.glob("**/*.md")):
        if path.name.startswith('_'):
            continue
            
        # Calculate depth based on folder structure
        relative_path = path.relative_to(output_dir)
        depth = len(relative_path.parent.parts)
        indent = "  " * depth
        
        # Get display name
        display_name = relative_path.stem.replace('-', ' ')
        
        # Create hierarchical link
        parent_folders = "/".join(relative_path.parent.parts)
        index_content += f"{indent}- [[{parent_folders}/{display_name}]]\n"
    
    index_path = output_dir / "_HIERARCHY.md"
    index_path.write_text(index_content, encoding='utf-8')
    print(f"    📖 Index created at: {index_path}")

if __name__ == "__main__":
    VAULT_ROOT = Path("/home/vikk/Documents/GitHub/College-Notes")
    NOTES_ROOT = VAULT_ROOT / "Notes"
    OUTPUT_DIR = VAULT_ROOT / "Structured_Notes"
    MODEL = "mistral-small:22b"
    
    print("🚀 Starting note processing pipeline")
    print(f"🔧 Using model: {MODEL}")
    print(f"📂 Input directory: {NOTES_ROOT}")
    print(f"📂 Output directory: {OUTPUT_DIR}")
    
    # Process notes
    note_count = 0
    for note_path in NOTES_ROOT.glob("**/*.md"):
        if "MOC" not in note_path.name:
            note_count += 1
            process_note_with_metadata(note_path, OUTPUT_DIR, MODEL, NOTES_ROOT)
    
    # Create global indices
    create_global_indices(OUTPUT_DIR)
    
    print(f"\n✅ Processing complete! Processed {note_count} notes")
    print(f"🌐 Index available at: {OUTPUT_DIR}/_HIERARCHY.md")

🚀 Starting note processing pipeline
🔧 Using model: mistral-small:22b
📂 Input directory: /home/vikk/Documents/GitHub/College-Notes/Notes
📂 Output directory: /home/vikk/Documents/GitHub/College-Notes/Structured_Notes

📁 Processing: Temp notes.md
  🔍 Found 1 sections
  📝 Section 1: ___
    ⚙️  Generating metadata...


UnboundLocalError: cannot access local variable 'body' where it is not associated with a value