In [None]:
%pip install ollama pyyaml

Note: you may need to restart the kernel to use updated packages.


In [None]:
import ollama
from pathlib import Path
from typing import List, Dict, Set
import re
from collections import defaultdict
import yaml
from datetime import datetime

def generate_metadata(content: str, model: str) -> Dict:
    """Generate metadata for a note by querying a local LLM through Ollama.

    Args:
        content: Note text. Only the first 2000 characters are sent.
        model: Name of the Ollama model to query.

    Returns:
        The JSON object decoded from the model's reply, or an empty dict
        when the reply is missing, is not valid JSON, or is not a JSON
        object.

    Exceptions raised by ``ollama.generate`` itself are not caught here.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    import json

    prompt = f"""
    Analyze this note content and generate metadata following these rules:
    1. Create 1-3 main tags (lowercase-with-dashes)
    2. Identify 2-5 related concepts (PascalCase)
    3. Write a 1-sentence summary
    4. Suggest 1-3 aliases
    5. Find 1-2 categories

    Respond with a single JSON object using exactly these keys:
    "tags" (list), "concepts" (list), "summary" (string),
    "aliases" (list), "category" (string).

    Content: {content[:2000]}
    """

    response = ollama.generate(
        model=model,
        prompt=prompt,
        format="json",
        options={"temperature": 0.2}
    )

    # The generated text is carried in the "response" field as a JSON
    # string; it has to be decoded before it can be used as a mapping.
    try:
        metadata = json.loads(response["response"])
    except (KeyError, TypeError, ValueError):
        # Best effort: a missing field or malformed JSON yields no metadata
        # (json.JSONDecodeError is a ValueError subclass).
        return {}

    # Valid JSON may still be a list or scalar; callers expect a mapping.
    return metadata if isinstance(metadata, dict) else {}

def create_frontmatter(metadata: Dict, source_note: str) -> str:
    """Build the YAML frontmatter text for a generated note.

    LLM-produced metadata is normalised before dumping: list-valued fields
    (``tags``, ``aliases``, ``concepts``) are always emitted as lists, and
    missing or null ``summary``/``category`` values become empty strings.

    Args:
        metadata: Mapping with optional keys ``tags``, ``aliases``,
            ``summary``, ``category`` and ``concepts``. Anything that is
            not a dict is treated as empty metadata.
        source_note: Name of the originating note, stored as a wikilink.

    Returns:
        The YAML document body (without the surrounding ``---`` fences).
    """
    def as_list(value) -> List:
        # None / empty string -> no entries; a lone scalar -> one entry.
        if value is None or value == '':
            return []
        if isinstance(value, list):
            return list(value)
        return [value]

    if not isinstance(metadata, dict):
        metadata = {}

    # Take one timestamp so 'created' and 'modified' are identical on a
    # freshly generated note.
    timestamp = datetime.now().isoformat()

    frontmatter = {
        'created': timestamp,
        'modified': timestamp,
        'source': f"[[{source_note}]]",
        'tags': as_list(metadata.get('tags')),
        'aliases': as_list(metadata.get('aliases')),
        'summary': metadata.get('summary') or '',
        'category': metadata.get('category') or '',
        'links': {
            'outgoing': as_list(metadata.get('concepts')),
            'backlinks': []
        }
    }

    return yaml.safe_dump(frontmatter, sort_keys=False, allow_unicode=True)

def process_note_with_metadata(note_path: Path, output_dir: Path, model: str) -> None:
    """Split a note into one generated note per section, each with frontmatter.

    The note is split wherever a line starts with ``## ``. For every
    non-blank section, metadata is generated for the section and merged
    over the note-wide metadata (section values win), then a new markdown
    file is written under ``output_dir``.

    Args:
        note_path: Source note. It must be located under the ``Notes``
            directory that is a sibling of ``output_dir``; otherwise
            ``Path.relative_to`` raises ``ValueError``.
        output_dir: Root directory for the generated notes.
        model: Model name forwarded to ``generate_metadata``.
    """
    content = note_path.read_text(encoding='utf-8')

    sections = re.split(r'\n## ', content)
    base_name = note_path.stem
    # Reproduce the note's sub-folder (relative to the sibling "Notes"
    # directory) underneath output_dir.
    rel_path = note_path.relative_to(output_dir.parent / "Notes")

    # Generate note-wide metadata
    global_metadata = generate_metadata(content, model)

    # File names already written for this note, compared case-insensitively
    # so that repeated headings do not overwrite each other.
    used_names = set()

    for i, section in enumerate(sections):
        if not section.strip():
            continue

        # First line is the heading text, the remainder is the section body.
        title_line, _, body = section.partition('\n')
        title = title_line.strip('#').strip()
        full_content = f"{title}\n\n{body}"

        # Generate section-specific metadata
        section_metadata = generate_metadata(full_content, model)
        combined_metadata = {**global_metadata, **section_metadata}

        # Create frontmatter
        frontmatter = create_frontmatter(combined_metadata, base_name)

        # Build note content
        note_content = f"---\n{frontmatter}\n---\n\n"
        note_content += f"# {title}\n\n"
        note_content += "## Content\n" + body + "\n\n"
        note_content += "## Connections\n"
        note_content += "- [[_MOC|Main Map of Content]]\n"
        note_content += "- [[_TAGS|Tag Index]]\n"

        # Headings may contain path separators or characters that are
        # invalid in file names or that have special meaning inside
        # [[wikilinks]]; replace them so the title yields a single plain
        # file name.
        safe_title = re.sub(r'[\\/:*?"<>|#^\[\]]', '-', title).strip()
        new_name = f"{base_name} - {safe_title}" if safe_title else f"{base_name} - Section {i+1}"

        # Disambiguate duplicate headings within the same note.
        candidate = new_name
        suffix = 2
        while candidate.casefold() in used_names:
            candidate = f"{new_name} ({suffix})"
            suffix += 1
        used_names.add(candidate.casefold())

        # Save note
        output_path = output_dir / rel_path.parent / f"{candidate}.md"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(note_content, encoding='utf-8')

def create_global_indices(output_dir: Path):
    """Write the global tag index and map of content for ``output_dir``.

    Every ``*.md`` file below ``output_dir`` whose name does not start with
    an underscore is listed in ``_MOC.md``. Its first YAML document is read
    as frontmatter and each entry of its ``tags`` value is recorded in
    ``_TAGS.md``. Notes whose frontmatter is missing, unparsable or not a
    mapping are still listed in the map of content but contribute no tags.

    Args:
        output_dir: Root directory of the generated notes; created if it
            does not exist.
    """
    # Make sure the index files can be written even when no note was
    # generated beforehand.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Tag index
    tag_index = defaultdict(list)
    # MOC index
    moc_parts = ["# Global Map of Content\n\n"]

    # Sorted so the generated indices do not depend on directory order.
    for note in sorted(output_dir.glob("**/*.md")):
        if note.name.startswith('_'):
            continue

        # Forward slashes keep link targets identical across platforms.
        link = note.relative_to(output_dir).as_posix()

        try:
            with open(note, 'r', encoding='utf-8') as f:
                # Default of None covers an empty file (no documents).
                frontmatter = next(yaml.safe_load_all(f), None)
        except yaml.YAMLError:
            frontmatter = None

        tags = frontmatter.get('tags') if isinstance(frontmatter, dict) else None
        if isinstance(tags, str):
            # A single tag written as a plain string, not a list of characters.
            tags = [tags]
        elif not isinstance(tags, list):
            tags = []

        # Populate tag index
        for tag in tags:
            tag_index[str(tag)].append(link)

        # Add to MOC
        moc_parts.append(f"- [[{link}]]\n")

    # Write tag index
    tag_path = output_dir / "_TAGS.md"
    tag_parts = ["# Tag Index\n\n"]
    for tag, links in sorted(tag_index.items()):
        tag_parts.append(
            f"## {tag}\n" + '\n'.join(f"- [[{link}]]" for link in links) + "\n\n"
        )
    tag_path.write_text(''.join(tag_parts), encoding='utf-8')

    # Write global MOC
    moc_path = output_dir / "_MOC.md"
    moc_path.write_text(''.join(moc_parts), encoding='utf-8')

if __name__ == "__main__":
    # Vault layout: source notes are read from Notes/, generated notes are
    # written to Meta_Notes/.
    VAULT_ROOT = Path("/home/vikk/Documents/GitHub/College-Notes/")
    INPUT_DIR, OUTPUT_DIR = VAULT_ROOT / "Notes", VAULT_ROOT / "Meta_Notes"
    MODEL = "mistral-small:22b"

    # Convert every markdown note except those with "MOC" in the file name.
    for note_path in INPUT_DIR.glob("**/*.md"):
        if "MOC" in note_path.name:
            continue
        process_note_with_metadata(note_path, OUTPUT_DIR, MODEL)

    # Build the vault-wide tag index and map of content, then report.
    create_global_indices(OUTPUT_DIR)
    print(f"Metadata-enhanced vault created at: {OUTPUT_DIR}")