In [None]:
import json
import logging
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import List, Dict

import ollama
import yaml

def generate_metadata(content: str, model: str) -> Dict:
    """Extract concept metadata for a piece of text using a local Ollama model.

    Args:
        content: Text to analyze; only its first 2000 characters are sent.
        model: Name of the Ollama model to query.

    Returns:
        The dict decoded from the model's JSON reply (the prompt asks for the
        keys ``concepts``, ``tags`` and ``summary``), or ``{}`` when the
        content is blank, the request fails, or the reply is not a JSON
        object. This function is best-effort and never raises for those cases.
    """
    if not content or not content.strip():
        return {}

    prompt = f"""
        Analyze this text and extract key concepts following these rules:
        1. Identify unique technical terms and important concepts (PascalCase)
        2. Return primary concept as the first item
        3. Include 2-5 related concepts
        4. Generate 1-3 tags (lowercase-with-dashes)
        5. Create a 1-sentence summary

        Respond with a single JSON object using exactly these keys:
        "concepts" (list of strings, primary concept first),
        "tags" (list of strings) and "summary" (string).

        Text: {content[:2000]}
        """

    try:
        response = ollama.generate(
            model=model,
            prompt=prompt,
            format="json",
            options={"temperature": 0.2}
        )
        # The generated text is stored under "response"; with format="json"
        # it is a JSON document encoded as a string and must be decoded.
        raw = response["response"]
        parsed = json.loads(raw) if isinstance(raw, str) else raw
    except Exception as exc:  # best-effort boundary: a failed call must not abort the run
        logging.getLogger(__name__).warning("Metadata generation failed: %s", exc)
        return {}

    return parsed if isinstance(parsed, dict) else {}

def generate_ai_content(title: str, concepts: List[str], model: str) -> str:
    """Generate placeholder body text for a section that has no content.

    Args:
        title: Section title the explanation should be about.
        concepts: Concept names to mention in the prompt.
        model: Name of the Ollama model to query.

    Returns:
        The generated text prefixed with an "AI Generated Content" quote
        line, or a fixed "AI Generation Failed" placeholder when the request
        fails for any reason.
    """
    try:
        # str() keeps the join from failing if the model returned non-string concepts.
        concept_list = ', '.join(str(concept) for concept in concepts)
        prompt = f"""
        Generate a comprehensive explanation about: {title}
        Focus on these aspects:
        - Core principles/concepts ({concept_list})
        - Practical applications
        - Key relationships to other concepts
        - Simple examples
        Use academic tone with clear section headings
        """

        response = ollama.generate(
            model=model,
            prompt=prompt,
            options={"temperature": 0.5}
        )
        return f"> **AI Generated Content**\n{response['response']}"
    except Exception as exc:  # not a bare except: Ctrl-C must still stop a long run
        logging.getLogger(__name__).warning(
            "AI content generation failed for %r: %s", title, exc
        )
        return "> **AI Generation Failed** - Content placeholder"

def create_frontmatter(metadata: Dict, source_note: str, ai_generated: bool) -> str:
    """Serialize the YAML frontmatter for a generated concept note.

    Args:
        metadata: Metadata dict; the ``tags``, ``summary`` and ``concepts``
            keys are read, each falling back to an empty value when missing.
        source_note: Name of the originating note, stored as a ``[[wikilink]]``.
        ai_generated: Whether the note body was produced by the model.

    Returns:
        The frontmatter as YAML text (without the surrounding ``---`` lines),
        with keys in the order they are listed below.
    """
    # Read the clock once so a freshly created note has identical
    # created/modified stamps instead of two values microseconds apart.
    timestamp = datetime.now().isoformat()
    frontmatter = {
        'created': timestamp,
        'modified': timestamp,
        'source': f"[[{source_note}]]",
        'tags': metadata.get('tags', []),
        'summary': metadata.get('summary', ''),
        'concepts': metadata.get('concepts', []),
        'ai_generated': ai_generated
    }
    return yaml.safe_dump(frontmatter, sort_keys=False, allow_unicode=True)

# Path components are commonly limited to 255 bytes; staying well below that
# leaves room for the "-<counter>.md" suffix appended to the name.
_MAX_NAME_BYTES = 200


def _next_numbered_path(folder: Path, stem: str) -> Path:
    """Return the first ``folder/<stem>-N.md`` (N = 1, 2, ...) that does not exist."""
    counter = 1
    while True:
        candidate = folder / f"{stem}-{counter}.md"
        if not candidate.exists():
            return candidate
        counter += 1


def get_unique_path(output_dir: Path, base_name: str) -> Path:
    """Return an unused ``<output_dir>/<name>/<name>-N.md`` path for a note.

    ``base_name`` is sanitized (spaces become dashes, anything other than
    word characters, ``-`` and ``_`` is dropped), truncated so the resulting
    file name stays within filesystem limits, and replaced by ``untitled``
    if nothing is left.

    If a standalone ``<output_dir>/<name>.md`` exists it is moved into the
    ``<name>`` subfolder under the next free number before the new path is
    chosen. Apart from that case the subfolder is NOT created here; the
    caller is expected to create ``path.parent`` before writing.

    Args:
        output_dir: Directory that holds the per-concept subfolders.
        base_name: Raw note name to derive the folder and file name from.

    Returns:
        A path that does not currently exist.
    """
    sanitized_name = re.sub(r'[^\w\-_]', '', base_name.replace(' ', '-'))
    # Truncate by encoded size (the limit is in bytes, not characters);
    # errors='ignore' drops a multi-byte character cut in half by the slice.
    sanitized_name = (
        sanitized_name.encode('utf-8')[:_MAX_NAME_BYTES].decode('utf-8', errors='ignore')
    )
    if not sanitized_name:
        # An empty name would make the "subfolder" collapse to output_dir itself.
        sanitized_name = "untitled"

    base_path = output_dir / sanitized_name

    standalone = output_dir / f"{sanitized_name}.md"
    if standalone.is_file():
        base_path.mkdir(parents=True, exist_ok=True)
        # Use the next free slot so an existing numbered note is never overwritten.
        standalone.rename(_next_numbered_path(base_path, sanitized_name))

    return _next_numbered_path(base_path, sanitized_name)

def _clean_concepts(metadata: Dict) -> List[str]:
    """Return the non-blank string entries of ``metadata['concepts']``, stripped."""
    raw = metadata.get('concepts', [])
    if isinstance(raw, str):
        # A single concept returned as a bare string instead of a list.
        raw = [raw]
    if not isinstance(raw, list):
        return []
    return [item.strip() for item in raw if isinstance(item, str) and item.strip()]


def process_note_with_metadata(note_path: Path, output_dir: Path, model: str):
    """Split a note on its ``## `` headings and write one concept note per section.

    Text before the first ``## `` heading is treated as a section as well.
    For every non-blank section this generates metadata with the model,
    generates body text when the section has a title but no body, and writes
    a markdown file (frontmatter, heading, original context, content,
    related-concept links) to a unique path under ``output_dir``.

    Args:
        note_path: Source markdown note.
        output_dir: Root directory for the generated concept notes.
        model: Name of the Ollama model used for metadata and content.
    """
    content = note_path.read_text(encoding='utf-8')
    base_name = note_path.stem

    for section in re.split(r'\n## ', content):
        if not section.strip():
            continue

        lines = section.split('\n')
        original_title = lines[0].strip('#').strip()
        body = '\n'.join(lines[1:])
        full_content = f"{original_title}\n\n{body}"

        # Generate metadata. The model's reply is untrusted, so normalize the
        # concepts to a list of non-blank strings before anything indexes
        # into it, and store the cleaned list back so the frontmatter, the
        # file name and the wikilinks all agree.
        metadata = generate_metadata(full_content, model)
        if not isinstance(metadata, dict):
            metadata = {}
        concepts = _clean_concepts(metadata)
        metadata = {**metadata, 'concepts': concepts}

        # Generate content if empty
        ai_generated = False
        if not body.strip():
            body = generate_ai_content(original_title, concepts, model)
            ai_generated = True

        # Name the note after the primary concept, falling back to the
        # source note name plus the section title.
        if concepts:
            note_base_name = concepts[0].replace(' ', '')
        else:
            note_base_name = f"{base_name}-{original_title.replace(' ', '-')}"

        # Get unique path
        output_path = get_unique_path(output_dir, note_base_name)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Create frontmatter
        frontmatter = create_frontmatter(metadata, base_name, ai_generated)

        # Build note content
        related_links = '\n'.join(f"[[{c}]]" for c in concepts[1:])
        note_content = ''.join([
            f"---\n{frontmatter}\n---\n\n",
            f"# {output_path.stem}\n\n",
            f"## Original Context\nFrom [[{base_name}]]: {original_title}\n\n",
            "## Content\n" + body + "\n\n",
            "## Related Concepts\n" + related_links,
        ])

        output_path.write_text(note_content, encoding='utf-8')

def _read_frontmatter(note: Path) -> Dict:
    """Return a note's leading YAML document as a dict, or ``{}`` if absent or unreadable."""
    try:
        with open(note, 'r', encoding='utf-8') as f:
            # Only the first YAML document (the frontmatter) is consumed;
            # the None default covers completely empty files.
            frontmatter = next(yaml.safe_load_all(f), None)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        logging.getLogger(__name__).warning(
            "Skipping %s: unreadable frontmatter (%s)", note, exc
        )
        return {}
    return frontmatter if isinstance(frontmatter, dict) else {}


def create_global_indices(output_dir: Path):
    """Write ``_CONCEPTS.md``, an index of notes grouped by primary concept.

    Every ``*.md`` file in a subfolder of ``output_dir`` is scanned; files
    whose name starts with ``_`` and files directly inside ``output_dir`` are
    ignored. A note is listed under the first entry of the ``concepts`` list
    in its frontmatter. Notes without usable frontmatter or concepts are
    skipped instead of aborting the whole index.

    Args:
        output_dir: Root directory of the generated concept notes; created
            if it does not exist so the index can always be written.
    """
    concept_index = defaultdict(list)

    # sorted() makes the index deterministic; glob order is not guaranteed.
    for note in sorted(output_dir.glob("**/*.md")):
        if note.name.startswith('_') or note.parent == output_dir:
            continue

        concepts = _read_frontmatter(note).get('concepts')
        if not isinstance(concepts, list) or not concepts:
            continue

        primary_concept = concepts[0]
        # Nested or empty values cannot serve as a heading (or dict key).
        if isinstance(primary_concept, (list, dict)) or primary_concept in (None, ""):
            continue

        concept_index[str(primary_concept)].append(note.relative_to(output_dir))

    # Create concept index
    sections = [
        f"## {concept}\n"
        # as_posix() keeps forward slashes in wikilinks on every platform.
        + '\n'.join(f"- [[{path.with_suffix('').as_posix()}]]" for path in paths)
        + "\n\n"
        for concept, paths in sorted(concept_index.items())
    ]
    index_content = "# Concept Index\n\n" + ''.join(sections)

    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "_CONCEPTS.md").write_text(index_content, encoding='utf-8')

if __name__ == "__main__":
    # Vault layout: source notes are read from Notes/, generated concept
    # notes are written to Concept_Notes/.
    VAULT_ROOT = Path("/home/vikk/Documents/GitHub/College-Notes")
    INPUT_DIR = VAULT_ROOT / "Notes"
    OUTPUT_DIR = VAULT_ROOT / "Concept_Notes"
    MODEL = "mistral-small:22b"

    # Convert every markdown note, skipping any file with "MOC" in its name.
    for note_path in INPUT_DIR.glob("**/*.md"):
        if "MOC" in note_path.name:
            continue
        process_note_with_metadata(note_path, OUTPUT_DIR, MODEL)

    # Build the concept index once all notes have been written.
    create_global_indices(OUTPUT_DIR)

    print(f"Organized concept notes created at: {OUTPUT_DIR}")

/tmp/ipykernel_4008/3035785273.py:30: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  return response.json().get("response", {})


OSError: [Errno 36] File name too long: '/home/vikk/Documents/GitHub/College-Notes/Concept_Notes/Physics Assignment 1-i)Describe-and-explain-the-formation-of-Newton’s-rings-in-reflected-light-and-obtain-the-expressions-for-the-diameter-of-the-nth-bright-and-dark-ring.-From-your-results,-find-expressions-for-radius-of-curvature-of-the-Plano-convex-lens,-wavelength-of-monochromatic-light-and-refractive-index-of-the-medium..md'