In [None]:
#!/usr/bin/env python3
"""
OParl API Structure Analyzer for Augsburg
Analyzes the OParl API and generates comprehensive documentation
"""

import requests
import json
import time
from datetime import datetime
from typing import Dict, List, Any, Optional
from collections import defaultdict
import sys

class OParlAnalyzer:
    def __init__(self, system_url: str):
        """Set up the HTTP session and the empty result containers.

        Args:
            system_url: URL of the OParl System endpoint; stored unchanged
                and requested later by analyze_system().
        """
        # One shared session so every request carries the same headers.
        http = requests.Session()
        http.headers.update({
            'User-Agent': 'OParl-Analyzer/1.0',
            'Accept': 'application/json'
        })

        self.system_url = system_url
        self.session = http
        # Counters; missing keys start at 0.
        self.stats = defaultdict(int)
        # Raw sample objects, keyed by object kind.
        self.examples = {}
        # Summaries filled in by the analyze_* methods.
        self.structure = {}

    def fetch_json(self, url: str, timeout: int = 10) -> Optional[Dict]:
        """Fetch a URL through the shared session and decode it as a JSON object.

        Failures are reported on stderr instead of being raised, so callers
        only need to check for None.

        Args:
            url: Address to request with HTTP GET.
            timeout: Timeout value handed to the session's get() call.

        Returns:
            The decoded JSON object, or None when the request fails, the
            server answers with an error status, the body is not valid JSON,
            or the decoded value is not a JSON object.
        """
        try:
            response = self.session.get(url, timeout=timeout)
            response.raise_for_status()
            self.stats['api_calls'] += 1
            time.sleep(0.5)  # Rate limiting
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # whose decode error is not a RequestException subclass.
            print(f"Error fetching {url}: {e}", file=sys.stderr)
            return None

        if not isinstance(payload, dict):
            # Every caller uses dict methods on the result; a JSON array or
            # scalar would only fail later with a less helpful error.
            print(
                f"Error fetching {url}: expected a JSON object, "
                f"got {type(payload).__name__}",
                file=sys.stderr,
            )
            return None

        return payload

    def fetch_paginated_list(self, url: str, max_items: int = 10) -> List[Dict]:
        """Fetch items from a paginated list endpoint"""
        items = []
        page_count = 0

        while url and len(items) < max_items:
            data = self.fetch_json(url)
            if not data:
                break

            page_count += 1
            if 'data' in data:
                items.extend(data['data'][:max_items - len(items)])
                url = data.get('links', {}).get('next')
            else:
                # Single item, not a list
                items.append(data)
                break

        self.stats[f'pages_fetched'] += page_count
        return items

    def analyze_object_structure(self, obj: Dict, object_type: str) -> Dict[str, Any]:
        """Analyze the structure of an OParl object"""
        structure = {
            'type': obj.get('type', 'unknown'),
            'properties': {},
            'relationships': [],
            'array_properties': []
        }

        for key, value in obj.items():
            if key in ['id', 'type']:
                continue

            if isinstance(value, str) and value.startswith('http'):
                structure['relationships'].append(key)
            elif isinstance(value, list):
                structure['array_properties'].append({
                    'name': key,
                    'length': len(value),
                    'sample_type': type(value[0]).__name__ if value else 'empty'
                })
            else:
                structure['properties'][key] = type(value).__name__

        return structure

    def analyze_system(self) -> Dict:
        """Analyze the System endpoint"""
        print("Analyzing System endpoint...")
        system_data = self.fetch_json(self.system_url)

        if not system_data:
            return {}

        self.structure['system'] = {
            'name': system_data.get('name'),
            'vendor': system_data.get('vendor'),
            'product': system_data.get('product'),
            'oparlVersion': system_data.get('oparlVersion'),
            'license': system_data.get('license'),
            'contactEmail': system_data.get('contactEmail'),
            'website': system_data.get('website'),
            'bodies_url': system_data.get('body')
        }

        self.examples['system'] = system_data
        return system_data

    def analyze_bodies(self, bodies_url: str) -> List[Dict]:
        """Analyze Body endpoints"""
        print("Analyzing Bodies...")
        bodies = self.fetch_paginated_list(bodies_url, max_items=5)

        if not bodies:
            return []

        self.structure['bodies'] = {
            'count': len(bodies),
            'structure': self.analyze_object_structure(bodies[0], 'Body') if bodies else {},
            'examples': []
        }

        for body in bodies[:2]:  # Analyze first 2 bodies in detail
            body_info = {
                'id': body.get('id'),
                'name': body.get('name'),
                'short_name': body.get('shortName'),
                'ags': body.get('ags'),
                'organization_url': body.get('organization'),
                'person_url': body.get('person'),
                'meeting_url': body.get('meeting'),
                'paper_url': body.get('paper'),
                'legislative_term_url': body.get('legislativeTerm')
            }
            self.structure['bodies']['examples'].append(body_info)

        self.examples['body'] = bodies[0] if bodies else {}
        return bodies

    def analyze_organizations(self, org_url: str) -> None:
        """Analyze Organization endpoints"""
        print("Analyzing Organizations...")
        orgs = self.fetch_paginated_list(org_url, max_items=10)

        self.structure['organizations'] = {
            'count': len(orgs),
            'structure': self.analyze_object_structure(orgs[0], 'Organization') if orgs else {},
            'sample_names': [org.get('name') for org in orgs[:5]]
        }

        self.examples['organization'] = orgs[0] if orgs else {}
        self.stats['organizations_total'] = len(orgs)

    def analyze_persons(self, person_url: str) -> None:
        """Analyze Person endpoints"""
        print("Analyzing Persons...")
        persons = self.fetch_paginated_list(person_url, max_items=10)

        self.structure['persons'] = {
            'count': len(persons),
            'structure': self.analyze_object_structure(persons[0], 'Person') if persons else {},
            'sample_names': [person.get('name') for person in persons[:5]]
        }

        self.examples['person'] = persons[0] if persons else {}
        self.stats['persons_total'] = len(persons)

    def analyze_meetings(self, meeting_url: str) -> None:
        """Analyze Meeting endpoints"""
        print("Analyzing Meetings...")
        meetings = self.fetch_paginated_list(meeting_url, max_items=10)

        self.structure['meetings'] = {
            'count': len(meetings),
            'structure': self.analyze_object_structure(meetings[0], 'Meeting') if meetings else {},
            'sample_info': []
        }

        for meeting in meetings[:3]:
            meeting_info = {
                'name': meeting.get('name'),
                'start': meeting.get('start'),
                'location': meeting.get('location'),
                'has_agenda_items': 'agendaItem' in meeting,
                'has_invitation': 'invitation' in meeting,
                'has_results_protocol': 'resultsProtocol' in meeting
            }
            self.structure['meetings']['sample_info'].append(meeting_info)

        self.examples['meeting'] = meetings[0] if meetings else {}
        self.stats['meetings_total'] = len(meetings)

        # Analyze a meeting's agenda items
        if meetings and 'agendaItem' in meetings[0]:
            self.analyze_agenda_items(meetings[0]['agendaItem'])

    def analyze_agenda_items(self, agenda_url: str) -> None:
        """Analyze AgendaItem endpoints"""
        print("Analyzing Agenda Items...")
        items = self.fetch_paginated_list(agenda_url, max_items=10)

        self.structure['agenda_items'] = {
            'count': len(items),
            'structure': self.analyze_object_structure(items[0], 'AgendaItem') if items else {},
            'sample_info': []
        }

        for item in items[:3]:
            item_info = {
                'number': item.get('number'),
                'name': item.get('name'),
                'has_consultation': 'consultation' in item,
                'has_auxiliary_file': 'auxiliaryFile' in item,
                'result': item.get('result')
            }
            self.structure['agenda_items']['sample_info'].append(item_info)

        self.examples['agenda_item'] = items[0] if items else {}

    def analyze_papers(self, paper_url: str) -> None:
        """Analyze Paper endpoints"""
        print("Analyzing Papers...")
        papers = self.fetch_paginated_list(paper_url, max_items=10)

        self.structure['papers'] = {
            'count': len(papers),
            'structure': self.analyze_object_structure(papers[0], 'Paper') if papers else {},
            'sample_info': []
        }

        for paper in papers[:3]:
            paper_info = {
                'name': paper.get('name'),
                'reference': paper.get('reference'),
                'paper_type': paper.get('paperType'),
                'has_main_file': 'mainFile' in paper,
                'has_auxiliary_files': 'auxiliaryFile' in paper
            }
            self.structure['papers']['sample_info'].append(paper_info)

        self.examples['paper'] = papers[0] if papers else {}
        self.stats['papers_total'] = len(papers)

        # Analyze file structure
        if papers and 'mainFile' in papers[0]:
            main_file_url = papers[0]['mainFile']
            if isinstance(main_file_url, str):
                self.analyze_file(main_file_url)

    def analyze_file(self, file_url: str) -> None:
        """Analyze File endpoints"""
        print("Analyzing File structure...")
        file_data = self.fetch_json(file_url)

        if file_data:
            self.structure['files'] = {
                'structure': self.analyze_object_structure(file_data, 'File'),
                'example': {
                    'name': file_data.get('name'),
                    'fileName': file_data.get('fileName'),
                    'mimeType': file_data.get('mimeType'),
                    'size': file_data.get('size'),
                    'has_download_url': 'accessUrl' in file_data
                }
            }
            self.examples['file'] = file_data

    def run_full_analysis(self) -> Dict:
        """Run complete API analysis"""
        print("=" * 60)
        print("Starting OParl API Analysis for Augsburg")
        print("=" * 60)

        # Start with System
        system_data = self.analyze_system()
        if not system_data:
            print("Failed to fetch system data!")
            return {}

        # Analyze Bodies
        bodies = self.analyze_bodies(system_data.get('body'))
        if not bodies:
            print("No bodies found!")
            return self.structure

        # Use first body for detailed analysis
        body = bodies[0]

        # Analyze all major endpoints
        if body.get('organization'):
            self.analyze_organizations(body['organization'])

        if body.get('person'):
            self.analyze_persons(body['person'])

        if body.get('meeting'):
            self.analyze_meetings(body['meeting'])

        if body.get('paper'):
            self.analyze_papers(body['paper'])

        print("\n" + "=" * 60)
        print("Analysis Complete!")
        print(f"Total API Calls: {self.stats['api_calls']}")
        print("=" * 60)

        return self.structure

    def generate_markdown_report(self, output_file: str = 'OPARL_API_STRUCTURE.md'):
        """Generate comprehensive markdown documentation"""

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("# OParl API Guide - Stadt Augsburg\n\n")
            f.write(f"*Last Updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n")
            f.write("---\n\n")

            # Quick Start Guide
            f.write("## Quick Start: How to Use This API\n\n")
            f.write("This API lets you access all public political documents, meetings, and decisions from Augsburg's city council.\n\n")

            f.write("### Basic Concept\n\n")
            f.write("**If you want to...** → **Then you...**\n\n")
            f.write("- **Get all meetings** → Call `https://www.augsburg.sitzung-online.de/public/oparl/meetings?body=1`\n")
            f.write("- **Download a PDF document** → Find the paper, get its `mainFile.accessUrl`, then download it\n")
            f.write("- **See what was discussed in a meeting** → Get the meeting, then fetch its `agendaItem` list\n")
            f.write("- **Find all documents about a topic** → Search through papers using the papers endpoint\n")
            f.write("- **Track a specific committee** → Get organizations, find your committee, then get its meetings\n\n")

            f.write("### Step-by-Step Examples\n\n")

            f.write("#### Example 1: Getting Today's Meetings\n\n")
            f.write("**What you want:** See what meetings are happening today.\n\n")
            f.write("**How to do it:**\n")
            f.write("1. Call: `GET https://www.augsburg.sitzung-online.de/public/oparl/meetings?body=1`\n")
            f.write("2. You get: A list of meetings with dates, names, and locations\n")
            f.write("3. Filter by date: Look for `start` field matching today's date\n\n")

            f.write("```bash\n")
            f.write("curl 'https://www.augsburg.sitzung-online.de/public/oparl/meetings?body=1'\n")
            f.write("```\n\n")

            f.write("#### Example 2: Downloading a Document\n\n")
            f.write("**What you want:** Download a PDF of a specific proposal or document.\n\n")
            f.write("**How to do it:**\n")
            f.write("1. Call: `GET https://www.augsburg.sitzung-online.de/public/oparl/papers?body=1`\n")
            f.write("2. Find the paper you want by looking at `name` or `reference`\n")
            f.write("3. Get the `mainFile.accessUrl` from that paper\n")
            f.write("4. Download: Open that URL in your browser or use curl\n\n")

            f.write("```python\n")
            f.write("import requests\n\n")
            f.write("# Get papers\n")
            f.write("papers = requests.get('https://www.augsburg.sitzung-online.de/public/oparl/papers?body=1').json()\n")
            f.write("first_paper = papers['data'][0]\n\n")
            f.write("# Download the PDF\n")
            f.write("pdf_url = first_paper['mainFile']['accessUrl']\n")
            f.write("pdf_response = requests.get(pdf_url)\n")
            f.write("with open('document.pdf', 'wb') as f:\n")
            f.write("    f.write(pdf_response.content)\n")
            f.write("```\n\n")

            f.write("#### Example 3: Finding All Documents About a Topic\n\n")
            f.write("**What you want:** Find all papers that mention \"Verkehr\" (traffic).\n\n")
            f.write("**How to do it:**\n")
            f.write("1. Call: `GET https://www.augsburg.sitzung-online.de/public/oparl/papers?body=1`\n")
            f.write("2. Loop through all pages (check `links.next` for pagination)\n")
            f.write("3. For each paper, check if `name` contains your keyword\n")
            f.write("4. Collect matching papers and their PDF links\n\n")

            f.write("---\n\n")

            # System Information
            f.write("## System Information\n\n")
            sys_info = self.structure.get('system', {})
            f.write(f"- **Name:** {sys_info.get('name', 'N/A')}\n")
            f.write(f"- **OParl Version:** {sys_info.get('oparlVersion', 'N/A')}\n")
            f.write(f"- **Vendor:** {sys_info.get('vendor', 'N/A')}\n")
            f.write(f"- **Product:** {sys_info.get('product', 'N/A')}\n")
            f.write(f"- **License:** {sys_info.get('license', 'N/A')}\n")
            f.write(f"- **Contact:** {sys_info.get('contactEmail', 'N/A')}\n")
            f.write(f"- **Website:** {sys_info.get('website', 'N/A')}\n")
            f.write(f"- **API Endpoint:** `{self.system_url}`\n\n")

            # API Capabilities
            f.write("## API Capabilities & Available Data\n\n")
            f.write("This OParl API provides access to the following data types:\n\n")

            capabilities = []
            if 'bodies' in self.structure:
                capabilities.append("✅ **Bodies** - Political bodies and their structure")
            if 'organizations' in self.structure:
                capabilities.append(f"✅ **Organizations** - {self.structure['organizations']['count']} organizations found")
            if 'persons' in self.structure:
                capabilities.append(f"✅ **Persons** - {self.structure['persons']['count']} persons found")
            if 'meetings' in self.structure:
                capabilities.append(f"✅ **Meetings** - {self.structure['meetings']['count']} meetings found")
            if 'agenda_items' in self.structure:
                capabilities.append(f"✅ **Agenda Items** - Meeting agenda items with documents")
            if 'papers' in self.structure:
                capabilities.append(f"✅ **Papers** - {self.structure['papers']['count']} papers/documents found")
            if 'files' in self.structure:
                capabilities.append("✅ **Files** - PDF documents and attachments")

            for cap in capabilities:
                f.write(f"{cap}\n")

            f.write(f"\n**Total API Calls Made:** {self.stats['api_calls']}\n\n")

            # Bodies
            if 'bodies' in self.structure:
                f.write("## Bodies (Gremien)\n\n")
                bodies = self.structure['bodies']
                f.write(f"Found **{bodies['count']}** bodies.\n\n")

                for i, body in enumerate(bodies.get('examples', []), 1):
                    f.write(f"### Body {i}: {body.get('name', 'Unknown')}\n\n")
                    f.write(f"- **ID:** `{body.get('id')}`\n")
                    f.write(f"- **Short Name:** {body.get('short_name', 'N/A')}\n")
                    f.write(f"- **AGS:** {body.get('ags', 'N/A')}\n")
                    f.write("\n**Available Endpoints:**\n")
                    f.write(f"- Organizations: `{body.get('organization_url', 'N/A')}`\n")
                    f.write(f"- Persons: `{body.get('person_url', 'N/A')}`\n")
                    f.write(f"- Meetings: `{body.get('meeting_url', 'N/A')}`\n")
                    f.write(f"- Papers: `{body.get('paper_url', 'N/A')}`\n\n")

            # Organizations
            if 'organizations' in self.structure:
                f.write("## Organizations\n\n")
                orgs = self.structure['organizations']
                f.write(f"Total organizations found: **{orgs['count']}**\n\n")
                f.write("**Sample Organizations:**\n")
                for name in orgs.get('sample_names', []):
                    f.write(f"- {name}\n")
                f.write("\n")

            # Persons
            if 'persons' in self.structure:
                f.write("## Persons\n\n")
                persons = self.structure['persons']
                f.write(f"Total persons found: **{persons['count']}**\n\n")
                f.write("**Sample Persons:**\n")
                for name in persons.get('sample_names', []):
                    f.write(f"- {name}\n")
                f.write("\n")

            # Meetings
            if 'meetings' in self.structure:
                f.write("## Meetings (Sitzungen)\n\n")
                meetings = self.structure['meetings']
                f.write(f"Total meetings found: **{meetings['count']}**\n\n")
                f.write("**Sample Meetings:**\n\n")
                for i, meeting in enumerate(meetings.get('sample_info', []), 1):
                    f.write(f"### Meeting {i}\n")
                    f.write(f"- **Name:** {meeting.get('name', 'N/A')}\n")
                    f.write(f"- **Date:** {meeting.get('start', 'N/A')}\n")
                    f.write(f"- **Location:** {meeting.get('location', 'N/A')}\n")
                    f.write(f"- **Has Agenda Items:** {'Yes' if meeting.get('has_agenda_items') else 'No'}\n")
                    f.write(f"- **Has Invitation:** {'Yes' if meeting.get('has_invitation') else 'No'}\n")
                    f.write(f"- **Has Results Protocol:** {'Yes' if meeting.get('has_results_protocol') else 'No'}\n\n")

            # Agenda Items
            if 'agenda_items' in self.structure:
                f.write("## Agenda Items (Tagesordnungspunkte)\n\n")
                items = self.structure['agenda_items']
                f.write(f"Sample agenda items analyzed: **{items['count']}**\n\n")
                f.write("**Sample Agenda Items:**\n\n")
                for i, item in enumerate(items.get('sample_info', []), 1):
                    f.write(f"### Item {i}\n")
                    f.write(f"- **Number:** {item.get('number', 'N/A')}\n")
                    f.write(f"- **Name:** {item.get('name', 'N/A')}\n")
                    f.write(f"- **Result:** {item.get('result', 'N/A')}\n")
                    f.write(f"- **Has Consultation:** {'Yes' if item.get('has_consultation') else 'No'}\n")
                    f.write(f"- **Has Auxiliary Files:** {'Yes' if item.get('has_auxiliary_file') else 'No'}\n\n")

            # Papers
            if 'papers' in self.structure:
                f.write("## Papers (Vorlagen/Dokumente)\n\n")
                papers = self.structure['papers']
                f.write(f"Total papers found: **{papers['count']}**\n\n")
                f.write("**Sample Papers:**\n\n")
                for i, paper in enumerate(papers.get('sample_info', []), 1):
                    f.write(f"### Paper {i}\n")
                    f.write(f"- **Name:** {paper.get('name', 'N/A')}\n")
                    f.write(f"- **Reference:** {paper.get('reference', 'N/A')}\n")
                    f.write(f"- **Type:** {paper.get('paper_type', 'N/A')}\n")
                    f.write(f"- **Has Main File:** {'Yes' if paper.get('has_main_file') else 'No'}\n")
                    f.write(f"- **Has Auxiliary Files:** {'Yes' if paper.get('has_auxiliary_files') else 'No'}\n\n")

            # Files
            if 'files' in self.structure:
                f.write("## Files (Dateien)\n\n")
                file_info = self.structure['files'].get('example', {})
                f.write("Files contain the actual PDF documents and attachments.\n\n")
                f.write("**Example File Structure:**\n")
                f.write(f"- **Name:** {file_info.get('name', 'N/A')}\n")
                f.write(f"- **File Name:** {file_info.get('fileName', 'N/A')}\n")
                f.write(f"- **MIME Type:** {file_info.get('mimeType', 'N/A')}\n")
                f.write(f"- **Size:** {file_info.get('size', 'N/A')} bytes\n")
                f.write(f"- **Downloadable:** {'Yes' if file_info.get('has_download_url') else 'No'}\n\n")

            # Data Structure Details
            f.write("## Understanding Object Relationships\n\n")
            f.write("### How Objects Connect\n\n")
            f.write("**The most important fields to know:**\n\n")
            f.write("- **`id`**: Unique URL for this object (save this to fetch it again later)\n")
            f.write("- **`name`/`title`**: Human-readable name (what you show to users)\n")
            f.write("- **`type`**: What kind of object this is (Meeting, Paper, etc.)\n")
            f.write("- **`created`/`modified`**: When it was created/updated (for syncing)\n")
            f.write("- **`deleted`**: If true, skip this object (it was removed)\n\n")
            f.write("**For files/PDFs:**\n")
            f.write("- **`accessUrl`**: Direct link to download PDF\n")
            f.write("- **`mimeType`**: File type (usually \"pdf\")\n")
            f.write("- **`size`**: File size in bytes\n")
            f.write("- **`fileName`**: Original filename\n\n")
            f.write("**For dates:**\n")
            f.write("- **`start`/`end`**: Meeting times\n")
            f.write("- **`date`**: Paper/document date\n")
            f.write("- Format: ISO 8601 (YYYY-MM-DDTHH:MM:SS+TZ)\n\n")

            f.write("### Relationships Between Objects\n\n")
            f.write("```\n")
            f.write("System\n")
            f.write(" └── Body (Stadt Augsburg)\n")
            f.write("      ├── Organizations (Committees)\n")
            f.write("      │    └── Meetings\n")
            f.write("      ├── Persons (Politicians)\n")
            f.write("      ├── Meetings\n")
            f.write("      │    └── AgendaItems (Topics)\n")
            f.write("      │         ├── Consultation (How it was handled)\n")
            f.write("      │         └── Files (auxiliary documents)\n")
            f.write("      └── Papers (Proposals/Documents)\n")
            f.write("           ├── MainFile (the main PDF)\n")
            f.write("           ├── AuxiliaryFiles (attachments)\n")
            f.write("           └── Consultation (links to meetings)\n")
            f.write("```\n\n")

            # Complete Workflows
            f.write("## Complete Workflows: From Question to Answer\n\n")

            f.write("### Workflow 1: \"I want to know what the city council decided about traffic last month\"\n\n")
            f.write("**Step-by-step:**\n\n")
            f.write("1. **Get all meetings from last month:**\n")
            f.write("   ```\n")
            f.write("   GET https://www.augsburg.sitzung-online.de/public/oparl/meetings?body=1\n")
            f.write("   ```\n")
            f.write("   You get: List of meetings with dates\n\n")
            f.write("2. **Filter meetings by date:**\n")
            f.write("   Look at the `start` field of each meeting, keep only those from last month\n\n")
            f.write("3. **For each meeting, get the agenda items:**\n")
            f.write("   Check if meeting has `agendaItem` field\n")
            f.write("   ```\n")
            f.write("   GET [meeting's agendaItem URL]\n")
            f.write("   ```\n")
            f.write("   You get: List of topics discussed\n\n")
            f.write("4. **Search agenda items for \"Verkehr\" (traffic):**\n")
            f.write("   Look at each item's `name` field\n\n")
            f.write("5. **Get the documents:**\n")
            f.write("   Each agenda item may have `consultation` which links to `paper`\n")
            f.write("   From paper, get `mainFile.accessUrl` to download PDF\n\n")

            f.write("### Workflow 2: \"I want to download all PDFs from the education committee\"\n\n")
            f.write("**Step-by-step:**\n\n")
            f.write("1. **Find the education committee:**\n")
            f.write("   ```\n")
            f.write("   GET https://www.augsburg.sitzung-online.de/public/oparl/organizations?body=1\n")
            f.write("   ```\n")
            f.write("   You get: List of all committees/organizations\n")
            f.write("   Look for one with \"Bildung\" in the name\n\n")
            f.write("2. **Get meetings of that committee:**\n")
            f.write("   ```\n")
            f.write("   GET [organization's meeting URL]\n")
            f.write("   ```\n")
            f.write("   You get: All meetings of that committee\n\n")
            f.write("3. **For each meeting, get papers:**\n")
            f.write("   If meeting has `invitation` or `resultsProtocol`, those are files\n")
            f.write("   Get agenda items, then their consultations, then papers\n\n")
            f.write("4. **Download all PDFs:**\n")
            f.write("   From each paper's `mainFile.accessUrl`, download the PDF\n\n")

            f.write("### Workflow 3: \"I want to track when specific topics are discussed\"\n\n")
            f.write("**Step-by-step:**\n\n")
            f.write("1. **Get all upcoming meetings:**\n")
            f.write("   ```\n")
            f.write("   GET https://www.augsburg.sitzung-online.de/public/oparl/meetings?body=1\n")
            f.write("   ```\n")
            f.write("   Filter by `start` date in the future\n\n")
            f.write("2. **For each meeting, check if it has agenda items:**\n")
            f.write("   Some meetings may not have agenda items published yet\n\n")
            f.write("3. **Search agenda item names for your keywords:**\n")
            f.write("   Check `name` field of each agenda item\n\n")
            f.write("4. **Get notification details:**\n")
            f.write("   Save: meeting name, date, location, agenda item name, paper reference\n\n")

            f.write("### Workflow 4: \"I want to build a searchable database\"\n\n")
            f.write("**Step-by-step:**\n\n")
            f.write("1. **Download all papers:**\n")
            f.write("   ```\n")
            f.write("   GET https://www.augsburg.sitzung-online.de/public/oparl/papers?body=1\n")
            f.write("   ```\n")
            f.write("   Loop through pages using `links.next`\n\n")
            f.write("2. **For each paper:**\n")
            f.write("   - Save metadata: name, reference, date, paperType\n")
            f.write("   - Download PDF from `mainFile.accessUrl`\n")
            f.write("   - Extract text using a PDF library (PyMuPDF, pdfplumber)\n\n")
            f.write("3. **Store in database:**\n")
            f.write("   - Save to Parquet (for analysis) or PostgreSQL (for search)\n")
            f.write("   - Create full-text search index\n\n")
            f.write("4. **Link relationships:**\n")
            f.write("   - Paper → Consultation → AgendaItem → Meeting → Organization\n")
            f.write("   - Store these links to enable queries like \"all papers discussed by committee X\"\n\n")

            f.write("---\n\n")

            f.write("## Understanding the Data Flow\n\n")
            f.write("```\n")
            f.write("You want to know: \"What did they decide?\"\n")
            f.write("     ↓\n")
            f.write("1. Get MEETING (when did they meet?)\n")
            f.write("     ↓\n")
            f.write("2. Get AGENDA ITEMS (what did they discuss?)\n")
            f.write("     ↓\n")
            f.write("3. Get CONSULTATION (how did they handle it?)\n")
            f.write("     ↓\n")
            f.write("4. Get PAPER (what was the proposal?)\n")
            f.write("     ↓\n")
            f.write("5. Get FILE/PDF (read the details)\n")
            f.write("```\n\n")

            f.write("## API Response Patterns\n\n")
            f.write("### Pattern 1: List Endpoints\n")
            f.write("When you call a list endpoint (meetings, papers, organizations), you get:\n")
            f.write("```json\n")
            f.write("{\n")
            f.write("  \"data\": [ /* array of items */ ],\n")
            f.write("  \"links\": {\n")
            f.write("    \"next\": \"URL to next page\",\n")
            f.write("    \"prev\": \"URL to previous page\"\n")
            f.write("  },\n")
            f.write("  \"pagination\": { /* optional */ }\n")
            f.write("}\n")
            f.write("```\n\n")

            f.write("**What to do:** Loop through pages using `links.next` until it's null/empty\n\n")

            f.write("### Pattern 2: Single Object\n")
            f.write("When you fetch a specific item by URL, you get just the object:\n")
            f.write("```json\n")
            f.write("{\n")
            f.write("  \"id\": \"...\",\n")
            f.write("  \"type\": \"...\",\n")
            f.write("  /* object properties */\n")
            f.write("}\n")
            f.write("```\n\n")

            f.write("### Pattern 3: Relationships\n")
            f.write("Objects reference other objects via URLs:\n")
            f.write("```json\n")
            f.write("{\n")
            f.write("  \"id\": \"...\",\n")
            f.write("  \"meeting\": \"https://...../meetings?id=123\",  // ← fetch this URL\n")
            f.write("  \"organization\": [                             // ← array of URLs\n")
            f.write("    \"https://...../organizations?id=456\"\n")
            f.write("  ]\n")
            f.write("}\n")
            f.write("```\n\n")

            f.write("**What to do:** Make another API call to that URL to get the related object\n\n")

            # Troubleshooting & Tips
            f.write("## Troubleshooting & Common Issues\n\n")

            f.write("### Issue 1: \"I get too much data, how do I filter?\"\n")
            f.write("**Problem:** API returns hundreds of items, you only want recent ones\n\n")
            f.write("**Solution:**\n")
            f.write("- After getting data, filter by date fields: `start`, `date`, `created`, `modified`\n")
            f.write("- Example: Only meetings after 2025-01-01:\n")
            f.write("  ```python\n")
            f.write("  meetings = [m for m in data['data'] if m['start'] >= '2025-01-01']\n")
            f.write("  ```\n\n")

            f.write("### Issue 2: \"The data has weird characters (Ã¼, â€)\"\n")
            f.write("**Problem:** Text encoding issues\n\n")
            f.write("**Solution:**\n")
            f.write("- Make sure you're using UTF-8 encoding when reading/writing\n")
            f.write("- In Python: `response.json()` handles this automatically\n")
            f.write("- When saving to file: `open('file.txt', 'w', encoding='utf-8')`\n\n")

            f.write("### Issue 3: \"Some meetings don't have agenda items\"\n")
            f.write("**Problem:** Not all meetings have published agendas yet\n\n")
            f.write("**Solution:**\n")
            f.write("- Always check if field exists before accessing: `if 'agendaItem' in meeting:`\n")
            f.write("- Future meetings may not have agendas published\n")
            f.write("- Past meetings should have complete data\n\n")

            f.write("### Issue 4: \"I need to make hundreds of API calls\"\n")
            f.write("**Problem:** Building relationships requires many requests\n\n")
            f.write("**Solution:**\n")
            f.write("- Add delays between requests (0.5-1 second): `time.sleep(0.5)`\n")
            f.write("- Cache results: Don't fetch the same URL twice\n")
            f.write("- Use batch processing: Process 10-20 items, then take a break\n\n")

            f.write("### Issue 5: \"Download URLs don't work\"\n")
            f.write("**Problem:** Some PDFs return 403 or 404\n\n")
            f.write("**Solution:**\n")
            f.write("- Use `accessUrl` not `downloadUrl` (downloadUrl is often empty)\n")
            f.write("- Add user agent header: `headers={'User-Agent': 'MyApp/1.0'}`\n")
            f.write("- Some documents may be restricted or deleted (`deleted: true`)\n\n")

            f.write("## Practical Tips\n\n")
            f.write("1. **Start Small:** Test with one meeting or one paper before processing everything\n")
            f.write("2. **Save Progress:** When downloading many PDFs, save which ones you've done\n")
            f.write("3. **Handle Pagination:** Always loop through `links.next` for complete data\n")
            f.write("4. **Check Deleted Flag:** Skip items where `deleted: true`\n")
            f.write("5. **Use Timestamps:** `modified` field helps you sync only changed data\n\n")

            f.write("---\n\n")
            for obj_type, example in self.examples.items():
                f.write(f"### {obj_type.title()} Example\n\n")
                f.write("```json\n")
                f.write(json.dumps(example, indent=2, ensure_ascii=False))
                f.write("\n```\n\n")


            # Footer
            f.write("---\n\n")

            f.write("## Quick Reference Cheat Sheet\n\n")
            f.write("| I want to... | Endpoint | What I get |\n")
            f.write("|-------------|----------|------------|\n")
            f.write("| See all meetings | `/meetings?body=1` | List of meetings with dates, names |\n")
            f.write("| See all documents | `/papers?body=1` | List of papers/proposals with titles |\n")
            f.write("| See all committees | `/organizations?body=1` | List of committees/councils |\n")
            f.write("| Get PDF of a paper | Get paper → `mainFile.accessUrl` | Direct PDF download link |\n")
            f.write("| See meeting agenda | Get meeting → `agendaItem` URL | List of topics discussed |\n")
            f.write("| Find committee meetings | Get org → `meeting` URL | Meetings of that committee |\n")
            f.write("| See meeting protocol | Get meeting → `resultsProtocol` | PDF of meeting minutes |\n")
            f.write("| Track a paper's journey | Paper → `consultation` → `agendaItem` | Where/when it was discussed |\n\n")

            f.write("## Main API Endpoints Summary\n\n")
            f.write("```\n")
            f.write("Base: https://www.augsburg.sitzung-online.de/public/oparl\n\n")
            f.write("/system                    → System info (start here)\n")
            f.write("/bodies                    → Get body/city info\n")
            f.write("/meetings?body=1           → All meetings\n")
            f.write("/papers?body=1             → All papers/documents\n")
            f.write("/organizations?body=1      → All committees/organizations\n")
            f.write("/persons?body=1            → All people (if available)\n")
            f.write("/agendaItems?body=1        → All agenda items\n")
            f.write("/files?body=1              → All files\n")
            f.write("```\n\n")

            f.write("---\n\n")
            f.write("*Generated by OParl API Analyzer*\n")
            f.write(f"*Total API calls made: {self.stats['api_calls']}*\n")
            f.write("*For questions or issues, contact: info@augsburg.de*\n")

        print(f"\n✅ Markdown report saved to: {output_file}")


def main():
    """Run the Augsburg OParl analysis and persist its results.

    Creates an ``OParlAnalyzer`` for the Augsburg system endpoint, runs the
    full analysis, asks the analyzer to generate its Markdown report, and
    finally dumps the returned structure together with the analyzer's
    collected examples and counters to ``oparl_structure.json``.
    """
    oparl = OParlAnalyzer("https://www.augsburg.sitzung-online.de/public/oparl/system")

    # Run the analysis before generating the report.
    result = oparl.run_full_analysis()
    oparl.generate_markdown_report()

    # Machine-readable copy of the findings for further processing.
    with open('oparl_structure.json', 'w', encoding='utf-8') as out:
        payload = {
            'structure': result,
            'examples': oparl.examples,
            # stats is a defaultdict; store it as a plain dict
            'stats': dict(oparl.stats),
        }
        # ensure_ascii=False keeps non-ASCII characters readable in the file
        json.dump(payload, out, indent=2, ensure_ascii=False)

    print("\n✅ Raw structure saved to: oparl_structure.json")


# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Starting OParl API Analysis for Augsburg
Analyzing System endpoint...
Analyzing Bodies...
Analyzing Organizations...
Analyzing Persons...


Error fetching https://www.augsburg.sitzung-online.de/public/oparl/persons?body=1: HTTPSConnectionPool(host='www.augsburg.sitzung-online.de', port=443): Read timed out. (read timeout=10)


Analyzing Meetings...
Analyzing Papers...

Analysis Complete!
Total API Calls: 5

✅ Markdown report saved to: OPARL_API_STRUCTURE.md

✅ Raw structure saved to: oparl_structure.json
