### Ideal Procedure

1. Get menu items
2. Iterate over items
3. Retrieve content

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

import requests
from bs4 import BeautifulSoup
import pprint
import json
import csv
from io import StringIO
import re

LIVINGDOCS_API_KEY = os.environ.get("LIVINGDOCS_API_KEY", None)

### Sitemap

In [11]:
# Download the German sitemap. The timeout keeps a stalled connection from
# hanging the notebook, and raise_for_status() stops an HTTP error page from
# being parsed as if it were the sitemap.
res = requests.get("https://www.ch.ch/sitemap-de.xml", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content, features="xml")

In [None]:
urls = [x.text for x in soup.find_all("loc")]
urls

In [None]:
# Derive one tag per URL: the first path segment below the German site root.
# URLs outside that root (and the root itself) get None, so `tags` stays
# aligned index-for-index with `urls`. Checking the prefix explicitly avoids
# the bogus "https:" tag that a plain str.replace() yields for such URLs.
DE_ROOT = "https://www.ch.ch/de/"

tags = []
for url in urls:
    tag = url[len(DE_ROOT):].split("/")[0] if url.startswith(DE_ROOT) else ""
    tags.append(tag or None)
tags

# Selenium scraping

In [225]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
import re
from typing import Dict
import pandas as pd

In [None]:
# Configure the Firefox session used for scraping.
# headless is set to False here, i.e. the run is not meant to be headless.
# NOTE(review): recent Selenium releases may no longer honour
# `options.headless` — confirm against the installed version.
options = Options()
options.headless = False

# Set up the WebDriver.
# No explicit Service is passed (service=None); uncomment the line below and
# adjust the path to pin a specific geckodriver binary.
#service = Service("/path/to/geckodriver")
service = None
driver = webdriver.Firefox(service=service, options=options)

In [None]:
i = 90
driver.get(urls[i])
urls[i]

In [217]:
# Name the parser explicitly so the result does not depend on which parsers
# happen to be installed (and bs4 does not warn about guessing). lxml is
# already required by the features="xml" sitemap parse in this notebook.
soup = BeautifulSoup(driver.page_source, "lxml")

# FAQ

In [239]:
def extract_text_with_links(soup):
    """Flatten the page's <p>, <span> and <a> elements into one string.

    Anchors that carry an ``href`` are kept as ``<a href="...">text</a>`` so
    the link target survives; every other element (including anchors without
    an ``href``) contributes only its text. Pieces are joined with a single
    space, in the order ``find_all`` returns them.

    NOTE(review): each matching element is emitted on its own, so the text of
    an element nested inside another matching element presumably appears more
    than once — confirm whether that duplication is wanted.

    Args:
        soup: Parsed document (or tag) exposing ``find_all``.

    Returns:
        str: The space-joined text, empty when nothing matches.
    """
    content = []
    for element in soup.find_all(["p", "span", "a"]):
        # Only anchors can contribute a link; a missing/empty href would
        # otherwise be rendered literally as href="None".
        href = element.get("href") if element.name == "a" else None
        if href:
            content.append(f'<a href="{href}">{element.text}</a>')
        else:
            content.append(element.text)
    return " ".join(content)

def get_faq(soup: BeautifulSoup, url: str, language: str) -> list:
    """Collect the question/answer pairs of a rendered FAQ page.

    Questions are the matching heading spans whose stripped text ends with
    "?"; answers are the ``div`` elements marked ``itemprop="acceptedAnswer"``.
    The two lists are paired positionally, so they are only trusted when
    their lengths agree.

    Args:
        soup: Parsed page.
        url: Page URL, copied into every item.
        language: Language code, copied into every item.

    Returns:
        list: One dict per pair with the keys ``text`` (question), ``answer``
        (answer text with links), ``url`` and ``language``. Empty when the
        page has no questions or when the question and answer counts differ.
    """
    # NOTE(review): "data-v-166b6cfb" is presumably a build-specific attribute
    # of the site's front end and may change between deployments — confirm.
    headings = soup.find_all(
        "span", attrs={"data-v-166b6cfb": True}, class_="mr-6 text-left h2"
    )
    heading_texts = (heading.text.strip() for heading in headings)
    questions = [text for text in heading_texts if text.endswith("?")]

    answers = list(soup.find_all("div", {"itemprop": "acceptedAnswer"}))

    # Without matching counts the positional pairing would attach answers to
    # the wrong questions, so the page is skipped entirely.
    if len(questions) != len(answers):
        return []

    return [
        {
            "text": question,
            "answer": extract_text_with_links(answer),
            "url": url,
            "language": language,
        }
        for question, answer in zip(questions, answers)
    ]

In [None]:
# Preview the FAQ questions on the current page: heading spans whose
# stripped text ends with a question mark.
elements = soup.find_all(
    "span", attrs={"data-v-166b6cfb": True}, class_="mr-6 text-left h2"
)
stripped_texts = (span.text.strip() for span in elements)
questions = [text for text in stripped_texts if text.endswith("?")]

print(len(questions))
print(questions)

In [None]:
# Preview the FAQ answers on the current page: every div marked as an
# accepted answer, keeping only truthy entries.
elements = soup.find_all("div", {"itemprop": "acceptedAnswer"})
answers = [div for div in elements if div]

print(len(answers))
#print(answers)

# RAG

In [244]:
def get_rag_doc(text: str, url: str, language: str, tag: str) -> Dict:
    """Build a RAG context document from the text between two page markers.

    The body is whatever lies between the first start marker and the first
    end marker that follows it, with surrounding whitespace stripped.

    Args:
        text: Full page text.
        url: Page URL stored on the document.
        language: Language code stored on the document.
        tag: Single tag, stored as a one-element list under ``tags``.

    Returns:
        The document dict, or ``None`` when the start marker is missing or no
        end marker follows it.
    """
    start_marker = "Einfache Antworten zum Leben in der Schweiz"
    end_marker = "Eine Dienstleistung des Bundes, der Kantone und Gemeinden"

    marker_at = text.find(start_marker)
    if marker_at == -1:
        return None

    # The body begins right after the start marker; the end marker must not
    # overlap it, hence the search offset.
    body_from = marker_at + len(start_marker)
    body_to = text.find(end_marker, body_from)
    if body_to == -1:
        return None

    return {
        "text": text[body_from:body_to].strip(),
        "url": url,
        "language": language,
        "tags": [tag],
        "subtopics": None,
        "summary": None,
        "hyq": None,
        "hyq_declarative": None,
        "doctype": "context_doc",
        "organizations": ["BK"],
    }

In [None]:
# Start a fresh Firefox session for the full crawl below.
# headless is set to False here, i.e. the run is not meant to be headless.
# NOTE(review): recent Selenium releases may no longer honour
# `options.headless` — confirm against the installed version.
options = Options()
options.headless = False

# Set up the WebDriver.
# No explicit Service is passed (service=None); uncomment the line below and
# adjust the path to pin a specific geckodriver binary.
#service = Service("/path/to/geckodriver")
service = None
driver = webdriver.Firefox(service=service, options=options)

In [None]:
# Crawl every sitemap URL once and collect both outputs:
#   faq_items - question/answer pairs
#   rag_docs  - one context document per page, or None when the page
#               markers were not found
language = "de"

faq_items = []
rag_docs = []
for url, tag in zip(urls, tags):

    driver.get(url)
    # Name the parser explicitly so the parse does not depend on which
    # parsers are installed; lxml is already required by the
    # features="xml" sitemap parse in this notebook.
    soup = BeautifulSoup(driver.page_source, "lxml")

    # faq
    faq_items.extend(get_faq(soup, url, language))

    # rag
    text = extract_text_with_links(soup)
    rag_docs.append(get_rag_doc(text, url, language, tag))

len(rag_docs)

In [270]:
rag_docs = [doc for doc in rag_docs if doc]

In [None]:
pd.DataFrame(rag_docs)

In [238]:
pd.DataFrame(faq_items).to_csv("indexing/data/ch_ch_copilot/autocomplete/autocomplete.csv", index=None)

In [273]:
pd.DataFrame(rag_docs).to_csv("indexing/data/ch_ch_copilot/ch_ch/ch_ch.csv", index=None)

In [274]:
# Close the browser
driver.quit()

# EMBED DOCS

In [None]:
from utils.embedding import get_embedding

In [None]:
# Compute an embedding for every RAG document, one awaited call at a time.
# NOTE(review): top-level `await` presumably relies on the notebook's running
# event loop — this cell will not run as a plain script.
embed_docs = []
for doc in rag_docs:
    text_embedding = await get_embedding(doc["text"])
    # The embedding is stored on the same dict object that lives in rag_docs,
    # so rag_docs entries gain the "text_embedding" key as well.
    doc["text_embedding"] = text_embedding
    embed_docs.append(doc)

In [None]:
pd.DataFrame(embed_docs)

In [284]:
pd.DataFrame(embed_docs).to_csv("indexing/data/ch_ch_copilot/ch_ch/ch_ch.csv", index=None)

In [335]:
df = pd.read_csv("indexing/data/ch_ch_copilot/vaud/prestations_vd.csv")

In [336]:
# Split each comma-separated tag cell into a list, then gather the distinct
# tags. Stripping happens before de-duplication so that "a" and " a" are
# counted as the same tag.
tags = df.tags.apply(lambda x: x.split(","))
unique_tags = sorted({tag.strip() for row in tags for tag in row})

In [337]:
# Tags whose slug cannot be derived by simply replacing spaces with hyphens.
mapping = {
    "documents d'identité": "documents-identité",
    "emploi - chômage": "emploi-chômage",
    "l'offre de mobilité à votre disposition": "offre-de-mobilité-à-votre-disposition"
}

def reformat_tag(tag: str) -> str:
    """Turn a human-readable tag into its hyphenated slug.

    Surrounding whitespace is removed first, so tags obtained from
    ``"a, b".split(",")`` (which carry a leading space) still hit the special
    cases in ``mapping``. Tags without a special case get their spaces
    replaced by hyphens.

    Args:
        tag: Raw tag text, possibly padded with whitespace.

    Returns:
        The slug for the tag.
    """
    tag = tag.strip()
    return mapping.get(tag, tag).replace(" ", "-")

In [None]:
new_tags = df.tags.apply(lambda row: [reformat_tag(tag) for tag in row.split(",")])
df["tags"] = new_tags

In [352]:
df["tags"] = df.tags.apply(lambda x: ",".join(x))

In [353]:
df["organizations"] = "BK"

In [354]:
df.to_csv("indexing/data/ch_ch_copilot/vaud/prestations_vd.csv", index=None)

### Translations

In [None]:
{k.replace(" ", "-"):k.capitalize() for k in unique_tags}

In [379]:
import ast
df = pd.read_csv("indexing/data/ch_ch_copilot/ch_ch/ch_ch.csv")

In [380]:
df["organizations"] = "BK"
df["tags"] = df.tags.apply(lambda x: ast.literal_eval(x)[0])

In [None]:
df.tags

In [384]:
df.to_csv("indexing/data/ch_ch_copilot/ch_ch/ch_ch.csv", index=None)

### ---> API APPROACH (LEGACY)

### Menus

In [None]:
# Fetch all CMS menus. The timeout keeps a stalled connection from hanging
# the notebook, and raise_for_status() surfaces HTTP errors (for example a
# missing or invalid API key) before the body is decoded as JSON.
response = requests.get(
    "https://cms.ch.ch/api/v1/menus",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"},
    timeout=30,
)
response.raise_for_status()
menus = response.json()
pprint.pprint(menus)

In [None]:
def build_hierarchy(nodes):
    """
    Turn a list of menu nodes into a nested ``label -> value`` dictionary.

    A node with a non-empty ``nodes`` list maps to the hierarchy of its
    children; any other node maps to its ``documentId`` (``None`` when
    absent). Nodes without a ``label`` are stored under ``"Unknown"``; a
    repeated label overwrites the earlier entry.

    Args:
        nodes (list): Node dictionaries to process.

    Returns:
        dict: The nested hierarchy.
    """
    tree = {}
    for entry in nodes:
        name = entry.get("label", "Unknown")
        children = entry.get("nodes")
        if children:
            tree[name] = build_hierarchy(children)
            continue
        # Leaf node: keep the document reference, or None when there is none.
        tree[name] = entry.get("documentId", None)
    return tree


def parse_menus(menus):
    """
    Build the node hierarchy for each of the known site menus.

    Only menus whose ``label`` is one of the chch-*/wahlen-* labels listed
    below are kept; every other menu is ignored.

    Args:
        menus (list): Menu dictionaries to process.

    Returns:
        dict: Menu label mapped to the hierarchy built from its ``nodes``.
    """
    wanted_labels = (
        "chch-de", "chch-fr", "chch-it",
        "wahlen-de", "wahlen-fr", "wahlen-it",
    )
    return {
        menu.get("label"): build_hierarchy(menu.get("nodes", []))
        for menu in menus
        if menu.get("label") in wanted_labels
    }

# Example usage:
categories = parse_menus(menus)


In [None]:
categories["chch-fr"]

In [None]:
def invert_dict(d, path=None):
    """
    Flatten a nested dictionary into a ``leaf value -> key path`` mapping.

    Every non-dict, non-None leaf value becomes a key of the result; its
    value is the list of keys leading to that leaf, outermost key first.
    When the same leaf value occurs more than once, the last one visited wins.

    Args:
        d (dict): The nested dictionary to invert.
        path (list): Keys already traversed to reach ``d`` (used by the
            recursion; callers normally omit it).

    Returns:
        dict: Leaf values mapped to their key paths.
    """
    trail = path or []
    result = {}

    for key, value in d.items():
        here = trail + [key]
        if isinstance(value, dict):
            # Descend, carrying the path walked so far.
            result.update(invert_dict(value, here))
            continue
        if value is None:
            # Leaves without a value carry nothing to index by.
            continue
        result[value] = here

    return result

inverted = invert_dict(categories)
#pprint.pprint(inverted)


### Get doc by ID

In [None]:
def get_document_by_id(document_id: int) -> dict:
    """Fetch the publication for ``document_id`` from the CMS search endpoint.

    Args:
        document_id: ID used as the ``documentId`` search term.

    Returns:
        The first search result, or ``None`` when the request does not return
        HTTP 200 (the status and body are printed) or the search has no hits.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the request times out.
    """
    # Define filters in JSON format
    filters = json.dumps({
        "key": "documentId",
        "term": document_id
    })

    # Let requests build (and URL-encode) the query string, and bound the
    # wait so a stalled connection cannot hang the notebook.
    response = requests.get(
        "https://cms.ch.ch/api/v1/publications/search",
        params={"filters": filters},
        headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"},
        timeout=30,
    )

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    # An empty result list means there is no such publication; indexing it
    # would raise IndexError.
    results = response.json()
    return results[0] if results else None

In [None]:
document_id = 5309
doc = get_document_by_id(document_id)

In [None]:
doc["content"]

### Extract left components

In [None]:
components = doc["content"][0]["containers"]["left"]

In [None]:
class LeftDataExtractor:
    """Pull title, lead and infobox components out of a component list."""

    def __init__(self, data):
        """
        Store the input and start with an empty result list.
        :param data: Iterable of component dicts to process.
        """
        self.data = data
        self.result_list = []

    def extract(self):
        """
        Process every supported component and return the accumulated results.
        Components other than title, lead and infobox are ignored.
        """
        handlers = {
            'title': self._process_title,
            'lead': self._process_lead,
            'infobox': self._process_infobox,
        }
        for item in self.data:
            handler = handlers.get(item.get('component'))
            if handler is not None:
                handler(item)
        return self.result_list

    def _process_title(self, item):
        """
        Record a title component.
        """
        self.result_list.append({
            'type': 'title',
            'title': item['content'].get('title', ''),
        })

    def _process_lead(self, item):
        """
        Record a lead component.
        """
        self.result_list.append({
            'type': 'lead',
            'text': item['content'].get('text', ''),
        })

    def _process_infobox(self, item):
        """
        Record an infobox: its paragraph texts plus the referenced category ID.
        """
        # Only 'p' children of the infobox container contribute text.
        paragraphs = [
            element['content'].get('text', '')
            for element in item.get('containers', {}).get('infobox', [])
            if element.get('component') == 'p'
        ]

        # Walk content -> category -> params -> category -> reference -> id,
        # treating every missing level as empty.
        node = item.get('content', {})
        for key in ('category', 'params', 'category', 'reference'):
            node = node.get(key, {})
        category_id = node.get('id')

        self.result_list.append({
            'type': 'infobox',
            'category_id': category_id,
            'content': paragraphs,
        })

    def format_data(self):
        """
        Render each extracted element as one string (one list entry per element).
        """
        rendered = []
        for item in self.result_list:
            kind = item['type']
            lines = []

            if kind == 'title':
                lines.append(f"# {item['title']}")
            elif kind == 'lead':
                lines.append(f"{item['text']}")
            elif kind == 'infobox':
                if item['category_id']:
                    lines.append(f"Category ID: {item['category_id']}")
                if item['content']:
                    lines.append("\n".join(item['content']))

            rendered.append("\n".join(lines))

        return rendered


In [None]:
extractor = LeftDataExtractor(components)
data = extractor.extract()
formatted_data = extractor.format_data()

In [None]:
formatted_data

### Extract right components

In [None]:
components = doc["content"][0]["containers"]["right"]

In [None]:
import re
import json
import requests
from collections import defaultdict

class RightDataExtractor:
    """Extract accordion/FAQ content from a component list and record its links.

    Besides the extracted content (``result_list``), the extractor records
    every link it sees as a directed edge from the current document to the
    link target (``nodes`` / ``edges``), which ``build_knowledge_graph``
    returns as a plain dictionary.
    """

    def __init__(self, data, document_id, api_key=None, table_format='json'):
        """
        Initialize the extractor.
        :param data: Input data to process (list of component dicts).
        :param document_id: The ID of the current document; stored as a string.
        :param api_key: API key for accessing documents; falls back to
            LIVINGDOCS_API_KEY when empty.
        :param table_format: Format for tables ('json' or 'csv').
        """
        self.data = data
        self.documentId = str(document_id)
        self.api_key = api_key if api_key else LIVINGDOCS_API_KEY
        self.table_format = table_format.lower()
        self.result_list = []
        self.nodes = set()
        self.edges = []
        self.processed_documents = set()

    def get_document_by_id(self, document_id):
        """
        Fetch the document with the given ID.
        Returns the first search result, or None when the search is empty,
        the response is not HTTP 200, or the request raises.
        """
        filters = json.dumps({
            "key": "documentId",
            "term": document_id
        })
        try:
            # requests builds and URL-encodes the query string; the timeout
            # keeps a stalled connection from blocking the extraction.
            response = requests.get(
                "https://cms.ch.ch/api/v1/publications/search",
                params={"filters": filters},
                headers={"Authorization": f"Bearer {self.api_key}"},
                timeout=30,
            )
            if response.status_code == 200:
                results = response.json()
                if results:
                    return results[0]
            else:
                print(f"Error fetching document {document_id}: {response.status_code}, {response.text}")
        except Exception as e:
            # Best effort: a failed lookup is reported and treated as "not found".
            print(f"Exception fetching document {document_id}: {e}")
        return None

    def extract(self, data=None):
        """
        Extract and process all data.
        Without an argument the extractor's own data is processed and the
        current document is marked as processed. Results accumulate in
        ``result_list``, which is returned.
        """
        if data is None:
            data = self.data
            self.processed_documents.add(self.documentId)
        self._process_components(data)
        return self.result_list

    def _process_components(self, components):
        """
        Process a list of components; unsupported component types are skipped.
        """
        for item in components:
            component = item.get('component')
            if component == 'faq-teaser':
                self._process_faq_teaser(item)
            elif component == 'accordion':
                self._process_accordion(item)
            elif component == 'faq-container':
                self._process_faq_container(item)
            # Add other component types if needed

    def inject_url(self, text, inverted, language):
        """
        Replace internal links in the text with their menu path.
        :param text: Input text to process.
        :param inverted: Dictionary mapping document IDs to their key path.
        :param language: Language for message formatting (currently unused).
        :return: Text in which each internal <a> tag whose document ID is a
            key of ``inverted`` is replaced by the path joined with " -> "
            (the first path element is dropped).
        """
        def get_resource(documentId, language):
            resource = " -> ".join(inverted[documentId][1:])
            return resource

        # Regex to match <a> tags with the specific href pattern and text between the tags
        pattern = r'<a[^>]*href="https://www\.ch\.ch/(\d+)"[^>]*>([^<]+)</a>'

        def replace_match(match):
            document_id = match.group(1)
            link_text = match.group(2).strip()
            if document_id in inverted and link_text:  # Ensure document ID exists and text is non-empty
                return get_resource(document_id, language)
            return match.group(0)  # Leave the original tag if conditions are not met

        # Replace matches in the input text
        return re.sub(pattern, replace_match, text)

    def _process_faq_teaser(self, item):
        """
        Process faq-teaser components.
        Records an edge to the referenced document, then fetches it (once)
        and processes its content; when it cannot be fetched, only the
        reference is stored.
        """
        try:
            document_id = item['content']['faq']['params']['teaser']['reference']['id']
            document_id = str(document_id)
            # Add edge to knowledge graph
            self._add_edge(self.documentId, document_id)
            if document_id in self.processed_documents:
                return  # Avoid processing the same document multiple times
            # Fetch the referenced document
            document_data = self.get_document_by_id(document_id)
            if document_data:
                self.processed_documents.add(document_id)
                # Process the content of the fetched document
                self._process_components(document_data.get('content', []))
            else:
                # If unable to fetch, store the reference
                self.result_list.append({'type': 'faq-teaser', 'documentId': document_id})
        except Exception as e:
            # Best effort: a malformed teaser is reported and skipped.
            print(f"Error processing faq-teaser: {e}")

    def _process_faq_container(self, item):
        """
        Process faq-container components.
        """
        question = item['content'].get('question', '')
        content_list = self._process_body(item.get('containers', {}).get('body', []))
        self.result_list.append({'type': 'faq-container', 'question': question, 'content': content_list})

    def _process_accordion(self, item):
        """
        Process accordion components.
        """
        title = item['content'].get('title', '')
        content_list = self._process_body(item.get('containers', {}).get('body', []))
        self.result_list.append({'type': 'accordion', 'title': title, 'content': content_list})

    def _process_body(self, body):
        """
        Process the body of components like accordion or faq-container.
        Returns the list of extracted content entries; links found in
        paragraphs, list items and table cells are recorded as edges.
        """
        content_list = []

        for element in body:
            elem_component = element.get('component')
            if elem_component == 'subtitle':
                subtitle_title = element['content'].get('title', '')
                content_list.append({'type': 'subtitle', 'title': subtitle_title})
            elif elem_component == 'p':
                text = element['content'].get('text', '')
                content_list.append({'type': 'p', 'text': text})
                self._extract_urls_from_text(text)
            elif elem_component == 'list':
                list_content = self._process_list(element)
                content_list.append(list_content)
                for list_item in list_content['items']:
                    self._extract_urls_from_text(list_item)
            elif elem_component == 'table':
                # Cell links are already recorded while _process_table reads
                # the cells; scanning the finished table again would add
                # every table link to the graph a second time.
                content_list.append(self._process_table(element))
            elif elem_component == 'faq-teaser':
                self._process_faq_teaser(element)
            # Handle other components as needed

        return content_list

    def _process_list(self, element):
        """
        Process list components.
        """
        list_items = element.get('containers', {}).get('list', [])
        items = [item.get('content', {}).get('text', '') for item in list_items]
        return {'type': 'list', 'items': items}

    def _process_table(self, element):
        """
        Process table components and return in the specified format.
        Body cells beyond the last header are stored under "Column_<n>".
        """
        containers = element.get('containers', {})
        headers = []

        # Extract headers
        header_rows = containers.get('header', [])
        for header_row in header_rows:
            cells = header_row.get('containers', {}).get('header-row', [])
            for cell in cells:
                header_text = self._extract_text_from_containers(cell, 'header-cell')
                headers.append(header_text)

        # Extract rows
        rows = []
        body_rows = containers.get('body', [])
        for body_row in body_rows:
            row_data = {}
            cells = body_row.get('containers', {}).get('body-row', [])
            for idx, cell in enumerate(cells):
                cell_text = self._extract_text_from_containers(cell, 'body-cell')
                if idx < len(headers):
                    row_data[headers[idx]] = cell_text
                else:
                    row_data[f"Column_{idx + 1}"] = cell_text
            rows.append(row_data)

        if self.table_format == 'csv':
            return {'type': 'table', 'data': self._table_to_csv(headers, rows)}
        return {'type': 'table', 'data': rows}

    def _table_to_csv(self, headers, rows):
        """
        Convert table data to CSV format.
        Columns are the headers followed by any extra row keys (such as
        "Column_<n>") in first-seen order.
        """
        from io import StringIO
        import csv

        # DictWriter rejects row keys missing from fieldnames, so rows that
        # are wider than the header must contribute their extra columns.
        fieldnames = list(headers)
        for row in rows:
            for key in row:
                if key not in fieldnames:
                    fieldnames.append(key)

        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
        return output.getvalue().strip()

    def _extract_text_from_containers(self, container, cell_type):
        """
        Helper method to extract text from nested containers.
        Concatenates the text of the cell's 'p' children and records the
        links found in each of them.
        """
        cell_text = ''
        cell_contents = container.get('containers', {}).get(cell_type, [])
        for content in cell_contents:
            if content.get('component') == 'p':
                text = content.get('content', {}).get('text', '')
                cell_text += text
                self._extract_urls_from_text(text)
        return cell_text

    def _extract_urls_from_text(self, text):
        """
        Extract URLs from the given text and update nodes and edges.
        Internal links (https://www.ch.ch/<digits>) are recorded by document
        ID, all other non-blank hrefs by their full URL.
        """
        # Regex to find all href attributes in <a> tags
        hrefs = re.findall(r'<a[^>]*href="([^"]+)"[^>]*>(.*?)</a>', text)

        internal_pattern = re.compile(r'https://www\.ch\.ch/(\d+)')

        for href, _link_text in hrefs:
            if href.strip():
                internal_match = internal_pattern.match(href)
                if internal_match:
                    # Internal URL
                    target_document_id = internal_match.group(1)
                    self._add_edge(self.documentId, target_document_id)
                else:
                    # External URL
                    self._add_edge(self.documentId, href)

    def _extract_urls_from_table(self, table_content):
        """
        Extract URLs from table data.
        Not called by _process_body: tables produced by _process_table have
        had their cell links recorded already, so calling this on such a
        table records them again.
        """
        if self.table_format == 'csv':
            # Parse CSV data
            import csv
            from io import StringIO

            csv_data = StringIO(table_content['data'])
            reader = csv.DictReader(csv_data)
            for row in reader:
                for cell in row.values():
                    self._extract_urls_from_text(cell)
        else:
            # JSON format
            for row in table_content['data']:
                for cell in row.values():
                    self._extract_urls_from_text(cell)

    def _add_edge(self, source, target):
        """
        Add an edge to the knowledge graph.
        """
        self.nodes.add(str(source))
        self.nodes.add(str(target))
        self.edges.append((str(source), str(target)))

    def build_knowledge_graph(self):
        """
        Build and return the knowledge graph representation.
        """
        graph = {
            'nodes': list(self.nodes),
            'edges': [{'source': s, 'target': t} for s, t in self.edges]
        }
        return graph

    def format_data(self):
        """
        Format extracted data into a list of unified strings by element.
        Entries that are neither accordion nor faq-container yield an empty
        string.
        """
        formatted_elements = []
        for item in self.result_list:
            formatted_content = []

            # Format Accordion
            if item['type'] == 'accordion':
                formatted_content.append(f"# {item['title']}")
                formatted_content.extend(self._format_content_list(item.get('content', [])))

            # Format FAQ Container
            elif item['type'] == 'faq-container':
                formatted_content.append(f"Question: {item['question']}")
                formatted_content.extend(self._format_content_list(item.get('content', [])))

            # Handle other components as needed
            # Note: faq-teaser content is processed and added via recursive calls

            # Join all content for the element and add to the list
            formatted_elements.append("\n".join(formatted_content))

        return formatted_elements

    def _format_content_list(self, content_list):
        """
        Helper method to format content list for output.
        """
        formatted_content = []
        for content in content_list:
            if content['type'] == 'subtitle':
                formatted_content.append(f"## {content['title']}")
            elif content['type'] == 'p':
                formatted_content.append(content['text'])
            elif content['type'] == 'list':
                formatted_content.append("\n".join([f"- {i}" for i in content['items']]))
            elif content['type'] == 'table':
                table_data = content['data']
                if self.table_format == 'csv':
                    formatted_content.append(f"\n{table_data}")
                else:  # JSON
                    formatted_content.append(json.dumps(table_data, indent=2))
        return formatted_content


In [None]:
extractor = RightDataExtractor(components, document_id=document_id)
data = extractor.extract()
knowledge_graph = extractor.build_knowledge_graph()

# Print the knowledge graph in JSON format
print(json.dumps(knowledge_graph, indent=2))

In [None]:
extractor = RightDataExtractor(components, document_id, table_format='csv')
data = extractor.extract()
formatted_data = extractor.format_data()

In [None]:
print(formatted_data[0])

### Run on all document IDs

In [None]:
# Run the right-hand extraction for every document referenced by the menus.
# Each document is fetched so that its OWN components are processed, instead
# of re-using the `components` of whichever document was loaded last.
for doc_id in inverted:
    print(f"DOC ID: {doc_id}")
    try:
        doc_components = get_document_by_id(doc_id)["content"][0]["containers"]["right"]
    except (IndexError, KeyError, TypeError):
        # TypeError: the lookup returned None; IndexError/KeyError: no
        # publication was found or the document has no right-hand container.
        print("No right-hand components found")
        print("---------------------------")
        continue

    extractor = RightDataExtractor(doc_components, doc_id, table_format='csv')
    data = extractor.extract()
    formatted_data = extractor.format_data()

    for item in formatted_data:
        print(item)
        print("------")
    print("---------------------------")


### Project

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/project",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
pprint.pprint(response.json())

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/document-lists",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
print(response.json())

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/document-lists/54172",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
pprint.pprint(response.json())

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/categories/faq-teaser",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
res = response.json()
print(res)

In [None]:
# Search publications containing a faq-teaser component, limited to one hit.
# The parameters go through `params` so they are joined with "&"; the
# hand-written URL used a second "?", which folded "?limit=1" into the value
# of `component`.
response = requests.get(
    "https://cms.ch.ch/api/v1/publications/search",
    params={"component": "faq-teaser", "limit": 1},
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
res = response.json()
pprint.pprint(res)

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/publications/search?limit=100",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
res = response.json()
pprint.pprint(res)

In [None]:
res[1].keys()

In [None]:
res[4]["systemdata"]

In [None]:
res[4]["content"][0].keys()

In [None]:
res[4]["content"][0]["id"]

In [None]:
res[4]["content"][0]["containers"].keys()

In [None]:
res[4]["content"][0]["containers"]["left"]

In [None]:
res[4]["content"][0]["containers"]["right"]

In [None]:

API_BASE_URL = "https://cms.ch.ch/api/v1"

# Example data structure with items
items = [
    {
        'component': 'faq-teaser',
        'identifier': 'p:34:34.faq-teaser',
        'id': 'doc-1iaq8v15f0',
        'content': {
            'faq': {
                'service': 'faq-teaser',
                'params': {
                    'teaser': {
                        '$ref': 'document',
                        'reference': {'id': '10328'}
                    }
                }
            }
        }
    },
    {
        'component': 'accordion',
        'identifier': 'p:34:34.accordion',
        'id': 'doc-1iaq8liqi0',
        'content': {'title': 'Voyager avec des enfants'}
    }
]

def fetch_document_by_id(doc_id):
    """Fetch a publication by ID from the CMS API.

    Args:
        doc_id: ID appended to the ``/publications/`` endpoint.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (the status
        code is printed).

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the request times out.
    """
    # The timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(
        f"{API_BASE_URL}/publications/{doc_id}",
        headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"},
        timeout=30,
    )
    if response.status_code != 200:
        print(f"Error fetching document {doc_id}: {response.status_code}")
        return None
    return response.json()

# Extract and process faq-teaser items
for item in items:
    if item['component'] == 'faq-teaser':
        ref_id = item['content']['faq']['params']['teaser']['reference']['id']
        print(f"Fetching content for reference ID: {ref_id}")
        document = fetch_document_by_id(ref_id)
        if document:
            print(f"Content for {ref_id}: {document}")


In [None]:
url = f"{API_BASE_URL}/publications/{5810}"
headers = {"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
response = requests.get(url, headers=headers)

response

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/sitemaps/index",
    params={"baseUrl": "https://www.ch.ch"},
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
res = response.json()
print(res)

In [None]:
response = requests.get(
    "https://cms.ch.ch/api/v1/documents/latestPublications",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
res = response.json()

In [None]:
res[0]

In [None]:
response = requests.get(
    "https://livingdocs-ch-ch-prod/api/v1/project",
    headers={"Authorization": f"Bearer {LIVINGDOCS_API_KEY}"}
)
res = response.json()