In [41]:
from typing import Optional, List, Dict, Generator, Any

import requests
from pydantic import HttpUrl

from api_connection import NotionAPIConnector

connector = NotionAPIConnector()
raw_response:dict = connector.main()
headers = connector.headers

In [40]:
raw_response

{'object': 'list',
 'results': [{'object': 'page',
   'id': 'fcbca7da-3517-4163-9362-21525d5784f1',
   'created_time': '2024-08-22T16:54:00.000Z',
   'last_edited_time': '2024-08-24T13:16:00.000Z',
   'created_by': {'object': 'user',
    'id': '095667f4-633c-4a3c-9ae2-b0da56bd7e27'},
   'last_edited_by': {'object': 'user',
    'id': '095667f4-633c-4a3c-9ae2-b0da56bd7e27'},
   'cover': None,
   'icon': None,
   'parent': {'type': 'database_id',
    'database_id': '83d8eeff-db5a-411b-a460-4e5b68d7080f'},
   'archived': False,
   'in_trash': False,
   'properties': {'Description': {'id': 'j%5BCS',
     'type': 'rich_text',
     'rich_text': [{'type': 'text',
       'text': {'content': 'test', 'link': None},
       'annotations': {'bold': False,
        'italic': False,
        'strikethrough': False,
        'underline': False,
        'code': False,
        'color': 'default'},
       'plain_text': 'test',
       'href': None}]},
    'Pages': {'id': 'title',
     'type': 'title',
     't

In [83]:
class ResultFetcher:
    
    
    GET_CHILDREN_URL = 'https://api.notion.com/v1/blocks/{}/children' # Fill with id 
    GET_PAGE_INFO =  'https://api.notion.com/v1/pages/{}' # Fill with id 

    PAGE_COL_NAME = "Pages"
    # DESC_COL_NAME = "Description"
    
    """ 
    Fetch the results of the raw json response (from NotionAPIConnector)
    """
    
    def __init__(self, raw_response:dict, headers:dict):
        self.raw_response = raw_response
        self.headers = headers
        
    


        
    @staticmethod
    def create_initial_dict(raw_response: Dict[str, Any]) -> Dict[str, str]:
        return {page['properties']['Pages']['title'][0]['plain_text']: page['id'] for page in raw_response['results']} 
    
    @staticmethod
    def get_response_results(response_json:dict) -> list:
        return response_json.get("results", [])
    
    @staticmethod 
    def fetch_url(headers:dict, url:HttpUrl) -> Optional[dict]:
        response = requests.get(url, headers=headers)
        
        if response.status_code == 200:
            return response.json()
        else:
            response.raise_for_status() 
            
    @staticmethod 
    def fetch_children_from_page(id_:str, headers:dict) -> Optional[List[Dict]]:
        url = ResultFetcher.GET_CHILDREN_URL.format(id_)
        return ResultFetcher.get_response_results(ResultFetcher.fetch_url(headers=headers, url=url))
    
    @staticmethod 
    def fetch_page_info(id_:str, headers:dict) -> Optional[dict]:
        url = ResultFetcher.GET_PAGE_INFO.format(id_)
        return ResultFetcher.fetch_url(headers=headers, url=url)
    
    @staticmethod
    def is_it_a_container_page(pages:List[Dict]) -> bool:
        """ 
        Take the result of the "fetch_children_from_page" method
        """
        return all(page["type"] == "child_page" for page in pages)
    
    @staticmethod
    def get_page_title(page_info_response:dict) -> str:
        return page_info_response['properties']['Pages']['title'][0]['plain_text']
    

    def fetch_all_pages(self, page_id: str, subject_name:str) -> Generator[Dict[str, Any], None, None]:
        """
        Fonction récursive pour itérer sur toutes les pages et sous-pages.
        """
        children_results = self.fetch_children_from_page(page_id, self.headers)
        
        if not ResultFetcher.is_it_a_container_page(pages=children_results):
            try:
                page_infos:dict = ResultFetcher.fetch_page_info(id_=page_id, headers=self.headers)
                page_url:HttpUrl = page_infos["url"]
                page_title:str = ResultFetcher.get_page_title(page_info_response=page_infos)
            
                yield subject_name, {page_title : page_url}
            except:
                print("Error again")
        
        for block in children_results:
            
            if block:
                if block['type'] == 'child_page':
                    
                    
                    subpage_id = block['id']
                    
                    children_resp:List[Dict] = ResultFetcher.fetch_children_from_page(id_=subpage_id, headers=self.headers)
                    
                    if not ResultFetcher.is_it_a_container_page(pages=children_resp):
                        
                        try:
                            page_infos:dict = ResultFetcher.fetch_page_info(id_=subpage_id, headers=self.headers)
                            page_url:HttpUrl = page_infos["url"]
                            page_title:str = ResultFetcher.get_page_title(page_info_response=page_infos)
                        
                            yield subject_name, {page_title :  page_url}
                        except:
                            print("Error")
                    
                    # Recursive call to explore subpages as well 
                    yield from self.fetch_all_pages(page_id=subpage_id, subject_name=subject_name)

    def main(self) -> Generator[Dict[str, Any], None, None]:
        initial_dict = self.create_initial_dict(raw_response=self.raw_response)
        
        for main_page_name, main_page_id in initial_dict.items():
            
            # Start fetching from the main pages
            yield from self.fetch_all_pages(page_id=main_page_id, subject_name=main_page_name)
                    
                
    
    

    
        
fetcher = ResultFetcher(raw_response=raw_response, headers=headers)

initial_dict = fetcher.create_initial_dict(raw_response)

assert len(raw_response["results"]) == 3

In [84]:
for truc in fetcher.main():
    print(truc)

('Ton per', {'Ton per': 'https://www.notion.so/Ton-per-fcbca7da35174163936221525d5784f1'})
('ta mere', {'ta mere': 'https://www.notion.so/ta-mere-80497c9312324710b14db45a27563932'})
('Page test', {'Page test': 'https://www.notion.so/Page-test-e54dd41c3ac247bbb7218cbb744990de'})
Error
Error again
Error
Error again
Error
Error again


In [74]:
# Exemple ; On va chopper les enfants de cette main page
# Au passage on va faire en sorte de détecter si cette page est juste un container de page ou bien si elle contient du texte
id_ = initial_dict['Ton per']

r = fetcher.fetch_children_from_page(id_=id_, headers=headers)
r

[{'object': 'block',
  'id': '14c3cb01-ecb0-4518-a364-1291e6e4d109',
  'parent': {'type': 'page_id',
   'page_id': 'fcbca7da-3517-4163-9362-21525d5784f1'},
  'created_time': '2024-08-24T13:16:00.000Z',
  'last_edited_time': '2024-08-24T13:16:00.000Z',
  'created_by': {'object': 'user',
   'id': '095667f4-633c-4a3c-9ae2-b0da56bd7e27'},
  'last_edited_by': {'object': 'user',
   'id': '095667f4-633c-4a3c-9ae2-b0da56bd7e27'},
  'has_children': False,
  'archived': False,
  'in_trash': False,
  'type': 'child_page',
  'child_page': {'title': 'OMG une page ! (1)'}},
 {'object': 'block',
  'id': '604d0ff2-fe54-4013-bb67-08b58f8ba65c',
  'parent': {'type': 'page_id',
   'page_id': 'fcbca7da-3517-4163-9362-21525d5784f1'},
  'created_time': '2024-08-24T13:16:00.000Z',
  'last_edited_time': '2024-08-24T13:16:00.000Z',
  'created_by': {'object': 'user',
   'id': '095667f4-633c-4a3c-9ae2-b0da56bd7e27'},
  'last_edited_by': {'object': 'user',
   'id': '095667f4-633c-4a3c-9ae2-b0da56bd7e27'},
  'has_

In [53]:
page_id = '604d0ff2-fe54-4013-bb67-08b58f8ba65c'
t = fetcher.fetch_page_info(id_=page_id, headers=headers)
t["properties"]["title"]["title"][0]["text"]["content"]

'Truc (1)'

In [None]:
id_ = "fcbca7da-3517-4163-9362-21525d5784f1"
fetcher.fetch_page_info(id_=id_, headers=headers)

[]

In [None]:
url = f'https://api.notion.com/v1/pages/da-3517-4163-9362-21525d5784f1'

response = requests.get(url, headers=headers)

response.json()

{'object': 'error',
 'status': 400,
 'code': 'validation_error',
 'message': 'path failed validation: path.page_id should be a valid uuid, instead was `"da-3517-4163-9362-21525d5784f1"`.',
 'request_id': '7cbaf5b1-d69b-47c5-88f3-2abe84b4e510'}