In [1]:
import json
import os
from pathlib import Path

def get_chrome_bookmarks():
    # Путь к файлу закладок (зависит от ОС)
    if os.name == 'nt':  # Windows
        path = Path(os.getenv('LOCALAPPDATA')) / 'Google' / 'Chrome' / 'User Data' / 'Default' / 'Bookmarks'
    
    if not path.exists():
        raise FileNotFoundError("Chrome bookmarks file not found")
    
    with open(path, 'r', encoding='utf-8') as f:
        bookmarks = json.load(f)
    
    return bookmarks

def print_bookmarks(bookmarks, indent=0):
    if 'children' in bookmarks:
        for child in bookmarks['children']:
            if child['type'] == 'url':
                print(' ' * indent + f"- {child['name']} ({child['url']})")
            elif child['type'] == 'folder':
                print(' ' * indent + f"+ {child['name']}")
                print_bookmarks(child, indent + 4)

# Пример использования
bookmarks = get_chrome_bookmarks()
print_bookmarks(bookmarks['roots']['bookmark_bar'])  # Панель закладок
print_bookmarks(bookmarks['roots']['other'])  

+ Закладка 1
    + закладка 2 вложенность
    + о да
        - Работа в Москве, поиск персонала и публикация вакансий - hh.ru (https://hh.ru/?ysclid=meh9s6b2fu838199358)
    - YouTube (https://www.youtube.com/?ysclid=meh9siu11v112291115)
+ Новая папка


In [33]:
import pandas as pd
import re

def parse_bookmarks_to_dataframe(html_file):
    """
    Parses a Netscape-style bookmarks HTML file and returns a Pandas DataFrame.

    Args:
        html_file (str): Path to the HTML file.

    Returns:
        pandas.DataFrame: DataFrame with columns 'Название', 'URL', 'Вложенность'.
                           Returns None if the file is invalid or an error occurs.
    """

    try:
        with open(html_file, 'r', encoding='utf-8') as f:  # Explicit encoding
            html_content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found: {html_file}")
        return None
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

    bookmarks = []
    stack = []
    nesting_level = 0

    for line in html_content.splitlines():
        line = line.strip()  # Remove leading/trailing whitespace

        if "<DL><p>" in line:
            nesting_level += 1
            stack.append(nesting_level)
        elif "</DL><p>" in line:
            if stack:
                stack.pop()
            nesting_level = len(stack)

        elif "<DT><H3" in line:
            match = re.search(r">(.*?)</H3>", line)
            if match:
                title = match.group(1)
                bookmarks.append({'Название': title, 'URL': None, 'Вложенность': nesting_level})

        elif "<DT><A HREF=" in line:
            url_match = re.search(r'HREF="(.*?)"', line)
            title_match = re.search(r'>(.*?)</A>', line)  # Get title from the link itself
            if url_match and title_match:
                url = url_match.group(1)
                title = title_match.group(1)  # Use link text as title

                bookmarks.append({'Название': title, 'URL': url, 'Вложенность': nesting_level})


    df = pd.DataFrame(bookmarks)
    return df


# Example Usage:  Replace 'bookmarks.html' with the actual path to your file
file_path = 'bookmarks_18.08.2025.html'  # IMPORTANT: change this to the actual filename
df = parse_bookmarks_to_dataframe(file_path)

if df is not None:
    print(df)

    # Optional: Save to CSV
    #df.to_csv("bookmarks.csv", index=False, encoding='utf-8')  # Save to CSV file
    #print("DataFrame saved to bookmarks.csv")

                                            Название  \
0  <H3 ADD_DATE="1755530882" LAST_MODIFIED="0" PE...   
1  <H3 ADD_DATE="1755537164" LAST_MODIFIED="17555...   
2  <H3 ADD_DATE="1755537164" LAST_MODIFIED="17555...   
3  <H3 ADD_DATE="1755537164" LAST_MODIFIED="17555...   
4  <A HREF="https://hh.ru/?ysclid=meh9s6b2fu83819...   
5  <A HREF="https://www.youtube.com/?ysclid=meh9s...   
6  <H3 ADD_DATE="1755537164" LAST_MODIFIED="17555...   

                                                 URL  Вложенность  
0                                               None            1  
1                                               None            2  
2                                               None            3  
3                                               None            3  
4          https://hh.ru/?ysclid=meh9s6b2fu838199358            4  
5  https://www.youtube.com/?ysclid=meh9siu11v1122...            3  
6                                               None            1  
