In [1]:
import json
from typing import Any, Dict
from pathlib import Path

def analyze_json_structure(data: Any, prefix: str = "") -> None:
    """Print an indented outline of the keys and arrays nested in *data*.

    Each dict key is printed on its own line and its value is described one
    level deeper. A non-empty list is summarised by its length, and only its
    first element is descended into. Empty lists and all other values
    (strings, numbers, ``None``, ...) print nothing.

    Args:
        data: Any value produced by parsing JSON.
        prefix: Text placed in front of every line printed at this level;
            each nesting level appends two spaces to it.
    """
    child_prefix = f"{prefix}  "

    if isinstance(data, dict):
        for name, child in data.items():
            print(f"{prefix}Key: {name}")
            analyze_json_structure(child, child_prefix)
        return

    # Anything that is not a non-empty list has no structure to report.
    if not isinstance(data, list) or not data:
        return

    print(f"{prefix}Array containing {len(data)} items")
    # Only the first element is inspected as a sample of the array's contents.
    analyze_json_structure(data[0], child_prefix)

def read_chat_export(file_path: str, output_path: str = 'first_record.json') -> None:
    """Print the structure of a JSON chat export and save its first record.

    The file at *file_path* is parsed as UTF-8 JSON and outlined on stdout
    with ``analyze_json_structure``. When the top-level value is a non-empty
    list, its first element is written to *output_path* as indented JSON
    (non-ASCII characters are kept as-is); otherwise a notice is printed.

    Errors are reported on stdout instead of being raised: a
    ``json.JSONDecodeError`` prints "Invalid JSON file" and a
    ``FileNotFoundError`` prints "File not found".

    Args:
        file_path: Path of the JSON export to read.
        output_path: Where to write the first record. Defaults to
            ``first_record.json``, resolved against the current working
            directory.
    """
    try:
        # Parse first and release the input handle before doing any further
        # work; nothing below needs the file to stay open.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        print("Chat Export Structure:")
        analyze_json_structure(data)

        # Extract the first record and save it to a new JSON file
        if isinstance(data, list) and len(data) > 0:
            first_record = data[0]
            with open(output_path, 'w', encoding='utf-8') as out_f:
                json.dump(first_record, out_f, ensure_ascii=False, indent=4)
            print(f"First record saved to {output_path}")
        else:
            print("No records found in the JSON file")
    except json.JSONDecodeError:
        print("Invalid JSON file")
    except FileNotFoundError:
        # NOTE: this also fires if the directory of output_path does not exist.
        print("File not found")


# Chat export to inspect; the relative path is resolved against the current
# working directory.
file_path = "../data/open-webui/open-webui-chat-export.json"
read_chat_export(file_path=file_path)

Chat Export Structure:
Array containing 344 items
  Key: id
  Key: user_id
  Key: title
  Key: chat
    Key: id
    Key: title
    Key: models
      Array containing 1 items
    Key: params
    Key: history
      Key: messages
        Key: 6df1ebf3-04de-49a1-8b60-8a628886878d
          Key: id
          Key: parentId
          Key: childrenIds
            Array containing 1 items
          Key: role
          Key: content
          Key: timestamp
          Key: models
            Array containing 1 items
        Key: 3f214839-f6b5-4686-9255-d7b58df58089
          Key: parentId
          Key: id
          Key: childrenIds
            Array containing 1 items
          Key: role
          Key: content
          Key: model
          Key: modelName
          Key: modelIdx
          Key: userContext
          Key: timestamp
          Key: lastSentence
          Key: done
        Key: 973ad34e-3fc0-4657-95b3-f5aad7a92d43
          Key: id
          Key: parentId
          Key: childrenIds
  