In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
import json
from typing import Dict, List, Any

class JSONSplitter:
    """Splitter specialized for JSON/dictionary data.

    Keeps the chunking settings on the instance and builds a
    ``RecursiveCharacterTextSplitter`` configured with the same values.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """Store the chunk settings and create the underlying text splitter.

        Args:
            chunk_size: Forwarded to the text splitter as ``chunk_size``.
            chunk_overlap: Forwarded to the text splitter as ``chunk_overlap``.
        """
        self.chunk_size, self.chunk_overlap = chunk_size, chunk_overlap

        # The same two settings configure the wrapped splitter.
        splitter_settings = {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        }
        self.splitter = RecursiveCharacterTextSplitter(**splitter_settings)

**Example Scenarios**

In [None]:
# Notebook Title: JSON and Dictionary Splitting Strategies
# This notebook demonstrates different approaches to splitting JSON/Dictionary data
# while maintaining structural integrity and relationships.

import json
from typing import Dict, List, Any

class JSONSplitter:
    """Splitter specialized for JSON/dictionary data.

    This version only records the chunking settings on the instance.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """Store the chunking parameters.

        Args:
            chunk_size: Value kept as ``self.chunk_size``.
            chunk_overlap: Value kept as ``self.chunk_overlap``.
        """
        self.chunk_size, self.chunk_overlap = chunk_size, chunk_overlap

def test_nested_structures():
    """Example 1: print a deeply nested JSON document.

    Builds a sample organization structure and prints it as indented JSON.
    No splitting is performed here; the function only displays the data.
    """
    # Assemble the document from the innermost dictionaries outward.
    frontend_team = {
        "projects": ["Web App", "Mobile App"],
        "members": ["Alice", "Bob"],
    }
    backend_team = {
        "projects": ["API", "Database"],
        "members": ["Charlie", "David"],
    }
    engineering = {
        "teams": {"frontend": frontend_team, "backend": backend_team},
    }
    nested_json = {
        "organization": {
            "name": "Tech Corp",
            "departments": {"engineering": engineering},
        }
    }

    rendered = json.dumps(nested_json, indent=2)
    print("Nested JSON Example:")
    print(rendered)

def test_array_handling():
    """Example 2: print a JSON document containing nested arrays.

    Builds a sample dataset whose records each hold a list of transactions,
    then prints it as indented JSON. The function only displays the data.
    """
    # (id, name, [(date, amount), ...]) rows expanded into record dicts below.
    customer_rows = [
        (1, "John Doe", [("2025-01-01", 100), ("2025-01-02", 200)]),
        (2, "Jane Smith", [("2025-01-03", 300), ("2025-01-04", 400)]),
    ]
    records = [
        {
            "id": customer_id,
            "name": full_name,
            "transactions": [
                {"date": day, "amount": amount} for day, amount in purchases
            ],
        }
        for customer_id, full_name, purchases in customer_rows
    ]
    array_json = {"dataset": {"name": "Customer Records", "records": records}}

    rendered = json.dumps(array_json, indent=2)
    print("\nArray Handling Example:")
    print(rendered)

def test_complex_types():
    """Example 3: print a JSON document mixing several value types.

    The sample metadata combines strings, a float, a boolean, ``None``,
    lists and nested dictionaries, and is printed as indented JSON.
    The function only displays the data.
    """
    parameters = {"x": 1.5, "y": "string", "z": [1, 2, 3]}
    config = {"timeout": 30, "retries": None, "parameters": parameters}
    metadata = {
        "created": "2025-02-10T12:00:00Z",
        "version": 1.0,
        "enabled": True,
        "tags": ["important", "verified"],
        "config": config,
    }
    complex_json = {"metadata": metadata}

    rendered = json.dumps(complex_json, indent=2)
    print("\nComplex Types Example:")
    print(rendered)

def test_references():
    """Example 4: print a JSON document whose sections refer to each other.

    The sample holds users, per-role permissions, and project assignments
    that name users by key. It is printed as indented JSON; the function
    only displays the data.
    """
    users = {
        "user1": {"name": "Alice", "role": "admin"},
        "user2": {"name": "Bob", "role": "user"},
    }
    permissions = {
        "admin": ["read", "write", "delete"],
        "user": ["read"],
    }
    # Assignments refer to entries in ``users`` by their keys.
    assignments = {
        "project1": {"owner": "user1", "members": ["user1", "user2"]},
    }
    reference_json = {
        "users": users,
        "permissions": permissions,
        "assignments": assignments,
    }

    rendered = json.dumps(reference_json, indent=2)
    print("\nReferences Example:")
    print(rendered)

def run_all_examples():
    """Print a banner, then run each example function in order."""
    print("Testing JSON Splitting Scenarios...\n")

    # Order matches the example numbering in the individual docstrings.
    examples = (
        test_nested_structures,
        test_array_handling,
        test_complex_types,
        test_references,
    )
    for example in examples:
        example()

# Entry point: run every example when this module is executed as the main
# program (``__name__`` is "__main__"), but not when it is imported.
if __name__ == "__main__":
    run_all_examples()