In [1]:
from typing import Any, Dict, List, Optional, Tuple

from langchain_text_splitters import MarkdownHeaderTextSplitter

class MarkdownSplitter:
    """Split Markdown documents into chunks along their header structure.

    Thin wrapper around ``MarkdownHeaderTextSplitter`` that remembers which
    header levels to split on.
    """

    # Default header markers, each paired with the metadata key under which
    # the matching header text is reported. Kept immutable so instances can
    # never share (and accidentally mutate) the same default list.
    _DEFAULT_HEADERS = (
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    )

    def __init__(
        self, headers_to_split_on: Optional[List[Tuple[str, str]]] = None
    ) -> None:
        """Create a splitter.

        Args:
            headers_to_split_on: ``(marker, metadata_key)`` pairs, e.g.
                ``("##", "Header 2")``. When omitted, the first three
                header levels are used.
        """
        if headers_to_split_on is None:
            headers_to_split_on = self._DEFAULT_HEADERS
        # Copy so later edits to the caller's list (or to this attribute)
        # do not leak between the caller and this instance.
        self.headers_to_split_on = list(headers_to_split_on)

    def split_markdown(self, markdown_text: str) -> List[Any]:
        """Split markdown while preserving header hierarchy.

        Args:
            markdown_text: Raw Markdown source.

        Returns:
            The list produced by ``MarkdownHeaderTextSplitter.split_text``.
            Its elements are LangChain ``Document`` objects (section text in
            ``page_content``, enclosing headers in ``metadata``), not plain
            strings.
        """
        # Built per call so changes made to ``self.headers_to_split_on``
        # after construction are honoured.
        splitter = MarkdownHeaderTextSplitter(
            headers_to_split_on=self.headers_to_split_on
        )
        return splitter.split_text(markdown_text)

**Example Scenarios**

In [None]:
def test_header_hierarchies():
   """Split a sample guide whose sections nest three header levels deep.

   Returns the result of ``MarkdownSplitter.split_markdown`` for the sample.
   """
   nested_sample = """
   # Machine Learning Guide
   Introduction to key ML concepts.

   ## Supervised Learning
   Understanding supervised learning approaches.

   ### Classification
   Binary and multi-class classification examples.

   ### Regression
   Predicting continuous values.

   ## Unsupervised Learning
   Exploring patterns in unlabeled data.

   ### Clustering
   K-means and hierarchical clustering approaches.
   """

   return MarkdownSplitter().split_markdown(nested_sample)

def test_code_blocks():
   """Split a sample document that embeds fenced Python code blocks.

   Returns the result of ``MarkdownSplitter.split_markdown`` for the sample.
   """
   fenced_sample = """
   # Python Examples
   Here are some coding examples.

   ## Function Definition
   Basic function example:
   ```python
   def process_data(data):
       return data.mean()
   ```

   ## Class Definition
   Object-oriented example:
   ```python
   class DataProcessor:
       def __init__(self, data):
           self.data = data
   ```
   """

   return MarkdownSplitter().split_markdown(fenced_sample)

def test_formatting_and_links():
   """Split a sample document containing inline formatting and a link.

   Returns the result of ``MarkdownSplitter.split_markdown`` for the sample.
   """
   inline_sample = """
   # Documentation Guide
   Here's a **bold statement** about documentation.

   ## Links Section
   Visit our [documentation](https://docs.example.com) for more.

   ## Formatting Examples
   - *Italic text* for emphasis
   - `inline code` for commands
   - ~~strikethrough~~ for outdated info
   """

   return MarkdownSplitter().split_markdown(inline_sample)

def test_tables():
   """Split a sample document whose sections contain pipe-delimited tables.

   Returns the result of ``MarkdownSplitter.split_markdown`` for the sample.
   """
   table_sample = """
   # Data Summary
   Key metrics from our analysis.

   ## Results Table
   | Model | Accuracy | F1 Score |
   |-------|----------|----------|
   | SVM   | 0.92     | 0.90     |
   | RF    | 0.94     | 0.93     |

   ## Performance Matrix
   | Metric | Train | Test |
   |--------|-------|------|
   | Loss   | 0.02  | 0.03 |
   """

   return MarkdownSplitter().split_markdown(table_sample)

def run_all_examples():
   """Execute every example scenario and print its chunks under a banner."""
   # Each entry pairs the banner text with the scenario that produces chunks.
   # Every banner except the first starts with a blank line to separate runs.
   scenarios = (
      ("Testing Header Hierarchies:", test_header_hierarchies),
      ("\nTesting Code Blocks:", test_code_blocks),
      ("\nTesting Formatting and Links:", test_formatting_and_links),
      ("\nTesting Tables:", test_tables),
   )
   divider = "-" * 50

   for banner, scenario in scenarios:
      print(banner)
      print(divider)
      # The scenario runs only after its banner has been printed.
      for position, piece in enumerate(scenario(), 1):
         print(f"\nChunk {position}:")
         print(piece)

# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
   run_all_examples()