# core.parser API Walkthrough

This notebook demonstrates how markdown and code cells are parsed into `ParsedCell` objects.

In [1]:
import pprint
from pathlib import Path

import nbformat
from nbformat.v4 import new_markdown_cell
from notebook_summarizer.core.parser import parse_markdown_cell, parse_code_cell, parse_notebook

### 1. Simple Markdown
A markdown cell with a heading and a paragraph.

In [2]:
# Build a markdown cell holding one heading and one paragraph, then parse it.
md_cell = new_markdown_cell('# Introduction\n\nThis notebook demonstrates parsing.')
# Named `parsed_simple` rather than `parsed`: a later cell binds `parsed` to
# the dict returned by parse_notebook, so sharing the name would mix two kinds of object.
parsed_simple = parse_markdown_cell(md_cell)
pprint.pprint(parsed_simple)

ParsedCell(type='markdown',
           title='Introduction',
           bullets=[],
           paragraphs=['This notebook demonstrates parsing.'],
           code=None,
           images=[],
           table=None,
           raw_outputs=None,
           metadata={})


### 2. Markdown with Bullets
A markdown cell with a heading and a bullet list.

In [3]:
# Markdown source: a heading followed by a two-item bullet list.
md_bullets = new_markdown_cell(
    '# Key Findings\n\n'
    '- First insight\n'
    '- Second insight'
)
parsed_bullets = parse_markdown_cell(md_bullets)
pprint.pprint(parsed_bullets)

ParsedCell(type='markdown',
           title='Key Findings',
           bullets=['First insight', 'Second insight'],
           paragraphs=[],
           code=None,
           images=[],
           table=None,
           raw_outputs=None,
           metadata={})


### 3. Markdown with Link
A markdown paragraph that includes an inline link.

In [4]:
# Markdown source: a heading and a paragraph containing an inline link.
md_link = new_markdown_cell(
    '# Data Source\n\n'
    'See [BLS](https://bls.gov) for details.'
)
parsed_link = parse_markdown_cell(md_link)
pprint.pprint(parsed_link)

ParsedCell(type='markdown',
           title='Data Source',
           bullets=[],
           paragraphs=['See BLS (https://bls.gov) for details.'],
           code=None,
           images=[],
           table=None,
           raw_outputs=None,
           metadata={})


### 4. Bullets Containing a Link
A markdown bullet list in which one item includes an inline link.

In [5]:
# Markdown source: a heading and two bullets, the second holding an inline link.
md_bullet_link = new_markdown_cell(
    '# Data Source\n\n'
    '- There is data\n'
    '- See [BLS](https://bls.gov) for details.'
)
parsed_bullet_link = parse_markdown_cell(md_bullet_link)
pprint.pprint(parsed_bullet_link)

ParsedCell(type='markdown',
           title='Data Source',
           bullets=['There is data', 'See BLS (https://bls.gov) for details.'],
           paragraphs=[],
           code=None,
           images=[],
           table=None,
           raw_outputs=None,
           metadata={})


### 5. Code Cell with Plot Output
(Manually simulates plot output with a stub PNG.)

In [6]:
# Hand-built code cell whose only output is a display_data entry carrying an
# image/png payload; the payload is a placeholder string, not real base64 data.
# `parse_code_cell` and `pprint` come from the notebook's import cell, so they
# are not re-imported here.
plot_code_cell = {
    "cell_type": "code",
    "source": "import matplotlib.pyplot as plt\nplt.plot([1, 2, 3], [4, 5, 6])\nplt.show()",
    "outputs": [
        {
            "output_type": "display_data",
            "data": {
                "image/png": "<base64-encoded-image>"
            }
        }
    ]
}
# Distinct names keep this result available after the table example below,
# which defines its own cell dict and parsed result.
parsed_plot = parse_code_cell(plot_code_cell)
pprint.pprint(parsed_plot)

ParsedCell(type='code',
           title=None,
           bullets=[],
           paragraphs=[],
           code='import matplotlib.pyplot as plt\n'
                'plt.plot([1, 2, 3], [4, 5, 6])\n'
                'plt.show()',
           images=[ImageData(mime_type='image/png',
                             data='<base64-encoded-image>')],
           table=None,
           raw_outputs=[{'data': {'image/png': '<base64-encoded-image>'},
                         'output_type': 'display_data'}],
           metadata={})


### 6. Code Cell with Table Output
(Manually simulate table output using HTML.)

In [7]:
# Hand-written HTML table used as the simulated cell output.
html_table = """
<table>
<thead>
    <tr><th>name</th><th>score</th></tr>
</thead>
<tbody>
    <tr><td>Alice</td><td>95</td></tr>
    <tr><td>Bob</td><td>88</td></tr>
</tbody>
</table>
"""

# Code-cell dict with a single execute_result output exposing the table as text/html.
fake_code_cell = dict(
    cell_type="code",
    source="df.head()",
    outputs=[
        dict(
            output_type="execute_result",
            data={"text/html": html_table},
        ),
    ],
)

parsed_code = parse_code_cell(fake_code_cell)
pprint.pprint(parsed_code)


ParsedCell(type='code',
           title=None,
           bullets=[],
           paragraphs=[],
           code='df.head()',
           images=[],
           table=[{'name': 'Alice', 'score': 95}, {'name': 'Bob', 'score': 88}],
           raw_outputs=[{'data': {'text/html': '\n'
                                               '<table>\n'
                                               '<thead>\n'
                                               '    '
                                               '<tr><th>name</th><th>score</th></tr>\n'
                                               '</thead>\n'
                                               '<tbody>\n'
                                               '    '
                                               '<tr><td>Alice</td><td>95</td></tr>\n'
                                               '    '
                                               '<tr><td>Bob</td><td>88</td></tr>\n'
                                               '</tbody>\n'
   

### 7. Apply Parser to Notebook

In [8]:
# The sample notebook is located relative to the parent of the current
# working directory: <parent>/examples/data/rich_demo_notebook_realtext.ipynb
test_notebook_path = Path(".").resolve().parent.joinpath(
    "examples", "data", "rich_demo_notebook_realtext.ipynb"
)
parsed = parse_notebook(test_notebook_path)
pprint.pprint(parsed)

{'cells': [ParsedCell(type='markdown',
                      title='📘 Sample Analysis Notebook',
                      bullets=[],
                      paragraphs=['This notebook demonstrates a basic data '
                                  "analysis workflow using Python. We'll begin "
                                  'by exploring a sample dataset, visualizing '
                                  'the raw data, and performing a linear '
                                  'regression analysis to uncover trends.'],
                      code=None,
                      images=[],
                      table=None,
                      raw_outputs=None,
                      metadata={}),
           ParsedCell(type='markdown',
                      title=None,
                      bullets=['Data analysis is a critical part of any '
                               'decision-making process',
                               'This notebook demonstrates linear regression '
                   

In [9]:
# Merge the three cells at indices 1-3 into the first parsed cell.
merge_base = parsed['cells'][0]
merge_followers = parsed['cells'][1:4]
merged_cell = merge_base.merge_cells(merge_followers)
pprint.pprint(merged_cell)

ParsedCell(type='markdown',
           title='📘 Sample Analysis Notebook',
           bullets=['Data analysis is a critical part of any decision-making '
                    'process',
                    'This notebook demonstrates linear regression on a simple '
                    'dataset',
                    'We demonstrate the case for noise-free and noisy data'],
           paragraphs=['This notebook demonstrates a basic data analysis '
                       "workflow using Python. We'll begin by exploring a "
                       'sample dataset, visualizing the raw data, and '
                       'performing a linear regression analysis to uncover '
                       'trends.',
                       '📚 Background'],
           code=None,
           images=[ImageData(mime_type='image/png',
                             data='iVBORw0KGgoAAAANSUhEUgAAAiEAAAGJCAYAAABcsOOZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8ekN5oAAAACXB