In [27]:
from pathlib import Path
import anthropic
import nbformat
from api_keys import anthropic_api_key
client = anthropic.Anthropic(api_key=anthropic_api_key)

In [28]:
notebook = nbformat.read("../lecture_notes/lesson02_numpy.ipynb", as_version=4)

In [46]:
def get_output(cell):
    output_lines = []
    if cell.cell_type != "code":
        return ""

    for current_output in cell.outputs:
        if current_output.get('output_type') == "stream":
            output_lines.extend(current_output.text.splitlines())
        elif current_output.get('output_type') == "error":
            output_lines.append(f"Error: {current_output.ename}: {current_output.evalue}")
        elif current_output.get('data') and current_output.get('data').get('text/plain'):
            output_lines.extend(current_output.data['text/plain'].splitlines())
        else:
            print(f"Uknown output: {current_output}. Ignoring")
    return "".join(f"# {line}\n" for line in output_lines).rstrip()


In [48]:
def read_notebook(notebook_path):
    notebook = nbformat.read(notebook_path, as_version=4)
    notebook_cells = ["# %% [markdown]\n## Lesson 2. Numpy\n\n"]
    for cell in notebook.cells:
        if cell.cell_type == "code":
            output = get_output(cell)
            if output:
                output = "\n\n# Output:\n" + output
            notebook_cells.append("# %%\n" + cell.source + output + "\n\n")
        elif cell.cell_type == "markdown":
            notebook_cells.append("# %% [markdown]\n" + cell.source + "\n\n")
    notebook_cells.append("# %% [markdown]\nThat's all for today!\n\n")
    return notebook_cells

def is_code_cell(cell):
    return cell.startswith("# %%\n")

def is_markdown_cell(cell):
    return cell.startswith("# %% [markdown]\n")

def n_code_cells(notebook_cells):
    return sum(1 for cell in notebook_cells if is_code_cell(cell))

def n_markdown_cells(notebook_cells):
    return sum(1 for cell in notebook_cells if is_markdown_cell(cell))

In [49]:
def get_context(notebook_cells, i_cell, cells_before=3, cells_after=3):
    start_index = max(0, i_cell - cells_before)
    end_index = min(i_cell + cells_after, len(notebook_cells))
    return "".join(notebook_cells[start_index:end_index]), start_index, end_index

In [50]:
def get_index_of_code_cell(notebook_cells, i_cell):
    n_code_cells = -1
    for i, cell in enumerate(notebook_cells):
        if is_code_cell(cell):
            n_code_cells += 1
        if i_cell == n_code_cells:
            return i
    return -1


In [51]:
def split_cells(fragment: str):
    code_cells = []
    chunk = []
    for line in fragment.splitlines(keepends=True):
        if line.startswith("# %% [markdown]") or line.startswith("# %%"):
            if chunk:
                code_cells.append(chunk)
            chunk = [line]
        else:
            chunk.append(line)
    if chunk:
        code_cells.append(chunk)
    return ["".join(code_cell) for code_cell in code_cells]


In [None]:
def process_context_with_claude(context):
    messages = [
        {
            "role": "user",
            "content": (f"You are given a fragment of a Python notebook in Jupytext format that I wrote during a lecture. "
                        f"I want to make lecture notes from it. Edit this fragment appropriately. "
                        f"Add comments and clarifications. "
                        f"You are allowed to add or edit markdown cells inside the fragment if necessary. "
                        f"Do not change the code cells. Do not add any new code cells. "
                        f"Do not remove any code cells. "
                        f"Reply with the new fragment only. "
                        f"It will be inserted into the notebook instead of the old fragment. "
                        f"According to the Jupytext format, code cells start with '# %%'. "
                        f"Markdown cells start with '# %% [markdown]'. Any line in the markdown cell should start with '#', followed by a whitespace. "
                        f"In markdown cells, when refering to variables, functions, etc., use inline style, like `variable_name` or `function_name`. \n"
                        f"Add explanation based on the fragment. Do not add any general words. Do not use bulleted or numbered lists. "
                        f"You do not need to explain every single line. Only explain the parts that are not obvious for a student who is familiar with Python but not with NumPy. "
                        f"Only add explanations if necessary. Do not add any explanations if the code is self-explanatory. "
                        f"Make sure that the overall explanation is smooth. "
                        f"Here is the fragment:\n\n{context}\n\n\n"
                        f"Reply with the text of the new fragment only, without any other text. Your output will be directly inserted into the notebook.")
        }
    ]
    system_message = "You are a helpful assistant that writes lecture notes on Python."
    response = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=4000,
        messages=messages,
        system=system_message,
    )
    return response.content[0].text

In [56]:
notebook_cells = read_notebook("../lecture_notes/lesson02_numpy.ipynb")
for i_code_cell in range(n_code_cells(notebook_cells)):
    fragment, start_index, end_index = get_context(
        notebook_cells, get_index_of_code_cell(notebook_cells, i_code_cell)
    )
    print(f"Processing fragment:\n\n{fragment}\n")
    while True:
        new_fragment = process_context_with_claude(fragment).rstrip() + "\n\n"
        print(f"New fragment:\n\n{new_fragment}\n")
        if n_code_cells(split_cells(new_fragment)) == n_code_cells(split_cells(fragment)):
            break
        print("Number of code cells in the new fragment does not match the original fragment. We'll try again.")
    notebook_cells[start_index:end_index] = split_cells(new_fragment)
    print("----------------------------------------")
Path("../lecture_notes/lesson02_numpy_processed.py").write_text("".join(notebook_cells))

Processing fragment:

# %% [markdown]
## Lesson 2. Numpy

# %%
import numpy as np

# %%
arr = np.array([1, 2, 3, 4, 5])

# %%
type(arr)

# Output:
# numpy.ndarray



New fragment:

# %% [markdown]
# ## Lesson 2. NumPy
# 
# NumPy is a fundamental package for scientific computing in Python. It provides support for large, multi-dimensional arrays and matrices, along with a collection of mathematical functions to operate on these arrays efficiently.

# %%
import numpy as np

# %%
arr = np.array([1, 2, 3, 4, 5])

# %% [markdown]
# Here, we create a NumPy array using the `np.array()` function. Unlike Python lists, NumPy arrays are homogeneous, meaning all elements must be of the same data type. This allows for more efficient storage and computation.

# %%
type(arr)

# Output:
# numpy.ndarray

# %% [markdown]
# The `type()` function reveals that `arr` is of type `numpy.ndarray`. This stands for "N-dimensional array", which is the primary data structure in NumPy. It's more efficient than Pytho

49321