Jupyter plugin V2 (#917)
Compared to V1:
- support module_name as positional or named arg (-m/--module_name)
- richer support for -c/--config
- support -h/--help
- ignores -r/--rebuild-drivers
- ignores -v/--verbose
- alias %%insert_module to %%module_to_cell
- creates a temporary file for temporary modules so they play nicely with the Hamilton UI
- in `incr_cell_to_module`, -i/--identifier is now a required positional argument
- added try/except over dynamic module creation

Others:
- fixed dynamic module registration in ad_hoc_utils.create_module(), which was causing a bug with function_modifiers.macros (e.g., @pipe)
- updated existing examples/ notebooks
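
A hedged usage sketch of the V2 interface described above. Each fenced block is its own notebook cell; the module names, functions, and config variable are illustrative and not taken from this commit, and it assumes -c/--config accepts the name of a notebook variable holding the config (as the previous README documented):

```python
# load the extension; define a config dict to pass to -c/--config by name
%load_ext hamilton.plugins.jupyter_magic
my_config = {"mode": "dev"}
```

```python
%%cell_to_module my_module -c my_config --display
# module name given positionally (equivalently -m/--module_name); --display renders the dataflow
def doubled(x: int) -> int:
    return x * 2
```

```python
%%incr_cell_to_module my_incremental_module 1 --display
# the cell identifier (here 1) is now a required positional argument, replacing -i/--identifier
def tripled(x: int) -> int:
    return x * 3
```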

---------

Co-authored-by: zilto <tjean@DESKTOP-V6JDCS2>
Co-authored-by: Stefan Krawczyk <stefan@dagworks.io>
3 people committed May 28, 2024
1 parent f70af85 commit 831a093
Showing 6 changed files with 2,344 additions and 757 deletions.
14 changes: 7 additions & 7 deletions examples/LLM_Workflows/NER_Example/notebook.ipynb
@@ -190,7 +190,7 @@
}
],
"source": [
"%%incr_cell_to_module ner_module -i 1 --display\n",
"%%incr_cell_to_module ner_module 1 --display\n",
"\n",
"from datasets import Dataset\n",
"from hamilton.function_modifiers import load_from, save_to, source, value\n",
@@ -419,7 +419,7 @@
}
],
"source": [
"%%incr_cell_to_module ner_module -i 2 --display\n",
"%%incr_cell_to_module ner_module 2 --display\n",
"\n",
"import torch\n",
"from transformers import (\n",
@@ -505,7 +505,7 @@
"source": [
"# this is what the NER pipeline produces\n",
"text = \"The Mars Rover from NASA reached the red planet yesterday.\"\n",
"ner_pipeline(model(NER_model_id()), tokenizer(NER_model_id()), \"cpu\")([text])"
"ner_module.ner_pipeline(model(NER_model_id()), tokenizer(NER_model_id()), \"cpu\")([text])"
]
},
{
@@ -720,7 +720,7 @@
}
],
"source": [
"%%incr_cell_to_module ner_module -i 3 --display\n",
"%%incr_cell_to_module ner_module 3 --display\n",
"from sentence_transformers import SentenceTransformer\n",
"\n",
"def retriever(\n",
@@ -767,7 +767,7 @@
],
"source": [
"# what the embedding model produces -- just show first 10 numbers\n",
"retriever(\"cpu\").encode([\"this is some text\"])[0][0:10]"
"ner_module.retriever(\"cpu\").encode([\"this is some text\"])[0][0:10]"
]
},
{
@@ -1060,7 +1060,7 @@
}
],
"source": [
"%%incr_cell_to_module ner_module -i 4 --display\n",
"%%incr_cell_to_module ner_module 4 --display\n",
"from datasets.formatting.formatting import LazyBatch\n",
"from typing import Union\n",
"\n",
@@ -1937,7 +1937,7 @@
}
],
"source": [
"%%incr_cell_to_module ner_module -i 5 --display \n",
"%%incr_cell_to_module ner_module 5 --display \n",
"\n",
"import lancedb\n",
"import numpy as np\n",
@@ -131,7 +131,7 @@
}
],
"source": [
"%%incr_cell_to_module doc_pipeline -i 1 --display\n",
"%%incr_cell_to_module doc_pipeline 1 --display\n",
"\n",
"from typing import NamedTuple, Optional\n",
"\n",
@@ -266,7 +266,7 @@
}
],
"source": [
"%%incr_cell_to_module doc_pipeline -i 2 --display\n",
"%%incr_cell_to_module doc_pipeline 2 --display\n",
"import requests \n",
"import re\n",
"import uuid\n",
@@ -320,7 +320,7 @@
"# print(requests.get(\"https://hamilton.dagworks.io/en/latest/concepts/best-practices/code-organization/\").text)\n",
"# we can test that this works by running the functions:\n",
"url = \"https://hamilton.dagworks.io/en/latest/concepts/best-practices/code-organization/\"\n",
"raw_document(url, html_regex())"
"doc_pipeline.raw_document(url, doc_pipeline.html_regex())"
]
},
{
@@ -480,7 +480,7 @@
}
],
"source": [
"%%incr_cell_to_module doc_pipeline -i 3 --display\n",
"%%incr_cell_to_module doc_pipeline 3 --display\n",
"\n",
"from langchain import text_splitter\n",
"\n",
@@ -548,7 +548,7 @@
],
"source": [
"# example what the HTML chunker is doing:\n",
"html_chunker().split_text(\"<h1>title</h1><p>some text</p><p>some more text</p><h2>subsection1</h2><p>section text<br/> more text</p>\")"
"doc_pipeline.html_chunker().split_text(\"<h1>title</h1><p>some text</p><p>some more text</p><h2>subsection1</h2><p>section text<br/> more text</p>\")"
]
},
{
@@ -579,7 +579,7 @@
],
"source": [
"# example what the text chunker is doing\n",
"text_chunker(5, 0).split_text(\"this is some text\")"
"doc_pipeline.text_chunker(5, 0).split_text(\"this is some text\")"
]
},
{
@@ -771,7 +771,7 @@
}
],
"source": [
"%%incr_cell_to_module doc_pipeline -i 4 --display\n",
"%%incr_cell_to_module doc_pipeline 4 --display\n",
"import openai\n",
"\n",
"def client() -> openai.OpenAI:\n",
@@ -1813,7 +1813,7 @@
],
"source": [
"# example\n",
"client().embeddings.create(input=\"this is some text that will change into a vector\", model=\"text-embedding-3-small\").data[0].embedding"
"doc_pipeline.client().embeddings.create(input=\"this is some text that will change into a vector\", model=\"text-embedding-3-small\").data[0].embedding"
]
},
{
@@ -2011,7 +2011,7 @@
}
],
"source": [
"%%incr_cell_to_module doc_pipeline -i 5 --display\n",
"%%incr_cell_to_module doc_pipeline 5 --display\n",
"import pandas as pd\n",
"\n",
"def store(\n",
@@ -3051,7 +3051,7 @@
"metadata": {},
"outputs": [],
"source": [
"%%incr_cell_to_module parallel_pipeline -i 1 \n",
"%%cell_to_module parallel_pipeline\n",
"# we create a new module called parallel_pipeline\n",
"from hamilton.htypes import Collect, Parallelizable\n",
"import pandas as pd\n",
30 changes: 16 additions & 14 deletions examples/jupyter_notebook_magic/README.md
@@ -1,25 +1,27 @@
-# This example shows a notebook using the Hamilton Jupyter magic
+# Hamilton notebook extension
 
+One of the best parts about notebooks is the ability to execute code and immediately inspect results. They provide a "read-eval-print loop" (REPL) coding experience. However, the way Hamilton separates dataflow definition (functions in a module) from execution (building and executing a driver) adds an extra step that can slow down this loop.
+
+We built the Hamilton notebook extension to tighten that loop and give an even better experience than the core notebook one!
+
 To load the magic:
 ```python
 # load some extensions / magic...
 %load_ext hamilton.plugins.jupyter_magic
 ```
 
-Then to use it:
-
+For example, this would allow you to define the module `joke` from your notebook:
 ```python
-%%cell_to_module -m MODULE_NAME # more args
-```
-Other arguments (--help to print this.):
-  -m, --module_name: Module name to provide. Default is jupyter_module.
-  -c, --config: JSON config string, or variable name containing config to use.
-  -r, --rebuild-drivers: Flag to rebuild drivers.
-  -d, --display: Flag to visualize dataflow.
-  -v, --verbosity: of standard output. 0 to hide. 1 is normal, default.
-
-Example use:
-
-```python
-%%cell_to_module -m MODULE_NAME --display --rebuild-drivers
+%%cell_to_module joke --display
+def topic() -> str:
+    return "Cowsay"
+
+def joke_prompt(topic: str) -> str:
+    return f"Knock, knock. Who's there? {topic}"
+
+def reply(joke_prompt: str) -> str:
+    _, _, right = joke_prompt.partition("? ")
+    return f"{right} who?"
 ```
+
+Go explore `tutorial.ipynb` to learn about all interactive features!
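
For context, a minimal sketch (not part of this diff) of how the `joke` module defined by the magic above could then be run, assuming the magic has placed a module object named `joke` in the notebook namespace:

```python
from hamilton import driver

# build a driver over the notebook-defined module and run the dataflow
dr = driver.Builder().with_modules(joke).build()
print(dr.execute(["reply"]))  # {'reply': 'Cowsay who?'}
```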