diff --git a/CHANGELOG.md b/CHANGELOG.md index 541f5e738..f7c800915 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Docs - Added federated learning showcase code +- Updated tutorial for redispatching workflows with Streamlit ### Tests diff --git a/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_covalent_imagegen.gif b/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_covalent_imagegen.gif index 4a1cb2fa4..2c22fe5df 100644 Binary files a/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_covalent_imagegen.gif and b/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_covalent_imagegen.gif differ diff --git a/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_workflow.gif b/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_workflow.gif new file mode 100644 index 000000000..c1e6ce0b7 Binary files /dev/null and b/doc/source/tutorials/0_ClassicalMachineLearning/genai/assets/streamlit_workflow.gif differ diff --git a/doc/source/tutorials/0_ClassicalMachineLearning/genai/requirements.txt b/doc/source/tutorials/0_ClassicalMachineLearning/genai/requirements.txt index 7dca3bbee..d036f6ae7 100644 --- a/doc/source/tutorials/0_ClassicalMachineLearning/genai/requirements.txt +++ b/doc/source/tutorials/0_ClassicalMachineLearning/genai/requirements.txt @@ -3,11 +3,9 @@ bs4==0.0.1 covalent-azurebatch-plugin==0.12.0 diffusers==0.19.3 emoji==2.8.0 -fastapi==0.103.1 Pillow==9.5.0 sentencepiece==0.1.99 streamlit==1.25.0 torch==2.0.1 transformers==4.31.0 -uvicorn==0.18.3 xformers==0.0.21 diff --git a/doc/source/tutorials/0_ClassicalMachineLearning/genai/source.ipynb b/doc/source/tutorials/0_ClassicalMachineLearning/genai/source.ipynb index 85d9cc184..d41a4f828 100644 --- a/doc/source/tutorials/0_ClassicalMachineLearning/genai/source.ipynb +++ b/doc/source/tutorials/0_ClassicalMachineLearning/genai/source.ipynb @@ -38,14 +38,14 @@ "source": [ "# Getting started\n", "\n", - "This tutorial requires [PyTorch](https://pytorch.org/), [Diffusers](https://github.com/huggingface/diffusers), [Hugging Face Transformers](https://huggingface.co/docs/transformers/index) for generative AI. [Streamlit](https://streamlit.io/) and [FastAPI](https://fastapi.tiangolo.com/) will serve to make the user experience smooth. To install all of them, simply use the `requirements.txt` file to replicate this notebook. \n", + "This tutorial requires [PyTorch](https://pytorch.org/), [Diffusers](https://github.com/huggingface/diffusers), and [Hugging Face Transformers](https://huggingface.co/docs/transformers/index) for the generative AI components, plus [Streamlit](https://streamlit.io/) for a smooth user experience. To install all of them and replicate this notebook, simply use the `requirements.txt` file. \n", "\n", "The list of packages required to run this tutorial is listed below."
] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "id": "03a0c441-40ec-4353-82eb-4d395fb8e4e2", "metadata": {}, "outputs": [ @@ -53,19 +53,17 @@ "name": "stdout", "output_type": "stream", "text": [ + "accelerate==0.21.0\n", "bs4==0.0.1\n", - "transformers==4.31.0\n", + "covalent-azurebatch-plugin==0.12.0\n", "diffusers==0.19.3\n", - "sentencepiece==0.1.99\n", - "torch==2.0.1\n", - "accelerate==0.21.0\n", + "emoji==2.8.0\n", "Pillow==9.5.0\n", + "sentencepiece==0.1.99\n", "streamlit==1.25.0\n", - "xformers==0.0.21\n", - "emoji==2.8.0\n", - "covalent-azurebatch-plugin==0.12.0\n", - "fastapi==0.103.1\n", - "uvicorn==0.18.3\n" + "torch==2.0.1\n", + "transformers==4.31.0\n", + "xformers==0.0.21\n" ] } ], @@ -77,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 3, "id": "3846e49b-f0fa-4878-b733-5fa53c1452af", "metadata": {}, "outputs": [], @@ -88,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "id": "063f6e62-402f-4232-b6e2-f94568aee059", "metadata": {}, "outputs": [], @@ -109,12 +107,7 @@ "from PIL import Image, ImageDraw, ImageFont\n", "import covalent as ct\n", "import torch\n", - "\n", - "\n", - "# setting loggers to info to avoid too many debug messages\n", - "loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]\n", - "for logger in loggers:\n", - " logger.setLevel(logging.INFO)" + "import logging" ] }, { @@ -130,13 +123,20 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 7, "id": "13658ad0-c002-4ef6-bc4d-ad9fed565443", "metadata": {}, "outputs": [], "source": [ "# save under workflow.py\n", "\n", + "# setting loggers to info to avoid too many debug messages\n", + "loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]\n", + "for logger in loggers:\n", + " logger.setLevel(logging.INFO)\n", + "\n", + "\n", + "\n", "# define dependencies to install on remote execution\n", "DEPS_ALL = ct.DepsPip(\n", " packages=[\n", @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 5, "id": "113c6ecd-719a-4be1-940b-d38fefe26733", "metadata": {}, "outputs": [], @@ -234,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 6, "id": "8dde7bfe-ead8-4b4b-b2c8-3633f0af9c29", "metadata": {}, "outputs": [], @@ -284,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "afbe7311-28e8-4f14-8598-e3378ebec946", "metadata": {}, "outputs": [ @@ -292,7 +292,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "a659c4c8-bb63-4ebd-9c02-1c2e02b52591\n" + "36d7f373-705a-46d6-8ac5-ed57cac8e332\n" ] } ], @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 9, "id": "83724676-0bb2-4248-a8ce-08fa5781f706", "metadata": {}, "outputs": [], @@ -422,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "a010e2cf-b86f-4af1-b513-3ba8753a8d2a", "metadata": {}, "outputs": [], @@ -468,55 +468,20 @@ "id": "3a9ab8e9-f61e-4671-961e-91f520e0ae04", "metadata": {}, "source": [ - "Finally, we will upload the image to Azure blob storage. The URL is used in the Streamlit app which then downloads and renders the image. " + "Finally, we will upload the save the image to our local machine and transfer it to Streamlit." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "bf8c5f33-9b56-45c7-b10e-6fbebb43bfad", "metadata": {}, "outputs": [], "source": [ - "# Your Azure Storage account connection string\n", - "connection_string = \"\"\n", - "\n", - "# Name of the container where you want to upload the file\n", - "container_name = \"\"\n", - "\n", - "# Name for the blob (file) in the container\n", - "blob_name = \"\"\n", - "\n", - "@ct.electron(executor=azure_cpu_executor, deps_pip=DEPS_ALL, files=[ft])\n", - "def save_image(image, filename='file_destination'):\n", + "@ct.electron\n", + "def save_image(image, filename='image'):\n", " image.save(f\"{filename}.jpg\")\n", - " # Create a BlobServiceClient\n", - " blob_service_client = BlobServiceClient.from_connection_string(connection_string)\n", - " # Get a reference to the container\n", - " container_client = blob_service_client.get_container_client(container_name)\n", - " \n", - " # Create or get a BlobClient to upload the file\n", - " blob_client = container_client.get_blob_client(blob_name)\n", - "\n", - " # Upload the file\n", - " with open(f\"{filename}.jpg\", \"rb\") as data:\n", - " blob_client.upload_blob(data)\n", - "\n", - " # Set the blob's access level to Blob (public read access)\n", - " blob_client.set_blob_access_tier(\"Cool\")\n", - " blob_client.set_blob_access_policy(sas_token=None, permission=BlobSasPermissions(read=True), expiry=datetime.datetime.utcnow() + datetime.timedelta(days=365))\n", - "\n", - " sas_token = generate_blob_sas(\n", - " account_name=blob_service_client.account_name,\n", - " container_name=container_name,\n", - " blob_name=blob_name,\n", - " account_key=blob_service_client.credential.account_key,\n", - " permission=BlobSasPermissions(read=True),\n", - " expiry=datetime.datetime.utcnow() + datetime.timedelta(days=365)\n", - " )\n", - " blob_url_with_sas = f\"https://{blob_service_client.account_name}.blob.core.windows.net/{container_name}/{blob_name}?{sas_token}\"\n", - "\n", - " return blob_url_with_sas" + " return image" ] }, { @@ -541,7 +506,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "a9e020a7-628f-4e3f-82fc-7e90fd379d94\n" + "eb54ecee-1eea-4e96-aca3-a75dd64ba677\n" ] } ], @@ -569,7 +534,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "id": "62642c3c-2054-403f-a9d5-170bd84f3f02", "metadata": {}, "outputs": [], @@ -630,63 +595,52 @@ "# Rerunning workflows via Streamlit\n", "\n", "\n", + "Now, let's proceed with the process of constructing the Streamlit app. This app will function as a gateway to Covalent, automatically initializing the Covalent server if it hasn't been started already, and commencing the initial workflow. Subsequently, new workflows will be triggered based on this initial one. \n", "\n", - "Instead of running and rerunning python scripts, we provide you with a [Streamlit](https://streamlit.io/) app that allows reruns simply by using a dispatch ID string. To be able to do so, we've added another layer of efficiency. A lightweight [FastAPI](https://fastapi.tiangolo.com/) server residing on the same machine as the Covalent server acts as a middleware. This server receives a JSON payload containing inputs and the dispatch ID, and forwards it to Covalent. This setup further decouples Streamlit from Covalent, allowing your Streamlit server to operate even more efficiently. \n", - "\n", - "\n", - "At this point, we recommend to decouple the python code into three files:\n", - "1. 
`workflow.py` containing the code to run the Covalent workflow\n", "2. `streamlit_app.py` containing streamlit code\n", "3. `fast_api.py` containing fast API code\n", "\n", "The outline of the middleware FastAPI layer can then be (`fast_api.py`):" + "At this point, we recommend decoupling the Python code into two files:\n", "1. `workflow.py` containing the code defining and running the Covalent workflow\n", "2. `streamlit_app.py` containing the Streamlit code" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "id": "46b82422-4acf-4698-bea4-79ea406c5a27", "metadata": {}, "outputs": [], "source": [ - "# save as fast_api.py\n", - "import uvicorn\n", - "from fastapi import FastAPI, Request\n", + "# add to streamlit_app.py\n", + "import streamlit as st\n", "import covalent as ct\n", + "from subprocess import check_output\n", + "import subprocess\n", "\n", - "app = FastAPI()\n", "\n", - "@app.post(\"/news_content_curator\")\n", - "async def news_content_api(dispatch_id: str, request: Request):\n", - " params = await request.json()\n", - " selected_content_analysis = params.pop('selected_content_analysis')\n", + "def is_covalent_down():\n", + " out = check_output([\"covalent\", \"status\"])\n", + " if \"Covalent server is stopped\" in out.decode('utf-8'):\n", + " return True\n", + " return False\n", "\n", - " redispatch_id = ct.redispatch(\n", - " dispatch_id, reuse_previous_results=True,\n", - " )(**params)\n", - " return {\n", - " 'status': 'success',\n", - " 'dispatch_id': redispatch_id\n", - " }\n", "\n", - "@app.get(\"/get_result\")\n", - "async def get_result(dispatch_id: str):\n", - " result = ct.get_result(dispatch_id, wait=True)\n", - " workflow_result = result.result\n", - " return workflow_result" - ] - }, - { - "cell_type": "markdown", - "id": "e7ce8c63-834d-4197-ac2e-483f84caff17", - "metadata": {}, - "source": [ - "To execute the FastAPI in a distinct Python script, you can include the subsequent code and store it as a separate script (`fast_api.py`).\n", + "def run_covalent_workflow(workflow_filename):\n", + " dispatch_id = check_output([\"python\", workflow_filename]).decode(\"utf-8\")\n", + " return dispatch_id.strip()\n", + "\n", + "\n", + "def start_covalent():\n", + " subprocess.run(\"covalent start --no-cluster\", shell=True)\n", "\n", - "```python\n", - "if __name__ == \"__main__\":\n", - " uvicorn.run(app, host=\"0.0.0.0\", port=8085)\n", - "```" + "\n", + "if is_covalent_down():\n", + " st.write(\"Covalent is not up. Starting Covalent...\")\n", + " start_covalent()\n", + " # execute the initial covalent workflow\n", + " dispatch_id = run_covalent_workflow(\"workflow.py\")\n", + " # wait for result\n", + " ct.get_result(dispatch_id, wait=True)\n", + " st.session_state['dispatch_id'] = dispatch_id" ] }, { "cell_type": "markdown", "id": "99e29327-2fc4-4be0-aa51-002eda668e13", "metadata": {}, "source": [ - "After saving, run it using `python fast_api.py` in a separate shell." + "Now the Streamlit app will automatically start the Covalent server and launch the first workflow. You can then reuse the `dispatch_id` of that launched workflow to rerun it with new parameters and iterate on your experiments.
" ] }, { @@ -715,12 +669,12 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 16, "id": "0d31b26a-57b1-48fb-bedf-08b710c903d0", "metadata": {}, "outputs": [], "source": [ - "import streamlit as st\n", + "# add to streamlit_app.py\n", "\n", "def create_streamlit_sidebar(\n", " stable_diffusion_models, news_summary_generation,\n", @@ -728,41 +682,39 @@ " genre_analysis_models\n", "):\n", " with st.sidebar:\n", - " # Define the location of the remote fast API middleware server\n", - " server_location = st.text_input(\n", - " \"Remote server URL\", value=\"http://localhost:8085\"\n", - " )\n", " news_article_url = st.text_input(\n", " \"News article URL\",\n", - " value=\"https://www.quantamagazine.org/math-proof-draws-new-boundaries-around-black-hole-formation-20230816/\" # noqa\n", + " value=\"https://www.quantamagazine.org/math-proof-draws-new-boundaries-around-black-hole-formation-20230816/\"\n", " )\n", " st.header(\"Parameters\")\n", - " st.subheader(\"Image generation\")\n", "\n", + " # Title generation section\n", + " st.subheader(\"Title generation parameters\")\n", + " title_generating_model = headline_generation_models[0]\n", + " temperature = st.slider(\n", + " \"Temperature\", min_value=0.0, max_value=100.0, value=1.0,\n", + " step=0.1\n", + " )\n", + " max_tokens = st.slider(\n", + " \"Max tokens\", min_value=2, max_value=50, value=32,\n", + " )\n", + "\n", + " # Image generation section\n", + " st.subheader(\"Image generation\")\n", " image_generation_prefix = st.text_input(\n", " \"Image generation prompt\",\n", " value=\"impressionist style\"\n", " )\n", " image_generation_model = stable_diffusion_models[0]\n", + "\n", + " # Text summarization section\n", " st.subheader(\"Text summarization\")\n", " summarizer_model = news_summary_generation[0]\n", " summarizer_max_length = st.slider(\n", - " \"Summarizer length\", min_value=5, max_value=200, value=64,\n", + " \"Summarization text length\", min_value=2, max_value=50, value=20,\n", " )\n", "\n", - " st.subheader(\"Text generation parameters\")\n", - " title_generating_model = headline_generation_models[0]\n", - "\n", - " temperature = st.slider(\n", - " \"Temperature\", min_value=0.0, max_value=100.0, value=1.0,\n", - " step=0.1\n", - " )\n", - " max_tokens = st.slider(\n", - " \"Max tokens\", min_value=5, max_value=200, value=64,\n", - " )\n", - " no_repeat_ngram_size = st.slider(\n", - " \"No repeat ngram size\", min_value=1, max_value=10, value=2,\n", - " )\n", + " # Content analysis section\n", " st.subheader(\"Content analysis\")\n", " selected_content_analysis = st.selectbox(\n", " \"Content analysis option\", options=[\n", @@ -775,7 +727,7 @@ " else:\n", " content_analysis_model = genre_analysis_models[0]\n", "\n", - " return server_location, {\n", + " return {\n", " 'news_url': news_article_url,\n", " 'image_generation_prefix': image_generation_prefix,\n", " 'summarizer_model': summarizer_model,\n", @@ -784,7 +736,6 @@ " 'image_generation_model': image_generation_model,\n", " 'temperature': temperature,\n", " 'max_tokens': max_tokens,\n", - " 'no_repeat_ngram_size': no_repeat_ngram_size,\n", " 'content_analysis_model': content_analysis_model,\n", " 'selected_content_analysis': selected_content_analysis\n", " }" @@ -800,64 +751,58 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 19, "id": "f7c9b8e5-23ff-48f3-ad1b-dc7f97eb165c", "metadata": {}, "outputs": [], "source": [ - "import requests\n", - "\n", - "\n", + "# add to streamlit_app.py\n", "st.title(\"News article AI 
summarization\")\n", - "dispatch_id_area = st.text_area(\"Dispatch IDs\")\n", + "dispatch_id = st.text_input(\"Dispatch ID\")\n", "\n", "if st.button(\"Generate image and text summary\"):\n", " st.write(\"Generating...\")\n", "\n", " container = st.container()\n", - " response = make_redispatch(\n", - " server_location, parameters, dispatch_id,\n", - " )\n", - " if response['status'] == \"error\":\n", - " st.write(f\"Error: {response['message']}\")\n", + "\t\t\n", + " # select either genre analysis or sentiment analysis\n", + " selected_content_analysis = parameters.pop('selected_content_analysis')\n", + " if selected_content_analysis != 'sentiment analysis':\n", + " replace_electrons = {\n", + " \"sentiment_analysis\": ct.electron(classify_news_genre)\n", + " }\n", + " parameters[\n", + " \"content_analysis_model\"\n", + " ] = \"abhishek/autonlp-bbc-news-classification-37229289\"\n", " else:\n", - " redispatch_id = response['dispatch_id']\n", - "\n", - " covalent_info = get_covalent_info(server_location).json()\n", - " address = covalent_info['address']\n", - " port = covalent_info['port']\n", - " covalent_url = f\"{address}:{port}/{redispatch_id}\"\n", - "\n", - " st.write(f\"Covalent URL on remote server: http://{covalent_url}\")\n", - "\n", - " with container:\n", - " result = get_dispatch_result(server_location, redispatch_id)\n", - " if result and result.json():\n", - " result = result.json()\n", - " st.subheader(\n", - " \"Article generated title: \" +\n", - " f\"{result['title']}\"\n", - " )\n", - " st.write(\n", - " \"In terms of \" +\n", - " parameters['selected_content_analysis'] +\n", - " \" content is: \" + str(result['content_property'])\n", - " )\n", - "\n", - " image_url = result['image']\n", - " response = requests.get(image_url, stream=True)\n", - " local_image = f'/tmp/{redispatch_id}.img.png'\n", - " with open(local_image, 'wb') as out_file:\n", - " shutil.copyfileobj(response.raw, out_file)\n", - " st.image(local_image)\n", - "\n", - " st.text_area(\n", - " label=\"AI generated summary\",\n", - " key=\"summary\",\n", - " value=result['summary'], disabled=True\n", - " )\n", - " else:\n", - " st.write(\"Error with processing, check workflow\")" + " replace_electrons = {}\n", + "\n", + " redispatch_id = ct.redispatch(\n", + " dispatch_id, reuse_previous_results=True,\n", + " replace_electrons=replace_electrons\n", + " )(**parameters)\n", + "\n", + " covalent_info = ct.get_config()['dispatcher']\n", + " address = covalent_info['address']\n", + " port = covalent_info['port']\n", + " covalent_url = f\"{address}:{port}/{redispatch_id}\"\n", + "\n", + " st.write(f\"Covalent URL on remote server: http://{covalent_url}\")\n", + "\n", + " with container:\n", + " result = ct.get_result(redispatch_id, wait=True).result\n", + " st.subheader(f\"Article generated title: {result['title']}\")\n", + " st.write(\n", + " \"In terms of \" +\n", + " selected_content_analysis +\n", + " \" content is: \" + str(result['content_property'])\n", + " )\n", + " st.image(result['image'])\n", + " st.text_area(\n", + " label=\"AI generated summary\",\n", + " key=\"summary\",\n", + " value=result['summary'], disabled=True\n", + " )" ] }, { @@ -873,7 +818,7 @@ "\n", "You can use the streamlit app as demonstrated below:\n", "\n", - "![StreamlitCovalent](assets/streamlit_covalent.gif \"Streamlit + Covalent\")\n", + "![StreamlitCovalent](assets/streamlit_workflow.gif \"Streamlit + Covalent\")\n", "\n", "Generating multiple images with Streamlit via Covalent is demonstrated below\n", "\n", @@ -887,11 +832,11 
@@ "source": [ "# Conclusion\n", "\n", - "Through the integration of Covalent and Streamlit, we have developed a news content summarization application that demonstrates a smooth transition from the design of machine learning experiments to the dependable repetition and enhancement of experimental outcomes. Covalent's capability to re-execute previously performed workflows simplifies collaboration between engineers and researchers who constructed these workflows, enabling the reuse and customization of previously computed workflows. \n", + "You have learned how to build complex machine learning workflows using an example of a news summarization application. A Covalent server takes care of the machine learning workflows, while a Streamlit interface handles user interactions. The two communicate via a single (dispatch) ID, streamlining resource management, enhancing efficiency, and allowing you to concentrate on the machine learning aspects. \n", "\n", - "Leveraging Covalent's capabilities, we structured the application into three distinct components: 1) Covalent workflow design, 2) a FastAPI API layer that serves as an interface to Covalent, and 3) a user-friendly Streamlit interface responsible for invoking the FastAPI API layer and presenting the results in an easily comprehensible format.\n", + "If you found this interesting, please note that [Covalent](https://github.com/AgnostiqHQ/covalent) is free and [open source](https://www.covalent.xyz/open-source/). Please visit the [Covalent documentation](https://docs.covalent.xyz/docs/) for more information and many more [tutorials](https://docs.covalent.xyz/docs/user-documentation/tutorials/). An example of the Streamlit application described here was deployed [here](https://covalent-news-summary.streamlit.app/). Please note it will not be able to run out of the box, since it requires having valid Azure access credentials. \n", "\n", - "This tutorial aims to showcase the remarkable potential of Covalent in conjunction with Streamlit. Covalent is free and [open source](https://www.covalent.xyz/open-source/). Please visit the [Covalent documentation](https://docs.covalent.xyz/docs/) for more information and many more [tutorials](https://docs.covalent.xyz/docs/user-documentation/tutorials/). An example of the Streamlit application described here was deployed [here](https://covalentredispatch-u5xmshqomhpbtwnszya4jv.streamlit.app/)." + "Happy workflow building! 🎈" ] } ], @@ -911,7 +856,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.17" + "version": "3.9.18" } }, "nbformat": 4,