diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/_demo.md b/content/learning-paths/servers-and-cloud-computing/onnx/_demo.md new file mode 100644 index 0000000000..4e88f92d4c --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/onnx/_demo.md @@ -0,0 +1,61 @@ +--- +title: Run a Phi-4-mini chatbot powered by ONNX Runtime +# weight for this page is set once in the FIXED section below + +overview: | + This Learning Path shows you how to use a 32-core Azure Dpls_v6 instance powered by an Arm Neoverse-N2 CPU to build a simple chatbot server that you can then use to provide a chatbot to serve a small number of concurrent users. + + This architecture is suitable for businesses looking to deploy the latest Generative AI technologies with RAG capabilities using their existing CPU compute capacity and deployment pipelines. + + The demo uses the ONNX runtime, which Arm has enhanced with its own Kleidi technologies. Further optimizations are achieved by using the smaller Phi-4-mini model, which has been optimized at INT4 quantization to minimize memory usage. + + Chat with the chatbot LLM below to see the performance for yourself, and then follow the Learning Path to build your own Generative AI service on Arm Neoverse. + + +demo_steps: + - Type and send a message to the chatbot. + - Receive the chatbot's reply. + - View performance statistics demonstrating how well Azure Cobalt 100 instances run LLMs. + +diagram: config-diagram-dark.png +diagram_blowup: config-diagram.png +terms_and_conditions: demo-terms-and-conditions.txt + +prismjs: true # enable prismjs rendering of code snippets + +example_user_prompts: + - Prompt 1? + - Prompt 2? 
+ + +rag_data_cutoff_date: 2025/01/17 + +title_chatbot_area: Phi-4-mini Chatbot Demo + +# prismjs is already enabled above + + + +### Specific details to this demo +# ================================================================================ +tps_max: 30 # sets stat visuals for tps +tps_ranges: + - name: Low + context: Around the average human reading rate of 3-5 words per second. + color: var(--arm-green) + min: 0 + max: 5 + - name: High + context: This is significantly higher than the average human reading rate of 5 words per second, delivering a stable and usable user chatbot experience from the Phi-4-mini LLM using the ONNX runtime. + color: var(--arm-green) + min: 5 + max: 1000 + +### FIXED, DO NOT MODIFY +# ================================================================================ +demo_template_name: phi_onnx_chatbot_demo # allows the 'demo.html' partial to route to the correct Configuration and Demo/Stats sub partials for page render. +weight: 2 # _index.md always has weight of 1 to order correctly +layout: "learningpathall" # All files under learning paths have this same wrapper +learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. 
+--- diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/analysis.md b/content/learning-paths/servers-and-cloud-computing/onnx/analysis.md index 68e53a2335..8939fad493 100644 --- a/content/learning-paths/servers-and-cloud-computing/onnx/analysis.md +++ b/content/learning-paths/servers-and-cloud-computing/onnx/analysis.md @@ -1,6 +1,6 @@ --- title: Interact with the Phi-4-mini Chatbot -weight: 4 +weight: 5 layout: learningpathall --- diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/chatbot-icon.png b/content/learning-paths/servers-and-cloud-computing/onnx/chatbot-icon.png new file mode 100644 index 0000000000..6560d76baa Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/onnx/chatbot-icon.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/chatbot.md b/content/learning-paths/servers-and-cloud-computing/onnx/chatbot.md index 9ac96fa8c4..005c213d7f 100644 --- a/content/learning-paths/servers-and-cloud-computing/onnx/chatbot.md +++ b/content/learning-paths/servers-and-cloud-computing/onnx/chatbot.md @@ -1,6 +1,6 @@ --- title: Run the Chatbot Server -weight: 3 +weight: 4 layout: learningpathall --- diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/config-diagram-dark.png b/content/learning-paths/servers-and-cloud-computing/onnx/config-diagram-dark.png new file mode 100644 index 0000000000..0610f5d0b0 Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/onnx/config-diagram-dark.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/config-diagram.png b/content/learning-paths/servers-and-cloud-computing/onnx/config-diagram.png new file mode 100644 index 0000000000..997100ddc6 Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/onnx/config-diagram.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/onnx/setup.md 
b/content/learning-paths/servers-and-cloud-computing/onnx/setup.md index 752251b29a..d28f75f19f 100644 --- a/content/learning-paths/servers-and-cloud-computing/onnx/setup.md +++ b/content/learning-paths/servers-and-cloud-computing/onnx/setup.md @@ -2,7 +2,7 @@ # User change title: "Build ONNX Runtime and set up the Phi-4-mini Model" -weight: 2 +weight: 3 # Do not modify these elements layout: "learningpathall" diff --git a/themes/arm-design-system-hugo-theme/layouts/partials/demo-components/llm-chatbot/javascript--llm-chatbot.html b/themes/arm-design-system-hugo-theme/layouts/partials/demo-components/llm-chatbot/javascript--llm-chatbot.html index 2c183ce6db..ea2f7343ee 100644 --- a/themes/arm-design-system-hugo-theme/layouts/partials/demo-components/llm-chatbot/javascript--llm-chatbot.html +++ b/themes/arm-design-system-hugo-theme/layouts/partials/demo-components/llm-chatbot/javascript--llm-chatbot.html @@ -440,6 +440,7 @@ all_messages_div.removeChild(all_messages_div.firstChild); } {{ else if eq .Params.demo_template_name "llm_chatbot_first_demo" }} + {{ else if eq .Params.demo_template_name "phi_onnx_chatbot_demo" }} {{ else }} {{ end }} @@ -629,6 +630,9 @@ {{ else if eq .Params.demo_template_name "llm_chatbot_first_demo" }} {{ $server_location = getenv "HUGO_LLM_API" | base64Encode }} console.log('Using LLM API.'); + {{ else if eq .Params.demo_template_name "phi_onnx_chatbot_demo" }} + {{ $server_location = getenv "HUGO_PHI_ONNX_LLM_API" | base64Encode }} + console.log('Using HUGO_PHI_ONNX_LLM_API.'); {{ else }} console.log('No server location provided.'); {{ end }} diff --git a/themes/arm-design-system-hugo-theme/layouts/partials/learning-paths/demo.html b/themes/arm-design-system-hugo-theme/layouts/partials/learning-paths/demo.html index 7e0fbe1d81..3745550619 100644 --- a/themes/arm-design-system-hugo-theme/layouts/partials/learning-paths/demo.html +++ b/themes/arm-design-system-hugo-theme/layouts/partials/learning-paths/demo.html @@ -24,6 +24,9 @@ {{else if 
eq .Params.demo_template_name "whisper_audio_demo"}} {{/* {{partial "demo-components/config-params-only.html" .}} */}} +{{else if eq .Params.demo_template_name "phi_onnx_chatbot_demo"}} + {{/* {{partial "demo-components/config-params-only.html" .}} */}} + {{else if eq .Params.demo_template_name "kubectl_demo"}} {{partial "demo-components/config-param-and-file.html" .}} @@ -42,6 +45,10 @@ {{partial "demo-components/llm-voice-transcriber/demo-stats--llm-voice-transcriber.html" .}} {{partial "demo-components/llm-voice-transcriber/javascript--llm-voice-transcriber.html" .}} +{{else if eq .Params.demo_template_name "phi_onnx_chatbot_demo"}} + {{partial "demo-components/llm-chatbot/demo-stats--llm-chatbot.html" .}} + {{partial "demo-components/llm-chatbot/javascript--llm-chatbot.html" .}} + {{else if eq .Params.demo_template_name "kubectl_demo"}} {{partial "demo-components/demo--kubectl.html" .}}