Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
notebooks/dataset.zip filter=lfs diff=lfs merge=lfs -text
153 changes: 153 additions & 0 deletions .github/workflows/docs-build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Builds the Sphinx HTML documentation inside the docs-builder container and
# uploads it as the html-build-artifact artifact. For pull requests it also
# uploads a small "pr" artifact with the PR number, merge state and action.
# For pushes (main or v* tags) the store-html job commits the HTML to the
# gh-pages branch.
name: docs-build

on:
  pull_request:
    branches: [ main, release-* ]
    types: [ opened, synchronize ]

  push:
    branches: [ main ]
    tags:
      - v*
  workflow_dispatch:

# One active run per pull request (or per ref when there is no PR number);
# a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

defaults:
  run:
    shell: bash

jobs:
  build-docs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build image
        run: |
          docker build --pull --tag docs-builder:latest --file docs/Dockerfile .
      - name: Build docs
        # The repository is mounted at /work; HTML lands in docs/_build/output
        # and the doctree cache is kept out of the artifact in /tmp.
        run: |
          docker run -v "$(pwd)":/work -w /work docs-builder:latest sphinx-build -b html -d /tmp docs docs/_build/output
      - name: Delete unnecessary files
        run: |
          sudo rm -rf docs/_build/jupyter_execute
          sudo rm -rf docs/_build/.buildinfo
      - name: Upload HTML
        uses: actions/upload-artifact@v4
        with:
          name: html-build-artifact
          path: docs/_build/
          if-no-files-found: error
          retention-days: 1
      # The next two steps record PR metadata in a separate "pr" artifact.
      - name: Store PR information
        if: ${{ github.event_name == 'pull_request' }}
        run: |
          mkdir ./pr
          echo ${{ github.event.number }} > ./pr/pr.txt
          echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt
          echo ${{ github.event.action }} > ./pr/action.txt
      - name: Upload PR information
        if: ${{ github.event_name == 'pull_request' }}
        uses: actions/upload-artifact@v4
        with:
          name: pr
          path: pr/

  # Publishes the built HTML to the gh-pages branch. Runs for push events only.
  store-html:
    needs: [ build-docs ]
    if: ${{ github.event_name == 'push' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: "gh-pages"
      - name: Initialize Git configuration
        run: |
          git config user.name docs-build
          git config user.email do-not-send@github.com
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          name: html-build-artifact
      - name: Copy HTML directories
        run: |
          ls -asl
      - name: Store bleeding edge docs from main
        if: ${{ github.ref == 'refs/heads/main' }}
        run: |
          mkdir -p main
          rsync -av --progress --delete output/ main/
          git add main
      - name: Store docs for a release tag
        if: ${{ startsWith(github.ref, 'refs/tags/v') }}
        env:
          # Anything other than 'true' keeps this tag from replacing latest/.
          # A head commit message containing '/not-latest' opts out.
          LATEST: ${{ contains(github.event.head_commit.message, '/not-latest') && 'not-true' || 'true' }}
        run: |
          printenv LATEST
          if [[ "${GITHUB_REF}" =~ "-rc" ]]; then
            echo "Not saving documents for release candidates."
            exit 0
          fi
          # Only tags that contain a vMAJOR.MINOR.PATCH version are stored;
          # the directory is named after the version without the leading "v".
          if [[ "${GITHUB_REF}" =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
            TAG="${BASH_REMATCH[1]}"
            mkdir -p "${TAG}"
            rsync -av --progress --delete output/ "${TAG}/"
            git add "${TAG}/"
            if [[ "${LATEST}" == 'true' ]]; then
              mkdir -p latest
              rsync -av --progress --delete output/ latest/
              cp output/versions.json .
              git add latest
              git add versions.json
            fi
          fi
      - name: Check or create dot-no-jekyll file
        run: |
          if [ -f ".nojekyll" ]; then
            echo "The dot-no-jekyll file already exists."
            exit 0
          fi
          touch .nojekyll
          git add .nojekyll
      - name: Check or create redirect page
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # An index.html that already carries the refresh meta tag is kept.
          # The tag written below must match this pattern for that to work.
          resp=$(grep 'http-equiv="refresh"' index.html 2>/dev/null) || true
          if [ -n "${resp}" ]; then
            echo "The redirect file already exists."
            exit 0
          fi
          # If any of these commands fail, fail the build.
          html_url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" --jq ".html_url")
          # Drop one trailing slash, if present, so the links below do not
          # contain "//latest".
          html_url="${html_url%/}"
          # Beware the ugly quotation mark avoidance in the following lines.
          echo '<!DOCTYPE html>' > index.html
          echo '<html>' >> index.html
          echo ' <head>' >> index.html
          echo ' <title>Redirect to documentation</title>' >> index.html
          echo ' <meta charset="utf-8">' >> index.html
          echo ' <meta http-equiv="refresh" content="3; URL='${html_url}'/latest/index.html">' >> index.html
          echo ' <link rel="canonical" href="'${html_url}'/latest/index.html">' >> index.html
          echo ' <script language="javascript">' >> index.html
          echo ' function redirect() {' >> index.html
          echo ' window.location.assign("'${html_url}'/latest/index.html")' >> index.html
          echo ' }' >> index.html
          echo ' </script>' >> index.html
          echo ' </head>' >> index.html
          echo ' <body onload="redirect()">' >> index.html
          echo ' <p>Please follow the link to the <a href="'${html_url}'/latest/index.html">' >> index.html
          echo 'latest</a> documentation.</p>' >> index.html
          echo ' </body>' >> index.html
          echo '</html>' >> index.html
          git add index.html
      - name: Commit changes to the GitHub Pages branch
        run: |
          git status
          # git commit exits non-zero when nothing is staged; that is not an
          # error here.
          if git commit -m 'Pushing changes to GitHub Pages.'; then
            git push -f
          else
            echo "Nothing changed."
          fi
117 changes: 117 additions & 0 deletions .github/workflows/docs-preview-pr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Runs after the docs-build workflow completes. For successful pull_request
# runs it downloads that run's artifacts, stores the HTML under
# review/pr-<number> on the GitHub Pages branch, and posts a comment with the
# preview URL on the pull request.
name: docs-preview-pr

on:
  workflow_run:
    workflows: [docs-build]
    types: [completed]

env:
  WF_ID: ${{ github.event.workflow_run.id }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
  # Always determine if GitHub Pages are configured for this repo.
  get-gh-pages-url:
    if: >-
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    outputs:
      url: ${{ steps.api-resp.outputs.html_url || '' }}
      branch: ${{ steps.api-resp.outputs.branch || '' }}
    steps:
      - name: Check for GitHub Pages
        id: api-resp
        run: |
          has_pages=$(gh api "repos/${GITHUB_REPOSITORY}" -q '.has_pages')
          if [ "true" != "${has_pages}" ]; then
            echo "GitHub pages is not active for the repository. Quitting."
            # Leave the outputs empty so that store-html is skipped. A bare
            # "return" is not valid outside a function and would fail the step.
            exit 0
          fi

          url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.html_url')
          branch=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.source.branch')

          echo "html_url=${url}" >> $GITHUB_OUTPUT
          echo "branch=${branch}" >> $GITHUB_OUTPUT

  # Identify the dir for the HTML.
  store-html:
    runs-on: ubuntu-latest
    needs: [get-gh-pages-url]
    if: needs.get-gh-pages-url.outputs.url != ''
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.get-gh-pages-url.outputs.branch }}
      - name: Initialize Git configuration
        run: |
          git config user.name docs-preview
          git config user.email do-not-send-@github.com
      - name: Download artifacts
        run: |
          gh run view "${WF_ID}"
          # Without a name filter every artifact of the run is downloaded into
          # a directory named after the artifact (./pr, ./html-build-artifact).
          gh run download "${WF_ID}"
          PR=$(cat ./pr/pr.txt)
          MERGED=$(cat ./pr/merged.txt)
          ACTION=$(cat ./pr/action.txt)

          # The pr artifact is written by a pull_request run, so its content
          # is untrusted. Accept only a plain number, a boolean and an action
          # name before exporting the values to later steps.
          if ! [[ "${PR}" =~ ^[0-9]+$ && "${MERGED}" =~ ^(true|false)$ && "${ACTION}" =~ ^[a-z_]+$ ]]; then
            echo "Unexpected PR metadata in the pr artifact. Quitting."
            exit 1
          fi

          echo "PR_NO=${PR}" >> $GITHUB_ENV
          echo "MERGE_STATUS=${MERGED}" >> $GITHUB_ENV
          echo "PR_ACTION=${ACTION}" >> $GITHUB_ENV
          echo "REVIEW_DIR=review/" >> $GITHUB_ENV
          echo "PR_REVIEW_DIR=review/pr-${PR}" >> $GITHUB_ENV

          # Remove the pr artifact directory so that it does not
          # appear in listings or confuse git with untracked files.
          rm -rf ./pr

      # Permutations:
      # - PR was updated, PR_ACTION is !closed, need to delete review directory and update it.
      # - PR was closed (regardless of merge), PR_ACTION is closed, need to delete review directory.
      # NOTE(review): docs-build currently triggers only on "opened" and
      # "synchronize", so the "closed" path looks unreachable - confirm.

      # If this PR is still open, store HTML in a review directory.
      - name: Handle HTML review directory for open PRs and updates to PRs
        if: env.MERGE_STATUS == 'false' && env.PR_ACTION != 'closed'
        run: |
          rm -rf "${PR_REVIEW_DIR}" 2>/dev/null || true
          if [ ! -d "${REVIEW_DIR}" ]; then
            mkdir "${REVIEW_DIR}"
          fi
          # docs-build writes the HTML to docs/_build/output and uploads
          # docs/_build/, so the artifact holds the site under output/.
          mv ./html-build-artifact/output/ "${PR_REVIEW_DIR}"
          git add "${PR_REVIEW_DIR}"
      # If the PR was closed, merged or not, delete review directory.
      - name: Delete HTML review directory for closed PRs
        if: env.PR_ACTION == 'closed'
        run: |
          if [ -d ./html-build-artifact/ ]; then
            rm -rf ./html-build-artifact/ 2>/dev/null
          fi
          if [ -d "${PR_REVIEW_DIR}" ]; then
            git rm -rf "${PR_REVIEW_DIR}"
          fi
      - name: Commit changes to the GitHub Pages branch
        run: |
          git status
          # git commit exits non-zero when nothing is staged; that is not an
          # error here.
          if git commit -m 'Pushing changes to GitHub Pages.'; then
            git push -f
          else
            echo "Nothing changed."
          fi
      - name: Check for existing documentation review comment
        run: |
          result=$(gh pr view "${PR_NO}" --json comments -q 'any(.comments[].body; contains("Documentation preview"))')
          echo "COMMENT_EXISTS=${result}" >> $GITHUB_ENV
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Add HTML review URL comment to a newly opened PR
        if: env.MERGE_STATUS == 'false' && env.COMMENT_EXISTS == 'false'
        env:
          URL: ${{ needs.get-gh-pages-url.outputs.url }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        shell: bash
        run: |
          # URL and PR_REVIEW_DIR are joined without a separator, so URL is
          # presumably expected to end with "/" - verify against the Pages API.
          echo -e "## Documentation preview" > body
          echo -e "" >> body
          echo -e "<${URL}${PR_REVIEW_DIR}>" >> body
          cat body
          gh pr comment "${PR_NO}" --body-file body
11 changes: 11 additions & 0 deletions .github/workflows/docs-remove-stale-reviews.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Weekly housekeeping: hands off to the shared reusable workflow referenced
# in the "remove" job. Can also be started manually.
name: docs-remove-stale-reviews

on:
  workflow_dispatch:
  schedule:
    # Sundays at 00:42 UTC.
    - cron: "42 0 * * 0"

jobs:
  remove:
    uses: nvidia-merlin/.github/.github/workflows/docs-remove-stale-reviews-common.yaml@main
13 changes: 12 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,15 @@ deploy/*.txt

# Docker Compose exclusions
volumes/
uploaded_files/
uploaded_files/

# Visual Studio Code
.vscode

# Node modules
**/node_modules

# Files from docs builds
docs/_*
docs/notebooks
docs/experimental
45 changes: 43 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,48 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.4.0] - 2024-02-22
## [0.5.0] - 2024-03-19

This release adds new dedicated RAG examples showcasing state-of-the-art use cases, switches to the latest [API catalog endpoints from NVIDIA](https://build.nvidia.com/explore/discover), and refactors the API interface of chain-server. This release also improves the developer experience by adding GitHub Pages based documentation and streamlining the example deployment flow using dedicated compose files.

### Added

- GitHub Pages based documentation.
- New examples showcasing
- [Multi-turn RAG](./RetrievalAugmentedGeneration/examples/multi_turn_rag/)
- [Multi-modal RAG](./RetrievalAugmentedGeneration/examples/multimodal_rag/)
- [Structured data CSV RAG](./RetrievalAugmentedGeneration/examples/csv_rag/)
- Support for [delete and list APIs](./docs/api_reference/openapi_schema.json) in chain-server component
- Streamlined RAG example deployment
- Dedicated new [docker compose files](./deploy/compose/) for every example.
- Dedicated [docker compose files](./deploy/compose/docker-compose-vectordb.yaml) for launching vector DB solutions.
- New configurations to control top k and confidence score of retrieval pipeline.
- Added [a notebook](./models/NeMo/slm/README.md) which covers how to train SLMs with various techniques using NeMo Framework.
- Added more [experimental examples](./experimental/README.md) showcasing new use cases.
- [NVIDIA ORAN chatbot multimodal Assistant](./experimental/oran-chatbot-multimodal/)
- [NVIDIA Retrieval Customization](./experimental/synthetic-data-retriever-customization/)
- [NVIDIA RAG Streaming Document Ingestion Pipeline](./experimental/streaming_ingest_rag/)
- [NVIDIA Live FM Radio ASR RAG](./experimental/fm-asr-streaming-rag/)
- [New dedicated notebook](./notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb) showcasing a RAG pipeline using web pages.


### Changed

- Switched from NVIDIA AI Foundation to [NVIDIA API Catalog endpoints](https://build.nvidia.com/explore/discover) for accessing cloud hosted LLM models.
- Refactored [API schema of chain-server component](./docs/api_reference/openapi_schema.json) to support runtime allocation of llm parameters like temperature, max tokens, chat history etc.
- Renamed `llm-playground` service in compose files to `rag-playground`.
- Switched base containers for all components to ubuntu instead of pytorch and optimized container build time as well as container size.
- Deprecated yaml based configuration to avoid confusion, all configurations are now environment variable based.
- Removed requirement of hardcoding `NVIDIA_API_KEY` in `compose.env` file.
- Upgraded all python dependencies for chain-server and rag-playground services.

### Fixed

- Fixed a bug causing hallucinated answers when the retriever fails to return any documents.
- Fixed some accuracy issues for all the examples.


## [0.4.0] - 2024-02-23

### Added

Expand Down Expand Up @@ -75,4 +116,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Fixed

- [Fixed issue #13](https://github.com/NVIDIA/GenerativeAIExamples/issues/13) of pipeline not able to answer questions unrelated to knowledge base
- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files
- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files
Loading