## Imports

In [2]:
from dotenv import load_dotenv
import os
import fnmatch
from IPython.display import display, Markdown

from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA

from github_utils import fetch_mdx_contents, get_files_from_github_repo, load_mdx_contents, save_mdx_content
from retriever_utils import get_chroma_db, get_source_chunks, ask_question, chain_type_kwargs
from langchain.chat_models import ChatOpenAI

## Script

In [3]:
# Load variables from a local .env file into the process environment
# (GITHUB_TOKEN is read below; presumably the OpenAI key is picked up from
# there too -- confirm against your .env).
load_dotenv()

# Git ref of the repository to index. Switch to e.g. "3.6.x" to target that
# ref instead of "main".
tag = "main"

# You need to set GITHUB_TOKEN in your .env file.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
if not GITHUB_TOKEN:
    # Do not abort: just make the likely cause of later download problems visible.
    print(
        "Warning: GITHUB_TOKEN is not set. Add it to your .env file; "
        "GitHub API requests without a token may fail or be rate limited quickly."
    )
REPO_OWNER = "RasaHQ"
REPO_NAME = "rasa"

EXIT_COMMAND = "exit"

# Dots are replaced with underscores before the ref is embedded in the
# cache file name and the Chroma directory name.
tag_path = tag.replace(".", "_")
DOCS_MDX_PATH = f"docs_cache/mdx_{tag_path}.pkl"
CHROMA_DB_PATH = f"./chroma/{REPO_NAME}/{tag_path}"

# File listing of the repository at the selected ref; filtered for *.mdx
# entries in the next cell.
repo_files = get_files_from_github_repo(REPO_OWNER, REPO_NAME, GITHUB_TOKEN, tag)


In [4]:

# Keep only blob entries whose path matches the "*.mdx" pattern.
mdx_files = [
    entry
    for entry in repo_files
    if entry["type"] == "blob" and fnmatch.fnmatch(entry["path"], "*.mdx")
]
mdx_contents = fetch_mdx_contents(mdx_files, wait_for_renewal=False)

expected_count = len(mdx_files)
downloaded_count = len(mdx_contents)

if downloaded_count == expected_count:
    # Complete download: refresh the on-disk cache for later runs.
    save_mdx_content(DOCS_MDX_PATH, mdx_contents)
    print(f"Saved {len(mdx_contents)} mdx files to disk.")
elif downloaded_count < expected_count:
    # Incomplete download: report it and fall back to the cached copy.
    print()
    print(
        f"Warning: {downloaded_count} of {expected_count} files were downloaded."
    )
    print("Please check your Github API rate limit.")
    mdx_contents = load_mdx_contents(DOCS_MDX_PATH)
    print(f"Loaded {len(mdx_contents)} mdx files from disk.")



Please check your Github API rate limit.
Using old mdx files from disk.
Loaded 107 mdx files from disk.


In [None]:
# Turn the mdx contents into source chunks and build (or open) the Chroma
# store at CHROMA_DB_PATH using OpenAI embeddings.
source_chunks = get_source_chunks(mdx_contents)
embedding_model = OpenAIEmbeddings()
chroma_db = get_chroma_db(CHROMA_DB_PATH, source_chunks, embedding_model)

# Assemble the question-answering chain: Chroma-backed retriever + chat model,
# with the prompt settings supplied by retriever_utils.chain_type_kwargs.
doc_retriever = chroma_db.as_retriever()
chat_llm = ChatOpenAI()
qa = RetrievalQA.from_chain_type(
    llm=chat_llm,
    chain_type="stuff",
    retriever=doc_retriever,
    chain_type_kwargs=chain_type_kwargs,
)

## Ask a Question

In [8]:
# Question sent through the retrieval chain built above.
question = "How to create a rasa project?"
result = ask_question(question, qa)

# Another question to try:
# "How should I pass intents data for training? rasa nlu"

**Question:** How to create a rasa project?

**Answer:** To create a Rasa project, you can use the following command:

```bash
rasa init
```

This command will create a directory called `data` and set up the basic structure for your Rasa project.

You can then remove the files in the `data` directory and add your own training data, configuration, and domain files.

For more details, you can refer to the Rasa documentation on [Creating a Rasa Project](https://rasa.com/docs/rasa/user-guide/rasa-tutorial/#creating-a-new-rasa-project).

## Old Answer

In [109]:
# res = qa.run("How should I pass intents data for training? rasa nlu")

In [110]:
# from IPython.display import display, Markdown
# display(Markdown(res.replace('. ', '.\n\n')))

 You should pass intents data for training by grouping them under the `nlu` key.

Training examples should be grouped by intent and listed under the `examples` key.

You can also add extra information such as regular expressions and lookup tables to your training data to help the model identify intents and entities correctly.