In [2]:
import os
import sys

In [15]:
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import MarkdownReader

def load_markdown_docs(filepath):
    """Load the documents found under a directory, searching it recursively.

    Files with the ``.md`` extension are parsed by a ``MarkdownReader``
    instance registered through the ``file_extractor`` mapping.

    Args:
        filepath: Directory passed to ``SimpleDirectoryReader`` as ``input_dir``.

    Returns:
        Whatever ``SimpleDirectoryReader.load_data()`` returns for that
        directory (the loaded documents).
    """
    reader = SimpleDirectoryReader(
        input_dir=filepath,
        file_extractor={".md": MarkdownReader()},
        recursive=True,
    )
    documents = reader.load_data()
    return documents

In [17]:
# Load the documents from each folder.
# Each folder stays in its own collection for now so that separate indexes can
# be created later. The names on the left pair up positionally with the paths
# listed below.
(
    getting_started_docs,
    community_docs,
    data_docs,
    agent_docs,
    model_docs,
    query_docs,
    supporting_docs,
    tutorials_docs,
    contributing_docs,
) = [
    load_markdown_docs(folder)
    for folder in (
        "./data/getting_started",
        "./data/community",
        "./data/core_modules/data_modules",
        "./data/core_modules/agent_modules",
        "./data/core_modules/model_modules",
        "./data/core_modules/query_modules",
        "./data/core_modules/supporting_modules",
        "./data/end_to_end_tutorials",
        "./data/development",
    )
]

In [19]:
# Make our printing look nice
from llama_index.core.schema import MetadataMode

In [20]:
# Render the first agent document with MetadataMode.ALL, using the default
# text and metadata templates.
print(
    agent_docs[0].get_content(
        metadata_mode=MetadataMode.ALL,
    )
)

file_path: a:\Python Programs\Llama_index\Llamaindex\data\core_modules\agent_modules\agents\modules.md
file_name: modules.md
file_size: 619
creation_date: 2024-06-19
last_modified_date: 2023-09-12



Module Guides

These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


In [21]:
# Inspect the raw metadata dictionary attached to the first agent document.
print(agent_docs[0].metadata)

{'file_path': 'a:\\Python Programs\\Llama_index\\Llamaindex\\data\\core_modules\\agent_modules\\agents\\modules.md', 'file_name': 'modules.md', 'file_size': 619, 'creation_date': '2024-06-19', 'last_modified_date': '2023-09-12'}


In [22]:
# Templates controlling how a document is rendered as text:
#   text_template      - overall layout; {metadata_str} and {content} are filled in
#   metadata_template  - format of a single metadata entry
#   metadata_seperator - string placed between consecutive metadata entries
# NOTE(review): "seperator" (sic) matches the attribute name this notebook was
# run against; confirm the spelling expected by the installed llama-index version.
text_template = (
    "Content Metadata: \n{metadata_str}\n\n"
    "Content: \n{content}"
)
metadata_template = "{key}: {value},"
metadata_seperator = " "

# Apply the same rendering settings to every agent document.
for doc in agent_docs:
    doc.metadata_seperator = metadata_seperator
    doc.metadata_template = metadata_template
    doc.text_template = text_template

In [23]:
# Render the first agent document again with MetadataMode.ALL to inspect how
# its text and metadata are laid out.
print(
    agent_docs[0].get_content(
        metadata_mode=MetadataMode.ALL,
    )
)

Content Metadata: 
file_path: a:\Python Programs\Llama_index\Llamaindex\data\core_modules\agent_modules\agents\modules.md, file_name: modules.md, file_size: 619, creation_date: 2024-06-19, last_modified_date: 2023-09-12,

Content: 


Module Guides

These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


### Advanced Customization
Going even further with metadata, we can also customize which metadata fields are seen by both the embedding model and the LLM.

In [26]:
# Hide the file name from the LLM: the keys listed here are left out when the
# document is rendered with MetadataMode.LLM.
agent_docs[0].excluded_llm_metadata_keys = ["file_name"]
print(
    agent_docs[0].get_content(
        metadata_mode=MetadataMode.LLM,
    )
)

Content Metadata: 
file_path: a:\Python Programs\Llama_index\Llamaindex\data\core_modules\agent_modules\agents\modules.md, file_size: 619, creation_date: 2024-06-19, last_modified_date: 2023-09-12,

Content: 


Module Guides

These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


In [27]:
# Hide the file name from the embedding model: the keys listed here are left
# out when the document is rendered with MetadataMode.EMBED.
agent_docs[0].excluded_embed_metadata_keys = ["file_name"]
print(
    agent_docs[0].get_content(
        metadata_mode=MetadataMode.EMBED,
    )
)

Content Metadata: 
file_path: a:\Python Programs\Llama_index\Llamaindex\data\core_modules\agent_modules\agents\modules.md, file_size: 619, creation_date: 2024-06-19, last_modified_date: 2023-09-12,

Content: 


Module Guides

These guide provide an overview of how to use our agent classes.

For more detailed guides on how to use specific tools, check out our tools module guides.


### Conclusion

In this notebook, we covered how to use a custom data loader, as well as how to customize the text representation of your data when including metadata for both the LLM and the embedding model.