In [None]:
%%capture
# Install or upgrade (-U), quietly (-q), the packages imported in later cells.
!pip install -qU llama-index
!pip install -qU weave ml-collections
!pip install -qU llama-index-llms-anthropic
!pip install -qU llama-index-callbacks-wandb
# Clone the weave repository; a later cell reads Markdown files from
# /content/weave/docs, so this assumes the clone lands in /content
# (presumably the Colab working directory — TODO confirm).
!git clone https://github.com/wandb/weave.git
!pip install -qU llama-index-embeddings-huggingface

In [None]:
import os

import wandb
import weave
from google.colab import userdata
from llama_index.callbacks.wandb import WandbCallbackHandler

# Copy each Colab secret into the environment variable of the matching name.
# Pairs are (environment variable, Colab secret name); order is preserved.
for env_name, secret_name in (
    ("WANDB_API_KEY", "W&B"),
    ("ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY"),
):
    os.environ[env_name] = userdata.get(secret_name)

# The same project name is used for weave and for the W&B callback handler.
project_name = "chatbot-claude3-llamaindex-weave"
weave.init(project_name)
wandb_callback = WandbCallbackHandler(run_args=dict(project=project_name))

In [None]:
# @title ⚙️ Configuration
import ml_collections
from llama_index.core import Settings


def get_config() -> ml_collections.ConfigDict:
    """Build the configuration object used by the rest of the notebook.

    Returns:
        A ``ConfigDict`` with three fields:

        * ``model`` - model name passed to ``Anthropic(...)`` as the LLM.
        * ``embedding_model`` - model name passed to ``HuggingFaceEmbedding(...)``.
        * ``fetch_index_from_wandb`` - when true, later cells load a stored
          index through ``wandb_callback``; when false, they build the index
          from ``docs`` and persist it.
    """
    config = ml_collections.ConfigDict()
    # NOTE(review): the trailing `# @param` markers look like Colab form-field
    # annotations; keep each one on the same line as its assignment.
    config.model: str = "claude-3-haiku-20240307"  # @param {type: "string"}
    config.embedding_model: str = "BAAI/bge-small-en-v1.5"  # @param {type: "string"}
    config.fetch_index_from_wandb: bool = True  # @param {type: "boolean"}
    return config


# Module-level configuration instance read by the cells below.
config = get_config()

## 💿 The Dataset
---

In [None]:
from llama_index.core import SimpleDirectoryReader

# Restrict loading to Markdown files.
required_exts = [".md"]

# Reader options: walk the cloned weave docs directory, including subfolders.
reader_options = {
    "input_dir": "/content/weave/docs",
    "required_exts": required_exts,
    "recursive": True,
}
reader = SimpleDirectoryReader(**reader_options)

# Read every matching file into the documents used to build the index.
docs = reader.load_data()

## ✍️ Model Architecture & Training
---

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

%%capture
from llama_index.llms.anthropic import Anthropic

Settings.llm = Anthropic(temperature=0.0, model=config.model)
Settings.embed_model = HuggingFaceEmbedding(model_name=config.embedding_model)

## 🗂 Creating an Index
---

In [None]:
from llama_index.core import VectorStoreIndex

# Build path: taken only when the config says not to fetch a stored index.
should_build_index = not config.fetch_index_from_wandb
if should_build_index:
    # Index the loaded documents, then persist the result through the W&B
    # callback under the name "claude3-index".
    index = VectorStoreIndex.from_documents(docs)
    wandb_callback.persist_index(index, index_name="claude3-index")

In [None]:
from llama_index.core import load_index_from_storage

# Fetch path: taken when the config asks for a previously stored index.
if config.fetch_index_from_wandb:
    index_artifact = (
        "sauravmaheshkar/chatbot-claude3-llamaindex-weave/claude3-index:v0"
    )
    # Resolve the artifact into a storage context via the W&B callback ...
    storage_context = wandb_callback.load_storage_context(artifact_url=index_artifact)
    # ... and rebuild the index object from that storage context.
    index = load_index_from_storage(storage_context)

In [None]:
# Ask a single question against the index and print the response.
question = "What python version does weave require ?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)

In [None]:
# Call finish() on the W&B callback handler created in the setup cell
# (presumably this closes the associated W&B run — TODO confirm).
wandb_callback.finish()