In [3]:
import os
import logging
import sys
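from llama_index.readers.file.base import SimpleDirectoryReader  # NOTE: this path raises ModuleNotFoundError in this environment (see the cell output); llama-index >= 0.10 exports SimpleDirectoryReader from llama_index.core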
from llama_index import (
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    GPTVectorStoreIndex,  # Updated class for vector store
)
from llama_index.llms.nvidia import NVIDIA  # NVIDIA NeMo for language generation
from llama_index.embeddings.nvidia import NVIDIAEmbedding  # NVIDIA Embedding model

# Enable logging to see what's happening under the hood.
# basicConfig() already attaches a stdout StreamHandler to the root logger, so
# no second handler is added here: an extra one would print every record twice
# (and once more for each time this cell is re-run).
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Set up paths
DATA_DIR = "data"  # Directory to store data (e.g., text files)
PERSIST_DIR = "./storage"  # Directory to store the persisted index

# Set up NVIDIA API Key (required for hosted NIM).
# Fail fast with a clear message if the key is missing.
# NOTE(review): the key is not passed to the clients below; presumably they
# read NVIDIA_API_KEY from the environment themselves -- confirm.
nvidia_api_key = os.environ.get("NVIDIA_API_KEY")
if not nvidia_api_key:
    raise ValueError("Please set your NVIDIA_API_KEY as an environment variable.")

# Initialize the NVIDIA LLM and embedding model clients
llm = NVIDIA(model="meta/llama-3.1-405b-instruct")
embed_model = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")

# Function to create or load the index
def create_or_load_index():
    """Return the vector index, building and persisting it on first use.

    If ``PERSIST_DIR`` does not exist, every file under ``DATA_DIR`` is read,
    indexed into a new vector store index, and the index is persisted to
    ``PERSIST_DIR``. Otherwise the previously persisted index is loaded.

    The module-level ``llm`` and ``embed_model`` are registered on the global
    ``Settings`` object before either branch runs, so a freshly built index, a
    reloaded index, and any query engine created from the returned index all
    use the same models instead of the library defaults.

    Returns:
        The built or loaded index.

    Raises:
        Whatever the reader or storage layer raises, e.g. when ``DATA_DIR``
        is missing or empty, or when ``PERSIST_DIR`` holds no valid index.
    """
    # Imported locally from `llama_index.core`: in this environment the
    # top-level `llama_index` package is a bare namespace that only exposes
    # sub-packages such as `core`, so the classes must come from there.
    from llama_index.core import (
        Settings,
        SimpleDirectoryReader,
        StorageContext,
        VectorStoreIndex,
        load_index_from_storage,
    )

    # Global model configuration (replaces the deprecated ServiceContext).
    Settings.llm = llm
    Settings.embed_model = embed_model

    if not os.path.exists(PERSIST_DIR):
        # If no index exists, create one from the documents in the data folder
        print("No index found. Creating a new one...")
        documents = SimpleDirectoryReader(DATA_DIR).load_data()
        index = VectorStoreIndex.from_documents(documents)
        # Store the index for future use
        index.storage_context.persist(persist_dir=PERSIST_DIR)
    else:
        # Load the existing index from storage
        print("Index found. Loading the existing index...")
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        index = load_index_from_storage(storage_context)
    return index

# Function to query the index
def query_index(query_str):
    """Run ``query_str`` against the index and print the answer.

    The index is obtained from ``create_or_load_index()`` on every call.
    Nothing is returned; the response is written to stdout.
    """
    engine = create_or_load_index().as_query_engine()
    answer = engine.query(query_str)
    print("Response:", answer)

# Make sure the data folder exists and contains the required file.
# exist_ok=True makes this safe to re-run without a separate existence check.
os.makedirs(DATA_DIR, exist_ok=True)

# Download the example document (Paul Graham essay) if it doesn't exist
essay_path = os.path.join(DATA_DIR, "paul_graham_essay.txt")
if not os.path.exists(essay_path):
    # NOTE(review): URL kept as-is; confirm it still resolves.
    essay_url = "https://raw.githubusercontent.com/jerryjliu/llama_index/main/docs/examples/paul_graham_essay.txt"
    print(f"Downloading the Paul Graham essay to {essay_path}...")
    import urllib.request

    # Download to a temporary sibling path and rename only on success, so an
    # interrupted transfer never leaves a truncated file at essay_path that
    # later runs would mistake for a complete download.
    partial_path = essay_path + ".part"
    try:
        urllib.request.urlretrieve(essay_url, partial_path)
        os.replace(partial_path, essay_path)
    finally:
        # Clean up the leftover partial file if the download or rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Example usage: Query the index
query_str = "What did the author do growing up?"
query_index(query_str)


ModuleNotFoundError: No module named 'llama_index.readers.file.base'

In [4]:
# Debugging the ModuleNotFoundError above: list the names currently visible
# on the top-level `llama_index` package.
import llama_index
print(dir(llama_index))


['__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'core', 'llms', 'readers']


In [5]:
# Show the package help for `llama_index` (package contents, submodules,
# file location) to see how the installed distribution is laid out.
import llama_index
help(llama_index)


Help on package llama_index:

NAME
    llama_index

PACKAGE CONTENTS
    _bundle (package)
    cli (package)
    core (package)
    legacy (package)

SUBMODULES
    llms
    readers

FILE
    (built-in)




In [6]:
from llama_index.readers.file.base import SimpleDirectoryReader


ModuleNotFoundError: No module named 'llama_index.readers.file.base'

In [7]:
# Import the `readers` sub-package and list the names it exposes.
import llama_index.readers
print(dir(llama_index.readers))


['__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'file']


In [8]:
# Import `llama_index.readers.file` and list the reader classes and
# submodules it exposes, to check what is actually available there.
import llama_index.readers.file
print(dir(llama_index.readers.file))


['CSVReader', 'DocxReader', 'EpubReader', 'FlatReader', 'HTMLTagReader', 'HWPReader', 'IPYNBReader', 'ImageCaptionReader', 'ImageReader', 'ImageTabularChartReader', 'ImageVisionLLMReader', 'MarkdownReader', 'MboxReader', 'PDFReader', 'PagedCSVReader', 'PandasCSVReader', 'PandasExcelReader', 'PptxReader', 'PyMuPDFReader', 'RTFReader', 'UnstructuredReader', 'VideoAudioReader', 'XMLReader', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'docs', 'epub', 'flat', 'html', 'image', 'image_caption', 'image_deplot', 'image_vision_llm', 'ipynb', 'markdown', 'mbox', 'paged_csv', 'pymu_pdf', 'rtf', 'slides', 'tabular', 'unstructured', 'video_audio', 'xml']
