# Installations and imports

In [3]:
# !pip install llama-index
# !pip install llama-index-core
# !pip install llama-index-embeddings-openai
# !pip install llama-parse
# !pip install llama-index-vector-stores-kdbai
# !pip install pandas
# !pip install llama-index-postprocessor-cohere-rerank
# !pip install kdbai_client

In [2]:
from llama_parse import LlamaParse
from llama_index.core import Settings
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import MarkdownElementNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.kdbai import KDBAIVectorStore
from llama_index.postprocessor.cohere_rerank import CohereRerank
from getpass import getpass
import kdbai_client as kdbai

In [11]:
!python --version

Python 3.11.5


# Set up API keys

In [4]:
# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio
import nest_asyncio
nest_asyncio.apply()


import os
# API access to llama-cloud
os.environ["LLAMA_CLOUD_API_KEY"] = "llx-"

# Using OpenAI API for embeddings/llms
os.environ["OPENAI_API_KEY"] = "sk-"

# Using Cohere for reranking
os.environ["COHERE_API_KEY"] = "xyz..."

In [5]:
#Set up KDB.AI endpoing and API key
KDBAI_ENDPOINT = (
    os.environ["KDBAI_ENDPOINT"]
    if "KDBAI_ENDPOINT" in os.environ
    else input("KDB.AI endpoint: ")
)
KDBAI_API_KEY = (
    os.environ["KDBAI_API_KEY"]
    if "KDBAI_API_KEY" in os.environ
    else getpass("KDB.AI API key: ")
)

#connect to KDB.AI
session = kdbai.Session(api_key=KDBAI_API_KEY, endpoint=KDBAI_ENDPOINT)

KDBAIException: Invalid URL: .

# Create schema and table

In [None]:
# The schema contains two metadata columns (document_id, text) and one embeddings column
# Index type, search metric (Euclidean distance), and dimensions are specified in the embedding column
schema = dict(
    columns=[
        dict(name="document_id", pytype="bytes"),
        dict(name="text", pytype="bytes"),
        dict(
            name="embedding",
            vectorIndex=dict(type="flat", metric="L2", dims=1536),
        ),
    ]
)

KDBAI_TABLE_NAME = "LlamaParse_Table"

# First ensure the table does not already exist
if KDBAI_TABLE_NAME in session.list():
    session.table(KDBAI_TABLE_NAME).drop()

#Create the table
table = session.create_table(KDBAI_TABLE_NAME, schema)

`wget command to import pdf here`

In [None]:
!wget `pdf`

In [None]:
EMBEDDING_MODEL  = "text-embedding-3-small"
GENERATION_MODEL = "gpt-3.5-turbo-0125"

llm = OpenAI(model=GENERATION_MODEL)
embed_model = OpenAIEmbedding(model=EMBEDDING_MODEL)

Settings.llm = llm
Settings.embed_model = embed_model

pdf_file_name = './LLM_recall.pdf'