In [4]:
import nest_asyncio
# Applied immediately after import, before any other library in this notebook is loaded.
# NOTE(review): presumably needed so async llama_index / LlamaParse calls can run
# inside Jupyter's already-running event loop — confirm it is still required.
nest_asyncio.apply()

from IPython.display import Markdown, display

In [5]:
from typing import List
from dotenv import dotenv_values

from llama_index.core import (SimpleDirectoryReader, 
                              Settings,
                              VectorStoreIndex)

from llama_index.core.node_parser import SentenceSplitter

from llama_index.core.schema import (BaseNode,
                                     Document,
                                     MetadataMode)

from llama_index.vector_stores.qdrant import QdrantVectorStore

from llama_parse import LlamaParse

from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding

from qdrant_client import QdrantClient

In [6]:
from etl import (add_metadata_to_documents,
                         extract,
                         transform
                         )

In [7]:
# Read the settings stored in the local .env file into `config`; later cells
# look up the Qdrant endpoint/key and the Gemini API key from it.
config = dotenv_values(dotenv_path=".env")

In [None]:
# Build the Gemini models here, at their first point of use: nothing earlier in
# the notebook assigns `embedding_model` or `llm`, so a fresh
# "Restart Kernel -> Run All" would otherwise stop on this cell with a NameError.
# The model ids are taken from the `genai.list_models()` output recorded further
# down in this notebook.
# TODO(review): confirm these are the models intended for this project.
GEMINI_EMBEDDING_MODEL = "models/embedding-001"
GEMINI_LLM_MODEL = "models/gemini-1.5-flash"

embedding_model = GeminiEmbedding(
    model_name=GEMINI_EMBEDDING_MODEL,
    api_key=config["GEMINI_API_KEY"],
)
llm = Gemini(
    model=GEMINI_LLM_MODEL,
    api_key=config["GEMINI_API_KEY"],
)

# Register both as the llama_index global defaults.
Settings.embed_model = embedding_model
Settings.llm = llm

In [None]:
# ETL over the sample PDF, innermost call first:
# extract -> add_metadata_to_documents -> transform.
documents = transform(
    add_metadata_to_documents(
        extract(["sample_data/ozempic.pdf"])
    )
)

In [None]:
# Qdrant client built from the endpoint URL and API key read from `config`.
qdrant_client = QdrantClient(
    url=config["QDRANT_ENDPOINT"],
    api_key=config["QDRANT_API_KEY"],
)

In [None]:
# Sanity check: how many items the extract/metadata/transform pipeline produced.
print(len(documents))

In [None]:
# Inspect the metadata attached to one of the produced documents.
# NOTE(review): the hard-coded index 19 will fail if fewer than 20 documents
# were produced — confirm the sample PDF always yields at least that many.
documents[19].metadata

In [None]:
# Display the document at index 1 to inspect what the pipeline produced.
documents[1]

In [None]:
# Render document 1 using MetadataMode.LLM and print the resulting text.
llm_view = documents[1].get_content(metadata_mode=MetadataMode.LLM)
print("The LLM sees this: \n", llm_view)

In [None]:
# Render document 1 using MetadataMode.EMBED and print the resulting text.
embed_view = documents[1].get_content(metadata_mode=MetadataMode.EMBED)
print("The Embedding model sees this: \n", embed_view)

# MODELS

In [8]:
import google.generativeai as genai


In [9]:
# Configure the google.generativeai client with the Gemini API key held in `config`.
genai.configure(api_key=config["GEMINI_API_KEY"])

In [10]:
# Collect the name of every model reported by the API, then print them as one list.
available_model_names = [model.name for model in genai.list_models()]
print('Available base models:', available_model_names)

Available base models: ['models/chat-bison-001', 'models/text-bison-001', 'models/embedding-gecko-001', 'models/gemini-1.0-pro-latest', 'models/gemini-1.0-pro', 'models/gemini-pro', 'models/gemini-1.0-pro-001', 'models/gemini-1.0-pro-vision-latest', 'models/gemini-pro-vision', 'models/gemini-1.5-pro-latest', 'models/gemini-1.5-pro-001', 'models/gemini-1.5-pro-002', 'models/gemini-1.5-pro', 'models/gemini-1.5-pro-exp-0801', 'models/gemini-1.5-pro-exp-0827', 'models/gemini-1.5-flash-latest', 'models/gemini-1.5-flash-001', 'models/gemini-1.5-flash-001-tuning', 'models/gemini-1.5-flash', 'models/gemini-1.5-flash-exp-0827', 'models/gemini-1.5-flash-002', 'models/gemini-1.5-flash-8b', 'models/gemini-1.5-flash-8b-001', 'models/gemini-1.5-flash-8b-latest', 'models/gemini-1.5-flash-8b-exp-0827', 'models/gemini-1.5-flash-8b-exp-0924', 'models/embedding-001', 'models/text-embedding-004', 'models/aqa']


In [None]:
# Show which embedding model is in use. llama_index embedding classes expose
# the identifier as `model_name`; GeminiEmbedding (the embedding class imported
# in this notebook) has no public `model` attribute, so `.model` would fail.
embedding_model.model_name

In [None]:
# Embed a handful of sample queries one call at a time, then report
# (number of vectors, length of the first vector).
sample_queries = [
    "What's the weather like in Komchatka?",
    "What kinds of food is Italy known for?",
    "What's my name? I bet you don't remember...",
    "What's the point of life anyways?",
    "The point of life is to have fun :D",
]

print("\nSequential Embedding: ")
q_embeddings = [
    embedding_model.get_query_embedding(query) for query in sample_queries
]
print("Shape:", (len(q_embeddings), len(q_embeddings[0])))

In [None]:

# Chat message types used to build the `messages` list in the next cell.
from llama_index.core.llms import ChatMessage, MessageRole

# Display the `model` attribute of the configured LLM object.
llm.model

In [None]:
# Two-turn conversation: a system instruction followed by the user's question.
system_message = ChatMessage(
    role=MessageRole.SYSTEM,
    content="You are a helpful assistant.",
)
user_message = ChatMessage(
    role=MessageRole.USER,
    content="What are the most popular house pets in North America?",
)
messages = [system_message, user_message]

# Send the conversation to the LLM; the response is shown as the cell output.
llm.chat(messages)

In [None]:
# Show the type of the object returned by `llm.chat`.
# Note: this issues a new chat request rather than reusing an earlier response.
type(llm.chat(messages))