In [1]:

from llama_index.core import  Document, VectorStoreIndex
from llama_index.core import Settings
from llama_index.readers.confluence import ConfluenceReader
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.sagemaker_endpoint import SageMakerEmbedding
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from weaviate import Client
import os
import dotenv
from pydantic import BaseModel
from utils.indexing import process_documents


            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [2]:
# Load key/value pairs from a .env file into the process environment;
# the following cells read their endpoints and credentials via os.getenv.
dotenv.load_dotenv()


True

In [3]:
# Weaviate client; the server address comes from the WEAVIATE_CLIENT environment variable.
client = Client(os.getenv("WEAVIATE_CLIENT"))

# Register the Ollama-served Mistral model on llama_index's global Settings object.
Settings.llm = Ollama(
    model="mistral:7b-instruct-v0.3-q6_K",
    base_url=os.getenv("LLM_ENDPOINT"),
)

# Embedding model served by a SageMaker endpoint. The endpoint name, AWS
# credentials and region are all read from the environment.
embed_model = SageMakerEmbedding(
    region_name=os.getenv("AWS_REGION"),
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    endpoint_name=os.getenv("SAGEMAKER_ENDPOINT"),
)


In [4]:
# Keep a local handle on the LLM already configured on Settings, and
# register the embedding model created above on the global Settings object.
llm = Settings.llm
Settings.embed_model = embed_model

# Placeholder only; `index` is assigned from the vector store in a later cell.
index = None

In [5]:
# ConfluenceReader and os are already imported in the first cell, so they are
# not imported again here.
#
# ConfluenceReader reads its credentials from the CONFLUENCE_USERNAME /
# CONFLUENCE_PASSWORD environment variables (see the reader's documentation).
# setdefault keeps any value already present in the environment (for example
# one loaded by dotenv) and only falls back to the blank placeholder when the
# variable is missing, instead of unconditionally overwriting it.
os.environ.setdefault('CONFLUENCE_USERNAME', "")
os.environ.setdefault('CONFLUENCE_PASSWORD', "")

# The base URL can be supplied through CONFLUENCE_BASE_URL; when it is not set
# the blank placeholder used previously is kept.
reader = ConfluenceReader(base_url=os.getenv("CONFLUENCE_BASE_URL", ""))

In [6]:
# Load the pages of the Confluence space identified by space_key
# (the key is left blank in this saved copy of the notebook).
documents=reader.load_data(space_key="")

In [7]:
# Display the loaded Document objects.
# NOTE(review): this dumps the entire list, including page text, URLs and
# contributor names — consider `len(documents)` or `documents[:1]` before sharing.
documents

[Document(id_='98395', embedding=None, metadata={'title': 'Overview', 'page_id': '98395', 'status': 'current', 'url': 'https://abdelkerim-doc-assistant.atlassian.net/wiki/spaces/~712020a5e8623ecd6e48b6b43e65f918ce73b6/overview'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Say hello to your colleagues who want to know your name, pronouns, role, team\nand location (or if you\'re remote).\n\n## 📄 Recent pages that I\'ve worked on\n\n## Recent updates\n\n  * DocMate Operations Manual\n\nOct 28, 2024 • contributed by Abdelkerim Dassi\n\n  * DocMate Security Documentation\n\nOct 28, 2024 • contributed by Abdelkerim Dassi\n\n  * DocMate Contracts Manual\n\nOct 28, 2024 • contributed by Abdelkerim Dassi\n\n  * DocMate Technical Manual\n\nOct 28, 2024 • contributed by Abdelkerim Dassi\n\n  * AppZ Security Documentation\n\nOct 28, 2024 • contributed by Abdelkerim Dassi\n\nShow More\n\n## Blog stream\n\nCreate a blog post to share news and announcement

In [8]:
# Pydantic model whose single field holds a list of llama_index Document objects.
# NOTE(review): no other cell visible in this notebook references InputData —
# confirm it is still needed.
class InputData(BaseModel):
    data: list[Document]  # the documents carried by this payload

c:\Users\abdel\Desktop\Chatbot\2024_abdelakrim_dassi\chatbot_venv\lib\site-packages\pydantic\_internal\_generate_schema.py:658: PydanticDeprecatedSince20: `__get_validators__` is deprecated and will be removed, use `__get_pydantic_core_schema__` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
  warn(


In [9]:
# Index the previously loaded `documents` via the project helper
# `process_documents` (imported from utils.indexing).
try:
    # NOTE(review): `reader` is re-created here but is not used by the
    # indexing call below; kept so the notebook's `reader` global is unchanged.
    reader = ConfluenceReader(base_url="https://abdelkerim-doc-assistant.atlassian.net/wiki")
    # Process and index documents
    process_documents(documents)
    print("Data indexed successfully")
except Exception as e:
    # Report the failure, then re-raise so the traceback is preserved and a
    # "Run All" stops here instead of querying a stale or missing index.
    print("ERROR:", str(e))
    raise

Data indexed successfully


In [10]:
# Weaviate-backed vector store; the collection name is read from the
# `index_name` environment variable.
vector_store = WeaviateVectorStore(
    index_name=os.getenv("index_name"),
    weaviate_client=client,
)

# Build the llama_index index on top of that vector store.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [11]:
# Retriever over the index, configured with similarity_top_k=3
# (three results per query, as seen in the output of the next cell).
retriever=index.as_retriever(similarity_top_k=3)

In [12]:
# Smoke-test retrieval with a sample question and print the metadata
# (title, page_id, status, url) of each returned node.
nodes =retriever.retrieve("What is DocMate ?")
for node in nodes:
    print(node.metadata)

{'title': 'DocMate Technical Manual', 'page_id': '35422210', 'status': 'current', 'url': 'https://abdelkerim-doc-assistant.atlassian.net/wiki/spaces/~712020a5e8623ecd6e48b6b43e65f918ce73b6/pages/35422210/DocMate+Technical+Manual'}
{'title': 'DocMate Operations Manual', 'page_id': '35389456', 'status': 'current', 'url': 'https://abdelkerim-doc-assistant.atlassian.net/wiki/spaces/~712020a5e8623ecd6e48b6b43e65f918ce73b6/pages/35389456/DocMate+Operations+Manual'}
{'title': 'DocMate Security Documentation', 'page_id': '35422223', 'status': 'current', 'url': 'https://abdelkerim-doc-assistant.atlassian.net/wiki/spaces/~712020a5e8623ecd6e48b6b43e65f918ce73b6/pages/35422223/DocMate+Security+Documentation'}
