In [1]:
import setup
import os
import numpy as np
from decouple import config, AutoConfig
# Directory that python-decouple searches for the project's settings file.
# Defaults to the original checkout location; set CHATBOT_PROJECT_DIR to run
# this notebook from another machine without editing the cell.
PROJECT_DIR = os.environ.get("CHATBOT_PROJECT_DIR", "/home/harry/chatbotDjango")
config = AutoConfig(search_path=PROJECT_DIR)

# Must run before the model imports in the next cell
# (presumably configures Django via the local `setup` helper — confirm).
setup.init_django()

In [2]:
from data.models import BlogPost, EMEDDING_LENGTH
from data import services

In [3]:
# qs = BlogPost.objects.filter(can_delete=True)
# qs

In [4]:
from llama_index.llms.mistralai import MistralAI
from llama_index.embeddings.mistralai import MistralAIEmbedding

In [5]:
from mistralai import Mistral

# Secrets and sizing are read from the decouple-managed settings.
MISTRAL_API_KEY = config("MISTRAL_API_KEY")

# NOTE(review): this rebinds the EMEDDING_LENGTH imported from data.models in
# an earlier cell — confirm both resolve to the same dimension.
EMEDDING_LENGTH = config("EMEDDING_LENGTH", default=1024, cast=int)

# Keyword arguments for the Mistral chat model and embedding model.
LLM_CONFIG = dict(api_key=MISTRAL_API_KEY)
EMBED_CONFIG = dict(api_key=MISTRAL_API_KEY, model="mistral-embed")

llm = MistralAI(**LLM_CONFIG)
embed_model = MistralAIEmbedding(**EMBED_CONFIG)

In [6]:
from llama_index.core import Settings

# Register the Mistral LLM and embedding model on llama_index's shared
# Settings object (presumably picked up as defaults by the index and query
# engine built in later cells — confirm).
Settings.llm = llm
Settings.embed_model = embed_model

In [7]:
# Name of the Postgres database that holds the vectors; a later cell creates
# it when it is missing.
vector_db_name = "vector_db"
# Table name handed to PGVectorStore for the blog-post embeddings.
vector_db_table_name = "blogpost"

In [8]:
from decouple import config, AutoConfig
# The import on the previous line rebinds `config` to decouple's default
# object, so point it back at the project directory before reading settings.
config = AutoConfig(search_path="/home/harry/chatbotDjango")

# Normalise the legacy "postgres://" scheme to "postgresql://" before the URL
# is handed to SQLAlchemy in the following cells.
_LEGACY_SCHEME = "postgres://"
DATABASE_URL = config("DATABASE_URL_POOL")
if DATABASE_URL.startswith(_LEGACY_SCHEME):
    DATABASE_URL = "postgresql://" + DATABASE_URL[len(_LEGACY_SCHEME):]

In [9]:
from sqlalchemy import create_engine, make_url, text

# Ensure the vector database exists and has the pgvector extension enabled.
# AUTOCOMMIT is required because PostgreSQL refuses to run CREATE DATABASE
# inside a transaction block.
engine = create_engine(DATABASE_URL, isolation_level="AUTOCOMMIT")
with engine.connect() as connection:
    result = connection.execute(
        text("SELECT 1 FROM pg_database WHERE datname = :db_name"),
        {"db_name": vector_db_name},
    )
    db_exists = result.scalar() == 1
    if not db_exists:
        # Identifiers cannot be bound as parameters; vector_db_name is a
        # notebook constant, not user input.
        connection.execute(text(f"CREATE DATABASE {vector_db_name}"))

if not db_exists:
    # Extensions are installed per database, so the statement has to run on a
    # connection to the freshly created database. The original code called an
    # undefined `session` here (NameError) before the database even existed.
    vector_db_url = make_url(DATABASE_URL).set(database=vector_db_name)
    vector_db_engine = create_engine(vector_db_url, isolation_level="AUTOCOMMIT")
    try:
        with vector_db_engine.connect() as vector_db_connection:
            vector_db_connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
    finally:
        # Release the one-off engine's pooled connections.
        vector_db_engine.dispose()

In [10]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Reuse the credentials/host from DATABASE_URL but point the store at the
# dedicated vector database and table.
url = make_url(DATABASE_URL)
pg_vector_params = {
    "database": vector_db_name,
    "host": url.host,
    "password": url.password,
    "port": url.port or 5432,  # fall back to 5432 when the URL omits a port
    "user": url.username,
    "table_name": vector_db_table_name,
    "embed_dim": EMEDDING_LENGTH,
}
vector_store = PGVectorStore.from_params(**pg_vector_params)

In [11]:
from llama_index.core import VectorStoreIndex, StorageContext

# Wrap the Postgres-backed vector store in a storage context and build an
# index on top of the vectors it already holds.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context)
# Query engine used by the query cell further down
# (presumably relies on the llm/embed_model registered on Settings — confirm).
query_engine = index.as_query_engine()

In [12]:
from llama_index.core import Document

# Build one llama_index Document per blog post with can_delete=True.
# The stringified model id is used as the document id, and pk/title travel
# along as metadata so query results can be traced back to the post.
qs = BlogPost.objects.filter(can_delete=True)
docs = [
    Document(
        text=f"{obj.get_embedding_text_raw()}",
        doc_id=str(obj.id),
        metadata={"pk": obj.pk, "title": obj.title},
    )
    for obj in qs
]

docs

[Document(id_='25', embedding=None, metadata={'pk': 25, 'title': 'Blog Post 1'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Harry Was Here before you', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'),
 Document(id_='26', embedding=None, metadata={'pk': 26, 'title': 'Blog Post 2'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='You Were Here before Harry', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'),
 Document(id_='27', embedding=None, metadata={'pk': 27, 'title': 'Blog Post 3'}

In [19]:
# Re-index every document: drop whatever is stored under its ref-doc id,
# then insert the current version.
for doc in docs:
    ref_doc_id = f"{doc.id_}"
    index.delete_ref_doc(ref_doc_id, delete_from_docstore=True)
    index.insert(doc)

In [23]:
# Smoke-test retrieval: run a sample query through the query engine built on
# the vector index.
response = query_engine.query("Harry Was")

In [24]:
# response.metadata maps each key to a metadata dict; print every field of
# every entry (here: the pk and title recorded when the documents were built).
for node_metadata in response.metadata.values():
    for field, value in node_metadata.items():
        print(field, value)

pk 25
title Blog Post 1
pk 27
title Blog Post 3
