In [4]:
from llama_index.core import VectorStoreIndex, Settings, Document,StorageContext
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors.llm_selectors import LLMSingleSelector
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from typing import Optional, List, Mapping, Any
from llama_index.core.vector_stores import FilterCondition, MetadataFilter, MetadataFilters
import logging
from dotenv import load_dotenv
import os
import uuid

In [5]:
# Baseline findings: one free-text statement per drug/compound.
initial_data = [
    "Drug X-101: Shows 75% efficacy in Phase II trials for Alzheimer's",
    "Compound Y-202: Potential neuroprotective effects observed in mice models",
    "Drug Z-303: FDA approval pending for Phase III trials in Parkinson's",
]

# Later findings for the same drugs (hard-coded here to simulate a daily feed).
daily_updates = [
    "Drug X-101: Phase III trial approved by FDA",
    "Compound Y-202: Patent filed in US and EU",
    "Drug Z-303: New side effects reported in Phase III trials",
]


def _tag_documents(texts, version):
    """Wrap each string in a Document carrying ``{"version": version}`` metadata."""
    return [Document(text=body, metadata={"version": version}) for body in texts]


# The baseline batch is tagged "v1", the update batch "v2".
initial_docs = _tag_documents(initial_data, "v1")
update_docs = _tag_documents(daily_updates, "v2")

In [6]:
# Random UUID string that names the first persisted snapshot.
current_version = str(uuid.uuid4())

# Storage context created with default settings; the initial index is built on it.
storage_context = StorageContext.from_defaults()

In [8]:
# Load variables from a local .env file (if one exists) into the process
# environment so that a key stored only there is visible to os.getenv below.
load_dotenv()

# Initialize Groq LLM
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Stop here with an actionable message rather than handing api_key=None
    # to the client and failing later at query time.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it or add it to a .env file before running this notebook."
    )

# NOTE(review): confirm that this model id is still served by Groq.
llm = Groq(model="mixtral-8x7b-32768", api_key=groq_api_key)

# Configure global settings: every index/query engine created afterwards
# picks up this LLM and embedding model from Settings.
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")



In [9]:
# Create the first vector index from the baseline documents, backed by the
# storage context created earlier.
index = VectorStoreIndex.from_documents(documents=initial_docs, storage_context=storage_context)


In [10]:
# Write the storage context to a directory whose name ends in the version id.
initial_persist_dir = f"./drug_index_{current_version}"
storage_context.persist(persist_dir=initial_persist_dir)
print(f"Initial index created and persisted as version {current_version}.")

Initial index created and persisted as version e0f18a58-1b41-4180-abdd-a6f81fb4ccee.


In [13]:
from llama_index.core import load_index_from_storage
# Rebuild the storage context from the directory written for current_version,
# then restore the index held in it. Both names are rebound to the reloaded
# objects.
storage_context = StorageContext.from_defaults(
    persist_dir=f"./drug_index_{current_version}"
)
index = load_index_from_storage(storage_context=storage_context)

In [14]:
# Add new documents
# update_docs holds Document objects, so each one goes through index.insert(),
# the document-level entry point; insert_nodes() is meant for nodes that have
# already been parsed.
for update_doc in update_docs:
    index.insert(update_doc)

In [15]:
# Snapshot the storage context again under a fresh random version id, i.e. in
# a different directory from the one used for current_version.
new_version = str(uuid.uuid4())
updated_persist_dir = f"./drug_index_{new_version}"
storage_context.persist(persist_dir=updated_persist_dir)
print(f"Index updated and persisted as version {new_version}.")

Index updated and persisted as version 49d1d346-2895-491f-bc73-bfae6cbba109.


In [17]:
# Load a specific version
def load_version(version_id):
    """Load the index snapshot that was persisted for ``version_id``.

    Args:
        version_id: Identifier used as the suffix of the snapshot directory
            ``./drug_index_<version_id>``.

    Returns:
        The index returned by ``load_index_from_storage`` for that directory.
    """
    # Read the snapshot directory a single time and hand that storage context
    # straight to the loader.
    storage_context = StorageContext.from_defaults(persist_dir=f"./drug_index_{version_id}")
    return load_index_from_storage(storage_context)

In [18]:
# Ask the first snapshot about Drug X-101.
v1_index = load_version(current_version)
v1_engine = v1_index.as_query_engine()
v1_question = "What is the status of Drug X-101?"
v1_response = v1_engine.query(v1_question)
print("Version 1 Response:\n", v1_response)

Version 1 Response:
 Drug X-101 has demonstrated 75% efficacy in Phase II trials for Alzheimer's.


In [19]:
# Ask the updated snapshot the same question to compare the answers.
v2_index = load_version(new_version)
v2_engine = v2_index.as_query_engine()
v2_question = "What is the status of Drug X-101?"
v2_response = v2_engine.query(v2_question)
print("Version 2 Response:\n", v2_response)

Version 2 Response:
 Based on the information provided, Drug X-101 has progressed to Phase III trials, which indicates it has successfully completed Phase II trials. In Phase II trials, it demonstrated an efficacy of 75% in treating Alzheimer's.


In [None]:
#Automatic Versioning
from datetime import datetime

def get_version_id():
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``.

    Returns:
        A 15-character string such as ``20240131_235959``. The resolution is
        one second, so two calls within the same second yield the same id.
    """
    timestamp = datetime.now()
    return f"{timestamp:%Y%m%d_%H%M%S}"

# Persist the current storage context under a timestamp-based version id.
version_id = get_version_id()
timestamped_persist_dir = f"./drug_index_{version_id}"
storage_context.persist(persist_dir=timestamped_persist_dir)

In [22]:
#Change Tracking
def get_changes(old_index, new_index):
    """Return the texts stored in ``new_index`` that ``old_index`` lacks.

    Args:
        old_index: Index whose ``docstore.docs`` values supply the baseline texts.
        new_index: Index whose ``docstore.docs`` values are compared against them.

    Returns:
        A set of ``text`` values found in the new docstore but not in the old
        one. Texts that exist only in the old docstore are not reported.
    """
    def _stored_texts(index):
        # Distinct text of every entry held in the index's docstore.
        return {entry.text for entry in index.docstore.docs.values()}

    baseline_texts = _stored_texts(old_index)
    latest_texts = _stored_texts(new_index)
    return latest_texts.difference(baseline_texts)

# Diff the two loaded snapshots and show the texts the newer one added.
changes = get_changes(old_index=v1_index, new_index=v2_index)
print("Changes in latest version:\n", changes)

Changes in latest version:
 {'Drug X-101: Phase III trial approved by FDA', 'Drug Z-303: New side effects reported in Phase III trials', 'Compound Y-202: Patent filed in US and EU'}


In [23]:
#Rollback Mechanism
def rollback_version(version_id):
    """Load and return the index snapshot persisted for ``version_id``.

    The function only reads ``./drug_index_<version_id>`` and returns the
    loaded index; it does not rebind any module-level variable or remove other
    snapshot directories, so the caller decides what to do with the result.

    Args:
        version_id: Identifier used as the suffix of the snapshot directory.

    Returns:
        The index returned by ``load_index_from_storage`` for that directory.
    """
    # Read the snapshot directory a single time and hand that storage context
    # straight to the loader.
    storage_context = StorageContext.from_defaults(persist_dir=f"./drug_index_{version_id}")
    return load_index_from_storage(storage_context)

# Load the first snapshot again by passing its original version id.
rollback_index = rollback_version(version_id=current_version)