In [5]:
import os
import nest_asyncio
from dotenv import load_dotenv
from langchain_community.vectorstores import Milvus
from langchain.embeddings import SentenceTransformerEmbeddings

# Read settings from a local .env file into the process environment.
load_dotenv()

# Sentence-transformers model used as the embedding function for the vector stores.
EMBEDDING_MODEL_NAME = "multi-qa-MiniLM-L6-cos-v1"
embedding_model = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)


# Connection arguments handed to the Milvus client.
MILVUS_CONN = dict(host="127.0.0.1", port="19530")


def get_vector_store(collection_name: str):
    """Build a Milvus vector store bound to the named collection.

    Args:
        collection_name: Name of the Milvus collection to open.

    Returns:
        A ``Milvus`` store configured with the module-level
        ``embedding_model`` and ``MILVUS_CONN``, using ``embedding`` as the
        vector field and ``content`` as the text field.
    """
    store_options = {
        "embedding_function": embedding_model,
        "collection_name": collection_name,
        "connection_args": MILVUS_CONN,
        "vector_field": "embedding",
        "text_field": "content",
    }
    return Milvus(**store_options)


def search_publications(query: str, k: int = 5):
    """Run a similarity search against the ``publications`` collection.

    Args:
        query: Free-text search query.
        k: Number of documents requested from the retriever.

    Returns:
        A list with one dict per retrieved document. Each dict holds
        ``PMC_code``, ``name``, ``authors``, ``date`` and ``doi`` (taken from
        the document metadata, ``None`` when a key is absent) plus ``text``
        (the document's page content).
    """
    metadata_keys = ("PMC_code", "name", "authors", "date", "doi")
    search_options = {"k": k, "metric_type": "COSINE"}

    store = get_vector_store("publications")
    retriever = store.as_retriever(search_type="similarity",
                                   search_kwargs=search_options)

    hits = []
    for doc in retriever.get_relevant_documents(query):
        record = {key: doc.metadata.get(key) for key in metadata_keys}
        record["text"] = doc.page_content
        hits.append(record)
    return hits


def search_osdr(query: str, k: int = 5):
    """Run a similarity search against the ``osdr`` collection.

    Args:
        query: Free-text search query.
        k: Number of documents requested from the retriever.

    Returns:
        A list with one dict per retrieved document. Each dict holds
        ``study_id``, ``name``, ``organisms``, ``authors``, ``doi``, ``link``,
        ``type`` and ``protocol_name`` (taken from the document metadata,
        ``None`` when a key is absent) plus ``text`` (the document's page
        content).
    """
    vector_store = get_vector_store("osdr")
    retriever = vector_store.as_retriever(search_type="similarity",
                                          search_kwargs={"k": k, "metric_type": "COSINE"})
    docs = retriever.get_relevant_documents(query)
    results = [
        {
            "study_id": d.metadata.get("study_id"),
            "name": d.metadata.get("name"),
            "organisms": d.metadata.get("organisms"),
            "authors": d.metadata.get("authors"),
            "doi": d.metadata.get("doi"),
            "link": d.metadata.get("link"),
            "type": d.metadata.get("type"),
            # NOTE(review): the metadata key is spelled "protocole_name";
            # presumably it mirrors the field name in the osdr collection —
            # confirm against the collection schema before renaming.
            "protocol_name": d.metadata.get("protocole_name"),
            # The store is created with text_field="content", so the text is
            # exposed as page_content rather than as a metadata entry; reading
            # metadata["content"] yielded None. This also matches
            # search_publications.
            "text": d.page_content
        }
        for d in docs
    ]
    return results


In [6]:
search_publications("Mice in Bion-M 1 Space Mission")

[{'PMC_code': 'PMC6165321',
  'name': 'Impact of Spaceflight and Artificial Gravity on the Mouse Retina: Biochemical and Proteomic Analysis',
  'authors': 'Masaki Shirakawa,Vijayalakshmi Sridharan,Marjan Boerma,Nina C Nishiyama,Michael J Pecaut,Michael D Delp,Stephanie Byrum,Alan J Tackett,Xiao W Mao,Satoru Takahashi,Dai Shiba',
  'date': '2018-08-28',
  'doi': '10.3390/ijms19092546',
  'text': 'mice (n = 12) were launched from the Kennedy Space Center on a SpaceX rocket to the ISS for a 35-day mission. The animals were housed in the mouse Habitat Cage Unit (HCU) in the Japan Aerospace Exploration Agency (JAXA) “Kibo” facility on the ISS. The flight mice lived either under an ambient microgravity condition (µg) or in a centrifugal habitat unit that produced 1 g artificial gravity (µg + 1 g). Habitat control (HC) and vivarium control mice lived on Earth in HCUs or normal vivarium cages, respectively. Quantitative assessment of ocular tissue demonstrated that the µg group induced'},
 {'P

In [11]:
from pymilvus import Collection

# Open a handle to the "publications" collection and print its description
# (schema, fields and parameters) for inspection.
# NOTE(review): presumably relies on a pymilvus connection already being
# registered in this kernel by an earlier cell — confirm on a fresh run.
c = Collection("publications")
print(c.describe())


{'collection_name': 'publications', 'auto_id': True, 'num_shards': 1, 'description': 'RAG collection with publication metadata', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'params': {}, 'auto_id': True, 'is_primary': True}, {'field_id': 101, 'name': 'embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}, {'field_id': 102, 'name': 'PMC_code', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 20}}, {'field_id': 103, 'name': 'name', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 300}}, {'field_id': 104, 'name': 'content', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2000}}, {'field_id': 105, 'name': 'authors', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2000}}, {'field_id': 106, 'name': 'date', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 20}}, {'field_id':