In [11]:
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from langchain_mistralai import MistralAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os

In [3]:
# Load variables from a .env file into the process environment, then read the
# Qdrant connection settings. Each value is None when its variable is unset.
load_dotenv()
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

In [8]:
# Open a client against the Qdrant instance described by the environment settings.
qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

# Connectivity smoke test: print the collections the server currently reports.
print(qdrant_client.get_collections())

collections=[]


In [9]:
# Embedding model used to vectorise text.
# NOTE(review): the Mistral API key is presumably read from the environment
# populated by load_dotenv() — confirm.
embeddings = MistralAIEmbeddings(model="mistral-embed")

# Embed a throwaway string once to discover the vector dimensionality instead
# of hard-coding it.
vector_size = len(embeddings.embed_query("sample text"))

collection_articles = "nasa_articles"

# Create the collection only when it does not exist yet. An explicit existence
# check replaces the former `try: get_collection(...) except Exception:` probe,
# which treated *any* failure (bad credentials, network timeout, ...) as
# "collection missing" and then tried to create it, hiding the real error.
if not qdrant_client.collection_exists(collection_name=collection_articles):
    qdrant_client.create_collection(
        collection_name=collection_articles,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )

  from .autonotebook import tqdm as notebook_tqdm
  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)


In [10]:
# List the server's collections again, after the get-or-create cell above has run.
print(qdrant_client.get_collections())

collections=[CollectionDescription(name='nasa_articles')]


In [28]:
# Recursive character splitter built through the tiktoken factory, using the
# encoding associated with "gpt-4" to measure chunk_size / chunk_overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=200,
    model_name="gpt-4",
)

In [15]:
from pathlib import Path

# Markdown export of a single article; the path is relative to the notebook's
# working directory.
ruta = Path("md_out/www.ncbi.nlm.nih.gov/pmc/articles/PMC2824534/index.md")

# Path.read_text opens, decodes (UTF-8) and closes the file in one call, so no
# file-handle variable is left behind in the kernel namespace.
contenido_md = ruta.read_text(encoding="utf-8")


In [16]:
# Preview the first 500 characters of the loaded Markdown text.
contenido_md[:500]

'Biomaterials\n\n. Author manuscript; available in PMC: 2011 Mar 1.\n\n*Published in final edited form as:* Biomaterials. 2010 Jan 19;31(8):2193–2200. doi: [10.1016/j.biomaterials.2009.11.092](https://doi.org/10.1016/j.biomaterials.2009.11.092)\n\n# *In vitro* generation of mechanically functional cartilage grafts based on adult human stem cells and 3D-woven poly(ε-caprolactone) scaffolds\n\n[PK Valonen](https://pubmed.ncbi.nlm.nih.gov/?term=%22Valonen%20PK%22%5BAuthor%5D)\n\n### PK Valonen\n\n1Harvard-MIT D'

In [23]:
# Split the article text into chunks with the splitter configured earlier.
texts = text_splitter.split_text(contenido_md)

In [24]:
# Number of chunks the splitter produced for this article.
len(texts)

9

In [26]:
# Print the first chunk to inspect what a single chunk looks like.
print(texts[0])

Biomaterials

. Author manuscript; available in PMC: 2011 Mar 1.

*Published in final edited form as:* Biomaterials. 2010 Jan 19;31(8):2193–2200. doi: [10.1016/j.biomaterials.2009.11.092](https://doi.org/10.1016/j.biomaterials.2009.11.092)

# *In vitro* generation of mechanically functional cartilage grafts based on adult human stem cells and 3D-woven poly(ε-caprolactone) scaffolds

[PK Valonen](https://pubmed.ncbi.nlm.nih.gov/?term=%22Valonen%20PK%22%5BAuthor%5D)

### PK Valonen

1Harvard-MIT Division of Health Sciences and Technology, Massachusetts Institute of Technology, Cambridge, MA, 02139 USA

Find articles by [PK Valonen](https://pubmed.ncbi.nlm.nih.gov/?term=%22Valonen%20PK%22%5BAuthor%5D)

1, [FT Moutos](https://pubmed.ncbi.nlm.nih.gov/?term=%22Moutos%20FT%22%5BAuthor%5D)

### FT Moutos

2Departments of Surgery, Biomedical Engineering, and Mechanical Engineering & Materials Science, Duke University Medical Center, Durham, NC 27710 USA

Find articles by [FT Moutos](https://pub

In [1]:
import streamlit as st
# Assuming get_graph is in components/kw_graph.py
# from components.kw_graph import get_graph 
from streamlit_agraph import agraph, Node, Edge, Config
import json
from qdrant_client import QdrantClient
from langchain_mistralai import MistralAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment, then read the
# Qdrant connection settings (None when a variable is unset).
load_dotenv()
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

# Name of the Qdrant collection this notebook queries.
COLLECTION_NAME = "tasks_proyects"
# Backward-compatible alias for the original, misspelled identifier so that any
# code still referring to CELLECTION_NAME keeps working.
CELLECTION_NAME = COLLECTION_NAME

client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
embeddings = MistralAIEmbeddings(model="mistral-embed")

# LangChain vector-store wrapper over the collection, using the Mistral
# embeddings configured above.
store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)


  from .autonotebook import tqdm as notebook_tqdm
  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)


In [2]:
# Similarity search against the vector store for the query 'hola',
# requesting k=10 documents.
retrieved_docs = store.similarity_search('hola', k=10)


In [3]:
# Display the full list of retrieved documents (content and metadata).
retrieved_docs

[Document(metadata={'project_title': 'Genomic and Functional Analysis of Desiccation Tolerance of International Space Station Isolated Enterococcus faecalis and Their Pathogenicity in Caenorhabditis elegans   Genomic and Functional Analysis of Biofilm Morphotypes of International Space Station Isolated Staphylococcus epidermidis and Their Pathogenicity in Caenorhabditis elegans)Reduce', 'fiscal_year': 'FY 2020', 'division': 'Space Biology', 'research_discipline/element': '', 'start_date': '09/13/2017', 'end_date': '11/30/2019', 'task_last_updated': '01/24/2020', 'research_discipline_element': 'Fiscal Year:FY 2020Division:Space BiologyResearch Discipline/Element:Space Biology: Microbiology', 'pdf_download_link': 'tbpdf.cfm?id=12404', 'principal_investigator': {}, 'project_information': {'responsible_center': 'NASA ARC', 'grant_monitor': 'Griko, Yuri (Yuri.V.Griko@nasa.gov)', 'center_contact': '650-604-0519', 'unique_id': '11568', 'solicitation__funding_source': '2016 Space Biology (ROSB

In [4]:
# Read the 'id' metadata field of the first hit. Raises IndexError if the
# search returned no documents, or KeyError if the metadata has no 'id'.
retrieved_docs[0].metadata['id']

'11864'