In [1]:
import os
from tqdm import tqdm
from datasets import load_dataset
from elasticsearch import Elasticsearch
from langchain_community.vectorstores import ElasticsearchStore
from langchain_community.document_loaders import TextLoader
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import SpacyTextSplitter
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from opensearch_utils import *
from huggingface_utils import *
from elasticsearch import Elasticsearch
from getpass import getpass

from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env.

HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
HUGGINGFACE_USERNAME = os.getenv('HUGGINGFACE_USERNAME')
HUGGINGFACE_DATASET_NAME = os.getenv('HUGGINGFACE_DATASET_NAME')
ELASTIC_CLOUD_ID = os.getenv('ELASTIC_CLOUD_ID')
ELASTIC_API_KEY = os.getenv('ELASTIC_API_KEY')

model_name = "NeuML/pubmedbert-base-embeddings"

from langchain_community.embeddings import HuggingFaceEmbeddings

device = 'cpu' # make sure you are on gpu
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={'device': device},
    encode_kwargs={'device': device}
)

In [2]:
index_name = 'qa_project_pubmedbert' # previously index="test_pubmed_split"
elastic_vector_search = ElasticsearchStore(
    es_cloud_id=ELASTIC_CLOUD_ID,
    index_name=index_name,
    embedding=embeddings,
    es_api_key=ELASTIC_API_KEY,
)

In [3]:
from langchain_community.document_loaders import HuggingFaceDatasetLoader

loader = HuggingFaceDatasetLoader(HUGGINGFACE_DATASET_NAME,use_auth_token=HUGGINGFACE_TOKEN,page_content_column='Abstract')
data = loader.load()



In [36]:
from pprint import pprint
pprint(data[77].metadata.keys())

dict_keys(['Journal', 'Title', 'Authors', 'Author_Information', 'DOI', 'Misc', 'Published_date'])


In [57]:
query = "What is the role of artificial intelligence in nephrology?"
results = elastic_vector_search.similarity_search(query,k=50)


titles_elastic = [res.metadata["Title"] for res in results]
for res in results:
    print(res.metadata['Title'])

Artificial Intelligence in Nephrology Core Concepts Clinical Applications and Perspectives
Artificial Intelligence in Kidney Cancer
Kidney cancer management 3 0 can artificial intelligence make us better
Artificial Intelligence in Pediatric Nephrology A Call for Action
Artificial intelligence and machine learning in nephropathology
The promise of artificial intelligence for kidney pathophysiology
Artificial intelligence the future of urinary stone management
Time for a full digital approach in nephropathology a systematic review of current artificial intelligence applications and future directions
Artificial intelligence approaches to improve kidney care
Machine learning in medicine Medical droids tricorders and a computer named Hal 9000
Overcoming barriers to implementation of artificial intelligence in gastroenterology
Artificial intelligence enabled decision support in nephrology
Artificial intelligence in the diagnosis treatment and prevention of urinary stones
Artificial intellige

In [24]:
# %pip install pymed

from pymed import PubMed

pubmed = PubMed(tool="MyTool", email='test')

results = pubmed.query('artificial intelligence in nephrology', max_results=20)

## see: https://github.com/gijswobben/pymed/blob/master/pymed/article.py
'''
these dont match with the ones I see in the browser
'''

for i,res in enumerate(results):
    print(i,") ",res.title)

In [50]:
# %pip install biopython
from Bio import Entrez

def search(query):
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='150',
                            retmode='xml',
                            term=query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

results=search('What is the role of artificial intelligence in nephrology?')
id_list = results['IdList']
papers = fetch_details(id_list)

titles_bio = [paper['MedlineCitation']['Article']['ArticleTitle'] for paper in papers['PubmedArticle']]
for i, paper in enumerate(papers['PubmedArticle']):
         print("{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))


1) Artificial Intelligence in Nephrology: Core Concepts, Clinical Applications, and Perspectives.
2) Intradialytic Hypotension: Mechanisms and Outcome.
3) Artificial intelligence-enabled decision support in nephrology.
4) Role of Artificial Intelligence in Kidney Disease.
5) A Review of the Role of Artificial Intelligence in Healthcare.
6) Exploring the Potential of Chatbots in Critical Care Nephrology.
7) Artificial Intelligence in Pediatric Nephrology-A Call for Action.
8) Artificial intelligence at the time of COVID-19: who does the lion's share?
9) Toward generalizing the use of artificial intelligence in nephrology and kidney transplantation.
10) Artificial Intelligence in Nephrology: How Can Artificial Intelligence Augment Nephrologists' Intelligence?
11) Identification of Markers for Diagnosis and Treatment of Diabetic Kidney Disease Based on the Ferroptosis and Immune.
12) Lipophagy deficiency exacerbates ectopic lipid accumulation and tubular cells injury in diabetic nephropat

In [58]:
## check overlap between my results and the ones from the browser
## nu au punctuatie unele titluri

import re
## strip punctuation and lowercase
titles_elastic = [re.sub(r'[^\w\s]', '', title.lower()) for title in titles_elastic]
titles_bio = [re.sub(r'[^\w\s]', '', title.lower()) for title in titles_bio]

common_articles = set(titles_elastic).intersection(set(titles_bio))
common_articles

'''
CONCLUZIE:

rezultatele de pe pubmed prin api sunt destul de diferite de cele de la noi si e de asteptat nu putem sa folosim 
pe alea pentru evaluare ca baseline
'''

{'artificial intelligence in nephrology core concepts clinical applications and perspectives',
 'artificial intelligence in surgical training for kidney cancer a systematic review of the literature'}