In [3]:
import os
import xml.etree.ElementTree as ET
import json
from Bio import Entrez
import requests
from selenium import webdriver
from bs4 import BeautifulSoup  # For HTML parsing
import time
from tqdm import tqdm


### GET DISEASES DATA FROM ORPHANET FILES

The list of rare diseases are available here  -  https://www.orphadata.com/classifications/

In [None]:
def read_files_from_folder(folder_path):
    files = os.listdir(folder_path)
    return files

def parse_orphanet_xml(file_path):
    tree = ET.parse(file_path)
    root = tree.getroot()

    diseases = []
    for disorder in root.findall('.//Disorder'):
        name = disorder.find('.//Name[@lang="en"]')
        orpha_code = disorder.find('.//OrphaCode')
        if name is not None and orpha_code is not None:
            diseases.append({'name': name.text, 'orpha_code': orpha_code.text})
    return diseases

In [None]:
diseases_dict = {}
for file in read_files_from_folder('Rare_Diseases'):
    if file.endswith('.xml'):
        diseases = parse_orphanet_xml('Rare_Diseases/' + file)
        file_name = file.split('.')[0]
        diseases_dict[file_name] = diseases


--------------------------End Orphanet data------------------------------

### Begin Pubmed Data fetch

In [None]:
EMAIL = 'abc@gmail.com'

In [None]:
def search_pubmed(query, max_results=10):
    Entrez.email = EMAIL  # Always provide your email
    query_with_filter = query + " AND free full text[sb]"  # Adding the free full text filter
    handle = Entrez.esearch(db='pubmed', 
                            sort='relevance', 
                            retmax=max_results,
                            retmode='xml', 
                            term=query_with_filter)
    results = Entrez.read(handle)

    if results['IdList'] == []:
        handle = Entrez.esearch(db='pubmed', 
                            sort='relevance', 
                            retmax=max_results,
                            retmode='xml', 
                            term=query)
        
        results = Entrez.read(handle)
    return results['IdList']


def fetch_pubmed_details(id_list):
    Entrez.email = EMAIL 
    if len(id_list) == 0:
        return None
    ids = ','.join(id_list)
    handle = Entrez.efetch(db='pubmed', id=ids, retmode='xml')
    papers = Entrez.read(handle)
    return papers

In [None]:
disease_articles = {}

In [None]:
all_diseases = set()
for disease_name, diseases in diseases_dict.items():
    for disease in diseases:
        all_diseases.add(disease['name'])

In [None]:
for disease in all_diseases:
    if disease in disease_articles and disease_articles[disease] != []:
        continue    
    query = f'{disease}'
    disease_articles[disease] = search_pubmed(query, 50)

In [None]:
# logged on 8th Dec - 12:09 AM - json dumped at 12:10 AM
with open('disease_articles.json', 'w') as f:
    json.dump(disease_articles, f)


In [None]:
# load the json file for disease articles if present else create a new one
with open('disease_articles.json', 'r') as f:
    disease_articles = json.load(f)

In [None]:
papers_list = dict()

In [None]:

for disease, ids in tqdm(disease_articles.items()):
    if disease in papers_list and papers_list[disease] != []:
        continue
    papers_list[disease] = fetch_pubmed_details(ids)
    time.sleep(1)



In [None]:
disease_articles_new = {key: value for key, value in disease_articles.items() if value}
len(disease_articles_new)

In [None]:
combined_data = list()

def get_article_details(diseases_dict:dict, disease_articles:dict,combined_data:list):
    for category, diseases in diseases_dict.items():
        print(f"Processing {category}...")
        if os.path.exists(f"combined_data/{category}.json"):
            continue

        for disease in tqdm(diseases):
            disease_data = {
                'name': disease['name'],
                'orpha_code': disease['orpha_code'],
                'articles': []
            }
            
            if disease['name'] in disease_articles:
                ids = disease_articles_new[disease['name']][:5]
                papers = fetch_pubmed_details(ids)
                time.sleep(0.5)
                if papers:
                    for paper in papers['PubmedArticle']:
                        pmid = paper['MedlineCitation']['PMID'].title()
                        # Extract necessary details from each paper
                        article_data = paper['MedlineCitation']['Article']
                        if article_data['ELocationID'] and article_data['ELocationID'][0].attributes['EIdType'] == 'doi':
                            doi = article_data['ELocationID'][0].title()

                        article_url = f"https://doi.org/{doi}" if doi else ''
                        title = article_data['ArticleTitle']
                        abstract = paper['MedlineCitation']['Article']['Abstract']['AbstractText'][0] if 'Abstract' in paper['MedlineCitation']['Article'] else ''
                        disease_data['articles'].append({'PMID':pmid, 'title': title, 'abstract': abstract, 'article_url': article_url})

            combined_data.append(disease_data)


        # Create the "combined_data" folder if it doesn't exist
        if not os.path.exists("combined_data"):
            os.makedirs("combined_data")

        # Dump the combined_data dictionary to a JSON file in the "combined_data" folder
        with open(f"combined_data/{category}.json", "w") as f:
            json.dump(combined_data, f)

        combined_data.clear()



In [None]:
get_article_details(diseases_dict, disease_articles_new,combined_data)

In [None]:
def get_full_text_from_doi(doi_url):
    # Use Selenium to handle JavaScript-enabled requests
    options = webdriver.ChromeOptions()
    options.add_argument('window-size=1920x1080')  # Set the window size
    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36')
    options.add_argument('--headless')  # Run Chrome in headless mode
    with webdriver.Chrome(options=options) as driver:
        driver.get(doi_url)
        time.sleep(2)
        html = driver.page_source

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')
    article_tag = soup.find('article')
    if article_tag:
        article_text = article_tag.get_text()
    else:
        article_text = soup.get_text()

    return article_text



In [2]:
from bs4 import BeautifulSoup
import re

def clean_html(html_content):
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')
    article_tag = soup.find('article')
 
    # Remove script and style elements
    for script_or_style in soup(['script', 'style']):
        script_or_style.extract()

    # Get text
    text = ''
    if article_tag:
        text = article_tag.get_text()
    else:
        text = soup.get_text()

    # Break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())

    # Break multi-headlines into a line each
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))

    # Drop blank lines and remove non-ascii characters
    text = '\n'.join(chunk for chunk in chunks if chunk)
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)

    return text


In [3]:
options = webdriver.ChromeOptions()
options.add_argument('window-size=1920x1080')  # Set the window size
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36')
options.add_argument('--headless')  # Run Chrome in headless mode

In [None]:
combined_data_folder = 'combined_data'
for file_name in (os.listdir(combined_data_folder)):
    print(f"Processing {file_name}...")
    if os.path.exists(f"final_data/{file_name}"):
        continue
    file_path = os.path.join(combined_data_folder, file_name)
    with open(file_path, 'r') as f:
        final_data = json.load(f)
        # Process the data here
        with webdriver.Chrome(options=options) as driver:
            for data in tqdm(final_data[:250]):
                for article in data['articles']:
                    if 'full_text' in article:
                        continue
                    if article['article_url']:
                        driver.get(article['article_url'])
                        time.sleep(4)
                        html = driver.page_source
                        # Parse the HTML using BeautifulSoup
                        article_text = clean_html(html)
                        article['full_text'] = article_text        # Add the loaded file to the list

         
        # Create the "final_data" folder if it doesn't exist
        if not os.path.exists("final_data"):
            os.makedirs("final_data")

        # Dump the combined_data dictionary to a JSON file in the "final_data" folder
        with open(f"final_data/{file_name}", "w") as f:
            json.dump(final_data, f)  

        final_data = []
    

## Data processing and cleaning 

In [1]:
import pandas as pd
import nltk
nltk.download('stopwords')
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/chintanaddoni/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:

final_data_folder = 'final_data'
combined_data_df = []
df = pd.DataFrame(columns=['name', 'orpha_code', 'PMID', 'title', 'abstract', 'article_url', 'full_text'])

for file_name in os.listdir(final_data_folder):
    file_path = os.path.join(final_data_folder, file_name)
    with open(file_path, 'r') as f:
        data = json.load(f)
        combined_data_df = []
        for disease_data in data:
            name = disease_data['name']
            orpha_code = disease_data['orpha_code']
            articles = disease_data['articles']
            for article in articles:
                if 'full_text' in article: 
                    if len(article['full_text']) < 1000:
                        article['full_text'] = article['abstract']
                    combined_data_df.append([name, orpha_code, article['PMID'], article['title'], article['abstract'], article['article_url'], article['full_text']])
                else:
                    article['full_text'] = article['abstract']
                    combined_data_df.append([name, orpha_code, article['PMID'], article['title'], article['abstract'], article['article_url'], article['full_text']])

    # Append combined_data_df to df using pd.concat()
    df = pd.concat([df, pd.DataFrame(combined_data_df, columns=df.columns)], ignore_index=True)
    df.drop_duplicates(subset = df.columns.difference(['full_text', 'article_url']),inplace=True)




In [16]:
df.shape

(27507, 7)

In [17]:
df.head()

Unnamed: 0,name,orpha_code,PMID,title,abstract,article_url,full_text
0,Rare teratologic disease,52662,33745447,Diagnostic precision and identification of rar...,Diagnostic precision and the identification of...,https://doi.org/10.1002/Jimd.12306,Journal of Inherited Metabolic DiseaseVolume 4...
1,Rare teratologic disease,52662,36401554,Prevalence and mortality among children with a...,"We examined the total prevalence, trends in pr...",https://doi.org/10.1002/Bdr2.2129,"We examined the total prevalence, trends in pr..."
2,Rare teratologic disease,52662,27126916,Frederik Ruysch (1638-1731): Historical perspe...,The Peter the Great Museum of Anthropology and...,https://doi.org/10.1002/Ajmg.A.37663,The Peter the Great Museum of Anthropology and...
3,Rare teratologic disease,52662,35644130,A Multicountry Analysis of Prevalence and Mort...,Bladder exstrophy (BE) is a rare but severe b...,https://doi.org/10.1055/S-0042-1748318,Subscribe to RSS\nPlease copy the URL and add ...
4,Rare teratologic disease,52662,33253899,Prevalence and mortality in children with cong...,"This study determined the prevalence, mortalit...",https://doi.org/10.1016/J.Annepidem.2020.11.007,"Annals of EpidemiologyVolume 56, April 2021, P..."


In [18]:

# Set of English stopwords
stop_words = set(stopwords.words('english'))

def remove_citations(text):
    # Remove citations references (e.g., [1], [1,2], [1-3])
    text = re.sub(r'\[\d+(,\s?\d+)*(\s?-\s?\d+)?\]', '', text)
    # Remove URLs
    text = re.sub(r'http\S+|www\S+', '', text)
    return text
 

def remove_references_section(text):
    # Naive approach to remove references section
    text = re.sub(r'\b(references|bibliography)\b.*', '', text, flags=re.IGNORECASE | re.DOTALL)
    return text

def tokenize_and_clean(text):
    # Tokenize into words
    words = word_tokenize(text)

    # Remove stopwords and non-alphabetic words
    words = [word.lower() for word in words if word.isalpha() and word.lower() not in stop_words]

    # # add stemming
    # stemmer = PorterStemmer()
    # words = [stemmer.stem(word) for word in words]

    return words

def preprocess_text(text):
    text = remove_citations(text)
    text = remove_references_section(text)
    words = tokenize_and_clean(text)
    return ' '.join(words)


In [19]:
df['cleaned_text'] = df['full_text'].apply(preprocess_text)

In [20]:
df.to_json('final_data_cleaned.json', orient='records')

## Run from here to test the model with the processed data using above code. (Do run the above cells to import packages)

In [50]:
df = pd.read_json('final_data_cleaned.json')

In [21]:
df.head()

Unnamed: 0,name,orpha_code,PMID,title,abstract,article_url,full_text,cleaned_text
0,Rare teratologic disease,52662,33745447,Diagnostic precision and identification of rar...,Diagnostic precision and the identification of...,https://doi.org/10.1002/Jimd.12306,Journal of Inherited Metabolic DiseaseVolume 4...,journal inherited metabolic diseasevolume issu...
1,Rare teratologic disease,52662,36401554,Prevalence and mortality among children with a...,"We examined the total prevalence, trends in pr...",https://doi.org/10.1002/Bdr2.2129,"We examined the total prevalence, trends in pr...",examined total prevalence trends prevalence mo...
2,Rare teratologic disease,52662,27126916,Frederik Ruysch (1638-1731): Historical perspe...,The Peter the Great Museum of Anthropology and...,https://doi.org/10.1002/Ajmg.A.37663,The Peter the Great Museum of Anthropology and...,peter great museum anthropology ethnography ku...
3,Rare teratologic disease,52662,35644130,A Multicountry Analysis of Prevalence and Mort...,Bladder exstrophy (BE) is a rare but severe b...,https://doi.org/10.1055/S-0042-1748318,Subscribe to RSS\nPlease copy the URL and add ...,subscribe rss please copy url add rss feed rea...
4,Rare teratologic disease,52662,33253899,Prevalence and mortality in children with cong...,"This study determined the prevalence, mortalit...",https://doi.org/10.1016/J.Annepidem.2020.11.007,"Annals of EpidemiologyVolume 56, April 2021, P...",annals epidemiologyvolume april pages articlep...


In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

In [25]:


# Assuming 'preprocessed_texts' is a list of your preprocessed documents
vectorizer = TfidfVectorizer()
preprocessed_texts = df['cleaned_text'].values
tfidf_matrix = vectorizer.fit_transform(preprocessed_texts)

def find_unique_relevant_documents(query, tfidf_matrix, top_n=5):
    query_vector = vectorizer.transform([query])
    cosine_similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
    
    # Use argsort for indices and then unique to filter out duplicates
    unique_indices = np.unique(cosine_similarities.argsort()[::-1], return_index=True)[1]
    
    # Sort unique indices based on original similarity scores
    sorted_unique_indices = unique_indices[np.argsort(-cosine_similarities[unique_indices])]

    # Select top_n indices
    relevant_indices = sorted_unique_indices[:top_n]

    return relevant_indices, cosine_similarities[relevant_indices]


def summarize(text, language="english", sentences_count = 6):
    parser = PlaintextParser.from_string(text, Tokenizer(language))
    summarizer = LexRankSummarizer()
    summary = summarizer(parser.document, sentences_count)
    return ' '.join([str(sentence) for sentence in summary])


In [73]:
def search_documents(query, df = df, tfidf_matrix = tfidf_matrix, top_n=5, summary_sentences=5):
    relevant_indices, relevance_scores = find_unique_relevant_documents(query, tfidf_matrix, top_n)
    relevant_docs = df.iloc[relevant_indices]
    relevant_docs['relevance_score'] = relevance_scores
    
    # Keep only the rows with unique PMIDs
    relevant_docs = relevant_docs.drop_duplicates(subset='PMID', ignore_index=True)
    
    # Combine all the full_text
    combined_text = ' '.join(relevant_docs['full_text'].tolist())
    
    # Generate summary of the combined text
    # summary = summarize(combined_text, sentences_count=summary_sentences)
    summary = summarize(combined_text)
    
    return relevant_docs, summary


In [74]:
from transformers import AutoTokenizer, AutoModelWithLMHead

def summarize(text, max_length=130):
    """
    Summarizes the given text to about 5 sentences.

    Args:
    text (str): The text to be summarized.
    max_length (int): The maximum length of the summary (in tokens).

    Returns:
    str: The summarized text.
    """
    # Load model and tokenizer for 'facebook/bart-large-cnn'
    tokenizer = AutoTokenizer.from_pretrained('t5-base')
    model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)

    # Tokenize and encode the text
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)

    # Generate summary
    summary_ids = model.generate(inputs, max_length=max_length, min_length=100, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary

In [35]:
import warnings
# Ignore warnings
warnings.filterwarnings("ignore")

In [97]:
query = "Diagnostic criteria for rare hepatic diseases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Diagnostic criteria for rare hepatic diseases
Summary:


'hepatic processes are dysregulated in type 1 and type 2 diabetes mellitus. this imbalance contributes to hyperglycaemia in the fasted and postprandial states. in this review, we discuss the in vivo regulation of these hepatic glucose fluxes. we also highlight the importance of indirect (extrahepatic) control of hepatic gluconeogenesis and direct (hepatic) control of hepatic glycogen metabolism.'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,28731034,Regulation of hepatic glucose metabolism in he...,https://doi.org/10.1038/Nrendo.2017.80,0.32134
1,31148902,Autoimmune hepatitis and IgG4-related disease.,https://doi.org/10.3748/Wjg.V25.I19.2308,0.307815
2,36031652,Infantile hepatic hemangioma and hepatic mesen...,https://doi.org/10.1002/Ajmg.A.62767,0.28111
3,34928431,Challenges in the diagnosis of neurofibromatos...,https://doi.org/10.1007/S00439-021-02410-Z,0.279409


In [96]:
query = "Long-term outcomes of rare systemic diseases in childhood"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Long-term outcomes of rare systemic diseases in childhood
Summary:


'childhood glaucoma is classified in primary and secondary congenital glaucoma. CYP1B1 gene mutations seem to account for 87% of familial cases. childhood absence epilepsy (CAE) is considered easily manageable with medication. childhood absence epilepsy (epi) is most frequent in school-aged girls and is most frequent in school-aged girls. epi is a rare chromosomal disorder with distinctive phenotypic expressivity.'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,33466173,Prenatal diagnosis and ultrasonographic findin...,https://doi.org/10.1016/J.Amjms.2021.09.004,0.202516
1,26451378,Rare Diseases Leading to Childhood Glaucoma: E...,https://doi.org/10.2478/Bjmg-2019-0024,0.181602
2,31733607,B cell targeted therapies in autoimmune disease.,https://doi.org/10.1016/J.Coi.2019.09.004,0.177663
3,28325560,Long-term prognosis of childhood absence epile...,https://doi.org/10.1016/J.Nrl.2016.12.005,0.170796
4,38045990,Adult Phenotype of <i>SYNGAP1</i>-DEE.,https://doi.org/10.1016/J.Seizure.2009.04.004,0.170199


In [95]:
query = "Fertility treatment outcomes in rare infertility cases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Fertility treatment outcomes in rare infertility cases
Summary:


'Approximately half of male factor infertility cases have no known cause. but, it is likely that the majority of idiopathic male factor infertility cases have some unidentified genetic basis. Approximately 15% of couples worldwide are affected with infertility, attributed to a male co-factor in about half of the cases. most studies evaluate a single gene, an approach that is very inefficient in the context of male infertility.'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,24574159,Genetic susceptibility to male infertility: ne...,https://doi.org/10.1111/J.2047-2927.2014.00188.X,0.395717
1,32622407,Fertility issues and pregnancy outcomes in Tur...,https://doi.org/10.1016/J.Fertnstert.2020.03.002,0.377151
2,31355535,Clinical and molecular characterization of Y m...,https://doi.org/10.1111/Andr.12686,0.339412
3,33071633,Genetic disorders and male infertility.,https://doi.org/10.1002/Rmb2.12336,0.336226


In [94]:
query = "Symptoms of rare respiratory diseases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Symptoms of rare respiratory diseases
Summary:


"costello syndrome (CS) is a multisystem disorder caused by heterozygous germline mutations in the HRAS proto-oncogene. in the neonatal period, respiratory complications are seen in approximately 78% of patients with transient respiratory distress reported in 45% of neonates. other more specific respiratory diagnoses were reported in 62% of patients, the majority of which comprised disorders of the upper and lower respiratory tract. this review summarizes neonatal respiratory therapy's advances and available strategies."

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,28877977,Rare pulmonary diseases: a common fight.,https://doi.org/10.1038/S41576-022-00478-5,0.466163
1,27102959,Respiratory system involvement in Costello syn...,https://doi.org/10.1002/Ajmg.A.37655,0.410557
2,35382987,New developments in neonatal respiratory manag...,https://doi.org/10.1016/J.Pedneo.2022.02.002,0.28509
3,32016537,Respiratory drive in the acute respiratory dis...,https://doi.org/10.1007/S00134-020-05942-6,0.273988


In [91]:
query = "Biologic drug therapies for rare skin diseases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Biologic drug therapies for rare skin diseases
Summary:


'skin cancer has been the leading type of cancer worldwide. skin fibrosis is characterized by excessive fibroblast proliferation and extracellular matrix deposition in the dermis. effective anti-scarring therapeutics remain an unmet need, says dr. sanjay gupta. gupta: the mortality rate of severe drug eruptions can reach up to 50% if not treated early. gupta: a multidisciplinary approach is required for acute management '

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,35127174,Skin cancer biology and barriers to treatment:...,https://doi.org/10.1016/J.Jare.2021.06.014,0.223879
1,20065636,Canakinumab.,https://doi.org/10.1038/Nm.3804,0.212849
2,34273058,Current Perspectives on Severe Drug Eruption.,https://doi.org/10.1007/S12016-021-08859-0,0.211538
3,31493000,The IL-4/IL-13 axis in skin fibrosis and scarr...,https://doi.org/10.1007/S00403-019-01972-3,0.203446
4,34641447,Controlled Drug Delivery Systems: Current Stat...,https://doi.org/10.1016/J.Chest.2021.10.010,0.202744


In [90]:
query = "Renal replacement therapy options for rare renal diseases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Renal replacement therapy options for rare renal diseases
Summary:


'atherosclerotic renal artery stenosis is the leading cause of secondary hypertension. it may lead to resistant (refractory) hypertension, progressive decline in renal function, and cardiac destabilization syndromes. the best strategy to approach the treatment of atherosclerotic renal artery stenosis is to revascularize patients with renal artery stenosis. the authors review techniques to optimize patient selection, to minimize procedural complications, and to facilitate durable pat'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,21071977,Tuberous sclerosis complex renal disease.,https://doi.org/10.1159/000320891,0.422263
1,30898248,When and How Should We Revascularize Patients ...,https://doi.org/10.1016/J.Jcin.2018.10.023,0.363647
2,33603889,Management of acute intradialytic cardiovascul...,https://doi.org/10.1016/J.Jcin.2018.10.023,0.362259
3,30274631,Renal Manifestations of Inflammatory Bowel Dis...,https://doi.org/10.1016/J.Rdc.2018.06.007,0.35866
4,25316474,Renal artery stenosis in association with cong...,https://doi.org/10.1038/S41375-021-01290-6,0.358251


In [89]:
query = "Prenatal diagnostic methods for rare teratologic disorders"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Prenatal diagnostic methods for rare teratologic disorders
Summary:


'angelman syndrome (AS) is a rare neurodevelopmental disorder caused by mutation or deletion of the maternally inherited UBE3A allele. genetic tests can detect the chromosome 15q11-q13 deletion that is the most common cause of AS. prenatal testing combined with prenatal treatment has the potential to revolutionize how clinicians detect and treat babies before they are symptomatic. this pioneering prenatal treatment path for AS will lay the foundation for treating other syndromic neurodevelopmental disorders'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,27590389,Prenatal diagnosis of partial monosomy 5p (5p1...,https://doi.org/10.1016/J.Tjog.2016.06.014,0.156389
1,31490639,Prenatal treatment path for angelman syndrome ...,https://doi.org/10.1002/Aur.2203,0.151882
2,27208505,A diagnostic approach to mild bleeding disorders.,https://doi.org/10.1111/Jth.13368,0.140649
3,22030049,Prenatal diagnosis of mosaic trisomy 8: clinic...,https://doi.org/10.1016/J.Tjog.2011.07.013,0.130093
4,36034547,Prenatal ultrasound phenotypic and genetic eti...,https://doi.org/10.1016/J.Ajhg.2010.06.010,0.128718


In [88]:
query = "Robotic surgery in the treatment of rare urogenital diseases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Robotic surgery in the treatment of rare urogenital diseases
Summary:


'robotic surgery has shown clear utility and advantages in the adult population. but its role in pediatrics remains controversial. robotic pyeloplasty is a standard of care in older children and has even been performed in infants and re-do surgery. future advances in robotics will help to advance the field of robotic surgery in pediatric urology. back to mail online home. back to the page you came from. back to the page you came from.'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,29264208,Robotic surgery in pediatric urology.,https://doi.org/10.1016/J.Ajur.2016.06.002,0.403642
1,35038623,Intensified Hyposensitization Is an Effective ...,https://doi.org/10.1016/J.Fertnstert.2019.11.021,0.187432
2,35159098,A Comprehensive Commentary on the Multilocular...,https://doi.org/10.3348/Jksr.2021.0022,0.175941
3,27833904,Surgical Treatment after Failed Primary Correc...,https://doi.org/10.1002/Uog.7721,0.167779
4,35440058,Molecular cytogenetic characterization of part...,https://doi.org/10.1159/000511972,0.166603


In [85]:
query = "Gene editing prospects in treating rare genetic diseases"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Gene editing prospects in treating rare genetic diseases
Summary:


'gene editing is a powerful tool for genome and cell engineering. exemplified by CRISPR-Cas, gene editing could cause DNA damage. catalytically inactive dCas9 promotes knock-in of long sequences in mammalian cells. dCas9-SSAP editor has low on-target errors and minimal off-target effects. dCas9-SSAP is effective for inserting kilobase-scale sequences '

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,35145221,dCas9-based gene editing for cleavage-free gen...,https://doi.org/10.1038/S41556-021-00836-1,0.207978
1,30524313,New Approaches to Tay-Sachs Disease Therapy.,https://doi.org/10.1038/S41586-019-1711-4,0.194631
2,31634902,Search-and-replace genome editing without doub...,https://doi.org/10.1038/S41586-019-1711-4,0.194631
3,35741383,Gene Editing-Based Technologies for <i>Beta-he...,https://doi.org/10.1056/Nejmoa1705342,0.181994
4,37298481,Precision Editing as a Therapeutic Approach fo...,https://doi.org/10.1080/20009666.2018.1536241,0.173292


In [78]:
query = "Role of immunotherapy in treating rare endocrine disorders"
relevant_docs, summary = search_documents(query)
print(f"Query: {query}")
print(f"Summary:")
display(summary)
print(f"Relevant Documents:")
display(relevant_docs[['PMID', 'title','article_url','relevance_score']])

Query: Role of immunotherapy in treating rare endocrine disorders
Summary:


'the therapeutic landscape for non-melanoma skin cancer has expanded with the development of effective and targeted immunotherapy. in this review, we discuss relevant ophthalmic findings associated with key disorders of the pancreas, thyroid gland, and hypothalamic-pituitary axis. diabetes mellitus (DM) is the leading cause of blindness among adults under 75 years of age. ophthalmic retinopathy (DR) has significant predictive value for cardiovascular disease and mortality in patients with'

Relevant Documents:


Unnamed: 0,PMID,title,article_url,relevance_score
0,34448958,Immunotherapy for Non-melanoma Skin Cancer.,https://doi.org/10.2340/00015555-0359,0.345397
1,29184811,Dermatologic manifestations of endocrine disor...,https://doi.org/10.21037/Tp.2017.09.08,0.27106
2,31687637,The Natural History of a Man With Ovotesticula...,https://doi.org/10.1210/Js.2019-00241,0.250915
3,36613725,Childhood Hypophosphatasia Associated with a N...,https://doi.org/10.1210/Js.2017-00307,0.250636
4,34884378,Hypophosphatasia.,https://doi.org/10.1210/Js.2017-00307,0.250636
