In [1]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import time
import json
from typing import List, Dict


# Check to see that Ollama is Running Correctly

In [2]:
import requests
import json

def test_ollama_model(model_name="llama3", prompt="Explain the concept of machine learning in one sentence."):
    url = "http://localhost:11434/api/generate"
    
    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False
    }
    
    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()  # Raises a HTTPError if the status is 4xx, 5xx
        
        result = response.json()
        
        print("Ollama is working correctly!")
        print(f"Model: {model_name}")
        print(f"Prompt: {prompt}")
        print(f"Response: {result['response']}")
        
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while connecting to Ollama: {e}")
        print("Please make sure Ollama is running and the model is correctly installed.")

# Run the test
# Calls the smoke test with its default model and prompt; the guard keeps the
# call from firing if this code is ever imported rather than executed directly.
if __name__ == "__main__":
    test_ollama_model()

Ollama is working correctly!
Model: llama3
Prompt: Explain the concept of machine learning in one sentence.
Response: Machine learning is a subfield of artificial intelligence that enables computers to learn from data without being explicitly programmed, allowing them to recognize patterns, make predictions, and improve their performance over time through iterative training on large datasets.


# Test API Pulls

In [3]:
import requests
import xml.etree.ElementTree as ET
import time

def test_pubmed_api_pull(query, max_results=50):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    
    # First, get the total count of results and set up the search
    search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&usehistory=y&retmode=json"
    search_response = requests.get(search_url).json()
    
    total_count = int(search_response['esearchresult']['count'])
    webenv = search_response['esearchresult']['webenv']
    query_key = search_response['esearchresult']['querykey']
    
    print(f"Total articles found: {total_count}")
    print(f"Fetching up to {max_results} articles...")

    # Fetch the articles
    fetch_url = f"{base_url}efetch.fcgi?db=pubmed&query_key={query_key}&WebEnv={webenv}&retmode=xml&retmax={max_results}"
    fetch_response = requests.get(fetch_url)
    
    root = ET.fromstring(fetch_response.content)
    
    articles = []
    for article in root.findall(".//PubmedArticle"):
        pmid = article.find(".//PMID").text
        title_element = article.find(".//ArticleTitle")
        title = title_element.text if title_element is not None else "Title not available."
        abstract_element = article.find(".//Abstract/AbstractText")
        abstract = abstract_element.text if abstract_element is not None else "Abstract not available."
        
        articles.append({
            'id': pmid,
            'title': title,
            'abstract': abstract[:100] + "..." if abstract else "Abstract not available."  # Truncate abstract for display
        })
    
    return articles

def display_articles(articles):
    """Print every article as a numbered entry followed by a 50-dash rule.

    Args:
        articles: Iterable of dicts providing the keys ``'id'``, ``'title'``
            and ``'abstract'``.
    """
    rule = "-" * 50
    labelled_fields = (("ID", 'id'), ("Title", 'title'), ("Abstract", 'abstract'))

    position = 0
    for entry in articles:
        position += 1
        print(f"\nArticle {position}:")
        for label, key in labelled_fields:
            print(f"{label}: {entry[key]}")
        print(rule)

# Run the test
if __name__ == "__main__":
    query = "Assisted+dying"  # '+' stands in for a space in the search term
    max_results = 50

    articles = test_pubmed_api_pull(query, max_results)
    print(f"\nSuccessfully fetched {len(articles)} articles.")

    # Display the first 5 articles
    display_articles(articles[:5])

    print(f"\nTotal articles fetched: {len(articles)}")
    print("To see all articles, remove the slice from display_articles(articles[:5])")

# Uncomment the following line to display all fetched articles.
# It stays disabled by default: it prints every fetched article, and it relies on
# `articles`, which only exists once the guarded block above has run.
# display_articles(articles)

Total articles found: 3877
Fetching up to 50 articles...

Successfully fetched 50 articles.

Article 1:
ID: 39160544
Title: Non-invasive technology to assess hydration status in advanced cancer to explore relationships between fluid status and symptoms: an observational study using bioelectrical impedance analysis.
Abstract: Oral fluid intake decreases in advanced cancer in the dying phase of illness. There is inadequate ev...
--------------------------------------------------

Article 2:
ID: 39157533
Title: Readiness of nurses when faced with a patient's death.
Abstract: The death of a patient negatively affects the professional dimension of nurses' functioning and also...
--------------------------------------------------

Article 3:
ID: 39157418
Title: 'There is no such word as palliative care for us at the moment': A mixed-method study exploring the perceptions of healthcare professionals on the need for palliative care in Bhutan.
Abstract: The need for palliative care is ever-incr

# Fetch PubMed Articles

In [None]:
def _element_text(element) -> str:
    """Return the whitespace-collapsed text under *element* ('' when it is None)."""
    if element is None:
        return ""
    # itertext() also yields text nested in inline markup such as <i> or <sub>,
    # which a plain `.text` lookup cuts off at the first child element.
    return " ".join("".join(element.itertext()).split())


def _parse_pubmed_article(article) -> Dict[str, str]:
    """Extract the PMID, title and complete abstract from one PubmedArticle element."""
    title = _element_text(article.find(".//ArticleTitle"))
    # Structured abstracts are split over several AbstractText elements; keep them all.
    sections = (_element_text(section) for section in article.findall(".//Abstract/AbstractText"))
    abstract = " ".join(section for section in sections if section)
    return {
        'id': article.findtext(".//PMID", default=""),
        'title': title or "Title not available.",
        'abstract': abstract or "Abstract not available."
    }


def fetch_pubmed_articles(query: str, batch_size: int = 500, delay: float = 1.0,
                          timeout: float = 60.0) -> List[Dict[str, str]]:
    """Download every PubMed record matching *query* through the E-utilities.

    Runs one ESearch with ``usehistory=y`` to obtain the hit count and the
    history handles, then pages through EFetch in batches.

    Args:
        query: Search term. It is interpolated into the request URL as-is, so
            it must already be URL-safe (for example ``"Assisted+dying"``).
        batch_size: Number of records requested per EFetch call.
        delay: Seconds to pause between consecutive EFetch calls.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        One dict per article with the keys ``'id'``, ``'title'`` and
        ``'abstract'``. Missing titles or abstracts are replaced by the
        placeholders ``"Title not available."`` / ``"Abstract not available."``.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
        requests.exceptions.HTTPError: If any request returns a 4xx/5xx status.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # First, get the total count of results (retmax=0: no IDs are needed here)
    search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&retmax=0&usehistory=y&retmode=json"
    search_http = requests.get(search_url, timeout=timeout)
    search_http.raise_for_status()
    search_result = search_http.json()['esearchresult']

    total_count = int(search_result['count'])
    print(f"Total articles found: {total_count}")
    if total_count == 0:
        # Nothing to page through, so skip EFetch entirely.
        return []

    webenv = search_result['webenv']
    query_key = search_result['querykey']

    articles: List[Dict[str, str]] = []
    for start in range(0, total_count, batch_size):
        if start and delay > 0:
            time.sleep(delay)  # Be respectful to the NCBI servers between requests

        print(f"Fetching articles {start+1} to {min(start+batch_size, total_count)}...")

        fetch_url = f"{base_url}efetch.fcgi?db=pubmed&query_key={query_key}&WebEnv={webenv}&retstart={start}&retmax={batch_size}&retmode=xml"
        fetch_http = requests.get(fetch_url, timeout=timeout)
        fetch_http.raise_for_status()  # fail here rather than while parsing an error page as XML

        root = ET.fromstring(fetch_http.content)
        articles.extend(_parse_pubmed_article(article) for article in root.findall(".//PubmedArticle"))

    return articles

# Define the Llama 3 (8B) Processing Step

In [None]:
def process_article_with_ollama(article: Dict[str, str], model_name: str = "llama3",
                                timeout: float = 300.0) -> Dict[str, str]:
    """Ask a local Ollama model to classify one article's study type and methods.

    Builds a prompt from the article's title and the first 1000 characters of
    its abstract, posts it to ``http://localhost:11434/api/generate`` and
    returns the article fields together with the model's answer.

    Args:
        article: Dict with the keys ``'id'``, ``'title'`` and ``'abstract'``.
            A missing (``None`` or empty) abstract is replaced in the prompt by
            ``"Abstract not available."``.
        model_name: Name of the Ollama model to query.
        timeout: Seconds to wait for the model's reply.

    Returns:
        Dict with the keys ``'article_id'``, ``'title'``, ``'abstract'`` (the
        original, untruncated value) and ``'classification'``. On a non-200
        status or a failed request, ``'classification'`` holds a message
        starting with ``"Error: Unable to process article."`` instead of the
        model output, so one bad request does not abort a whole batch.
    """
    # Slicing None would raise TypeError, so fall back to a placeholder first.
    abstract_text = article['abstract'] or "Abstract not available."

    prompt = f"""
    Analyze the following research article on assisted dying:

    Title: {article['title']}

    Abstract: {abstract_text[:1000]}

    Please provide a structured response addressing the following points:
    1. Study Type: Is this study theoretical or empirical?
    2. Research Methods: What specific research methods were used (e.g., survey, self-report, experiment, meta-analysis)?
    3. Justification: Briefly explain the reasoning behind your classification.

    Response:
    """

    try:
        response = requests.post('http://localhost:11434/api/generate',
                                 json={
                                     "model": model_name,
                                     "prompt": prompt,
                                     "stream": False
                                 },
                                 timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Connection failures and timeouts are recorded like HTTP errors below.
        result = f"Error: Unable to process article. Request failed: {exc}"
    else:
        if response.status_code == 200:
            result = response.json()['response']
        else:
            result = f"Error: Unable to process article. Status code: {response.status_code}"

    return {
        'article_id': article['id'],
        'title': article['title'],
        'abstract': article['abstract'],
        'classification': result
    }


# Process Articles with Llama 3 (8B)

In [None]:
def process_all_articles(articles: List[Dict[str, str]], model_name: str = "llama3",
                         delay: float = 1.0) -> List[Dict[str, str]]:
    """Run ``process_article_with_ollama`` over every article, in order.

    Args:
        articles: Sized collection of article dicts to classify.
        model_name: Name of the Ollama model, passed through unchanged.
        delay: Seconds to pause between consecutive articles; ``0`` disables
            the pause.

    Returns:
        The per-article result dicts, in the same order as *articles*.
    """
    total = len(articles)
    print(f"Processing {total} articles...")
    results = []
    for i, article in enumerate(articles, 1):
        if i > 1 and delay > 0:
            time.sleep(delay)  # Add a small delay between processing articles
        print(f"Processing article {i} of {total}...")
        results.append(process_article_with_ollama(article, model_name))
    return results

def save_results(results: List[Dict[str, str]], filename: str = 'assisted_dying_analysis_ollama.csv'):
    """Write the analysis results to a CSV file, one row per result dict.

    Args:
        results: Result dicts; their keys become the CSV columns.
        filename: Destination path of the CSV file.
    """
    print("Storing results...")
    # Build the table and write it in one go, leaving out the row index column.
    pd.DataFrame(results).to_csv(filename, index=False)
    completion_note = f"Analysis complete. Results stored in '{filename}'"
    print(completion_note)