In [11]:
import requests
import xml.etree.ElementTree as ET
import time
import json
import csv
from datetime import datetime

def _element_text(element, default):
    """Return all text inside *element*, or *default* if it is missing or empty.

    ``Element.text`` only holds the text that precedes the first child element
    and is ``None`` when the element starts with inline markup (``<i>``,
    ``<sub>``, ...), so the whole subtree is joined with ``itertext()``.
    """
    if element is None:
        return default
    text = "".join(element.itertext()).strip()
    return text or default


def _abstract_text(article, default):
    """Join every ``Abstract/AbstractText`` section of *article* into one string.

    Sections that carry a ``Label`` attribute are rendered as ``"LABEL: text"``.
    Returns *default* when the article has no non-empty abstract section.
    """
    sections = []
    for section in article.findall(".//Abstract/AbstractText"):
        text = _element_text(section, "")
        if not text:
            continue
        label = section.get("Label")
        sections.append(f"{label}: {text}" if label else text)
    return " ".join(sections) or default


def fetch_pubmed_articles(query, max_results=5, timeout=30):
    """Search PubMed for *query* and return up to *max_results* article records.

    Runs an ESearch with ``usehistory=y`` and then an EFetch against the
    returned ``webenv``/``querykey`` pair, parsing the XML reply.

    Args:
        query: Search term, interpolated into the URL as-is (the caller is
            expected to pass it already URL-safe, e.g. ``"Assisted+dying"``).
        max_results: Value sent as ``retmax`` to EFetch.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of dicts with the string keys ``'id'``, ``'title'`` and
        ``'abstract'``. Missing titles/abstracts are replaced by a
        "... not available." placeholder, so the values are never ``None``.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a non-2xx HTTP status.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    search_url = f"{base_url}esearch.fcgi?db=pubmed&term={query}&usehistory=y&retmode=json"
    search_response = requests.get(search_url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    search_response.raise_for_status()
    search_result = search_response.json()['esearchresult']

    total_count = int(search_result['count'])
    print(f"Total articles found: {total_count}")
    if total_count == 0:
        # Nothing to fetch; skip the second request entirely.
        return []

    webenv = search_result['webenv']
    query_key = search_result['querykey']
    print(f"Fetching up to {max_results} articles...")

    fetch_url = f"{base_url}efetch.fcgi?db=pubmed&query_key={query_key}&WebEnv={webenv}&retmode=xml&retmax={max_results}"
    fetch_response = requests.get(fetch_url, timeout=timeout)
    fetch_response.raise_for_status()

    root = ET.fromstring(fetch_response.content)

    return [
        {
            'id': _element_text(article.find(".//PMID"), "PMID not available."),
            'title': _element_text(article.find(".//ArticleTitle"), "Title not available."),
            'abstract': _abstract_text(article, "Abstract not available."),
        }
        for article in root.findall(".//PubmedArticle")
    ]

def process_article_with_ollama(article, model_name="llama3", timeout=300):
    """Ask a local Ollama model to classify one article and return its answer.

    Args:
        article: Dict with the keys ``'id'``, ``'title'`` and ``'abstract'``.
            A missing or ``None`` abstract is replaced by a placeholder rather
            than crashing the slice below.
        model_name: Name of the Ollama model sent in the request payload.
        timeout: Seconds to wait for the Ollama reply before giving up.

    Returns:
        A dict with the keys ``'article_id'``, ``'title'`` and ``'analysis'``.
        On any request or response-format failure ``'analysis'`` holds an
        ``"Error: ..."`` message instead of raising, so one bad article does
        not abort a batch.
    """
    url = "http://localhost:11434/api/generate"

    # Guard against a None/absent abstract before truncating it for the prompt.
    abstract = article.get('abstract') or "Abstract not available."

    prompt = f"""
    Analyze the following research article on assisted dying:

    Title: {article['title']}

    Abstract: {abstract[:1000]}

    Please provide a structured response addressing the following points:
    1. Study Type: Is this study theoretical or empirical?
    2. Research Methods: What specific research methods were used (e.g., survey, self-report, experiment, meta-analysis)?
    3. Justification: Briefly explain the reasoning behind your classification.

    Response:
    """

    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()['response']
    except requests.exceptions.RequestException as e:
        result = f"Error: Unable to process article. {str(e)}"
    except (KeyError, ValueError) as e:
        # Reply was not JSON, or was JSON without a 'response' field.
        result = f"Error: Unexpected response from Ollama. {e!r}"

    return {
        'article_id': article['id'],
        'title': article['title'],
        'analysis': result
    }

def save_results(results, base_filename):
    """Write *results* to ``<base_filename>.json`` and ``<base_filename>.csv``.

    Args:
        results: List of dicts keyed by ``'article_id'``, ``'title'`` and
            ``'analysis'``.
        base_filename: Output path without extension; both files share it.

    The JSON file is written first (indented by two spaces), then the CSV
    file with a header row. A confirmation line is printed after each file.
    """
    columns = ['article_id', 'title', 'analysis']

    # JSON export
    json_filename = f"{base_filename}.json"
    with open(json_filename, 'w') as json_file:
        json.dump(results, json_file, indent=2)
    print(f"Results saved to {json_filename}")

    # CSV export
    csv_filename = f"{base_filename}.csv"
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
        table = csv.DictWriter(csv_file, fieldnames=columns)
        table.writeheader()
        table.writerows(results)
    print(f"Results saved to {csv_filename}")

def run_integration_test(query="Assisted+dying", max_articles=5, model_name="llama3"):
    """Fetch articles for *query*, analyse each one with Ollama, and report progress.

    Args:
        query: Search term handed to ``fetch_pubmed_articles``.
        max_articles: Maximum number of articles to request.
        model_name: Ollama model name handed to ``process_article_with_ollama``.

    Returns:
        The list of per-article result dicts, in fetch order.
    """
    print(f"Running integration test with query: '{query}', max articles: {max_articles}, model: {model_name}")

    # Step 1: retrieve the articles.
    articles = fetch_pubmed_articles(query, max_articles)
    total = len(articles)
    print(f"Successfully fetched {total} articles.")

    # Step 2: run every article through the model, echoing each analysis.
    results = []
    for position, article in enumerate(articles, start=1):
        print(f"\nProcessing article {position} of {total}...")
        outcome = process_article_with_ollama(article, model_name)
        results.append(outcome)
        print(
            f"Article ID: {outcome['article_id']}",
            f"Title: {outcome['title']}",
            "Analysis:",
            outcome['analysis'],
            "-" * 50,
            sep="\n",
        )
        # Pause one second between requests so the Ollama API is not hammered.
        time.sleep(1)

    print("\nIntegration test complete!")
    return results

# Run the integration test when this code is executed directly: fetch and
# analyse articles with the default arguments, then save the results to a
# timestamped JSON/CSV file pair.
if __name__ == "__main__":
    # The timestamp is taken before the run starts, so the output filename
    # reflects launch time (local clock, format YYYYMMDD_HHMMSS).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_filename = f"pubmed_ollama_test_results_{timestamp}"
    
    # `results` stays bound at module level after the run.
    results = run_integration_test()
    save_results(results, base_filename)



Running integration test with query: 'Assisted+dying', max articles: 5, model: llama3
Total articles found: 3877
Fetching up to 5 articles...
Successfully fetched 5 articles.

Processing article 1 of 5...
Article ID: 39160544
DOI: 10.1186/s12904-024-01542-z
Title: Non-invasive technology to assess hydration status in advanced cancer to explore relationships between fluid status and symptoms: an observational study using bioelectrical impedance analysis.
Abstract: Oral fluid intake decreases in advanced cancer in the dying phase of illness. There is inadequate ev...
Study Type: **
Study Type Justification: **
The title "Non-invasive technology to assess hydration status in advanced cancer" suggests that the study is an empirical investigation, as it involves collecting data on specific variables (hydration status) using a particular method (bioelectrical impedance analysis). The abstract also mentions examining relationships between hydration status and clinical variables, which implies