In [None]:
# Install necessary libraries
!pip install arxiv pandas biopython transformers ipywidgets


In [7]:
import arxiv
import pandas as pd
from Bio import Entrez
from transformers import pipeline
import ipywidgets as widgets
from IPython.display import display
from datetime import datetime

# Set up Entrez email (required for NCBI's E-utilities)
Entrez.email = "your.email@example.com"  # Replace with your email

# Function to fetch papers from PubMed
def fetch_papers_pubmed(query='deep learning AND classification AND fracture AND bone', max_results=5):
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    search_results = Entrez.read(search_handle)
    search_handle.close()

    id_list = search_results["IdList"]

    papers = []
    if id_list:
        fetch_handle = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="xml")
        abstracts = Entrez.read(fetch_handle)
        fetch_handle.close()

        for article in abstracts['PubmedArticle']:
            article_title = article['MedlineCitation']['Article']['ArticleTitle']
            abstract = ' '.join(article['MedlineCitation']['Article'].get('Abstract', {}).get('AbstractText', ['']))

            publish_date = article['MedlineCitation'].get('DateCompleted') or article['MedlineCitation'].get('DateRevised') or None
            if publish_date:
                publish_date = f"{publish_date['Year']}-{publish_date['Month']}-{publish_date['Day']}"
            else:
                publish_date = "Unknown"

            article_id = article['MedlineCitation']['PMID']  # Get PubMed ID
            article_url = f"https://pubmed.ncbi.nlm.nih.gov/{article_id}/"  # Construct the PubMed URL

            papers.append({
                'published': publish_date,
                'title': article_title,
                'abstract': abstract,
                'url': article_url,
                'publisher': 'PubMed'
            })

    return pd.DataFrame(papers)

# Function to fetch papers from arXiv
def fetch_papers_arxiv(query='deep learning AND classification AND fracture AND bone', max_results=5):
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate
    )

    papers = []
    for result in search.results():
        article_url = result.entry_id  # Get arXiv URL
        papers.append({
            'published': result.published.strftime('%Y-%m-%d'),
            'title': result.title,
            'abstract': result.summary,
            'url': article_url,
            'categories': ','.join(result.categories),
            'publisher': 'arXiv'
        })

    return pd.DataFrame(papers)

# Fetch papers from both arXiv and PubMed
df_arxiv = fetch_papers_arxiv(max_results=5)
df_pubmed = fetch_papers_pubmed(max_results=5)

# Combine both dataframes
df_combined = pd.concat([df_arxiv, df_pubmed], ignore_index=True)

# Convert the published date to datetime for proper sorting
df_combined['published'] = pd.to_datetime(df_combined['published'], errors='coerce')

# Sort the combined dataframe by the published date
df_combined = df_combined.sort_values(by='published', ascending=False).reset_index(drop=True)

# Convert 'published' column to string format for dropdown display
df_combined['published_str'] = df_combined['published'].dt.strftime('%Y-%m-%d')

# Initialize the summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)  # Set device to 0 for GPU, -1 for CPU

# Function to summarize the abstract
def summarize_abstract(abstract_text):
    if abstract_text:  # Check if abstract is not empty
        summary = summarizer(abstract_text, max_length=150, min_length=30, do_sample=False)
        return summary[0]['summary_text']
    else:
        return "No abstract available."

# Apply summarization
df_combined['summary'] = df_combined['abstract'].apply(summarize_abstract)

# Prepare dropdown options with publication date and publisher
dropdown_options = [
    f"{row['published_str']} - {row['title']} (Publisher: {row['publisher']})"
    for _, row in df_combined.iterrows()
]

# Create interactive widgets
# Dropdown widget for article titles
dropdown = widgets.Dropdown(
    options=dropdown_options,
    description='Article:',
    disabled=False
)

# Create an HTML widget to display the summary with a clickable link
summary_html = widgets.HTML(
    value='',
    placeholder='Summary will appear here...',
    description='Summary:',
    layout=widgets.Layout(width='100%', height='250px')
)

# Function to update the summary based on the selected article
def update_summary(change):
    selected_option = dropdown.value
    selected_title = selected_option.split(' - ', 1)[1].split(' (', 1)[0]
    selected_row = df_combined[df_combined['title'] == selected_title].iloc[0]
    summary = selected_row['summary']
    publish_date = selected_row['published_str']
    publisher = selected_row['publisher']
    article_url = selected_row['url']

    # Use HTML to create a clickable link and format the summary text
    summary_html.value = (f"<b>Title:</b> {selected_title}<br>"
                          f"<b>Published:</b> {publish_date}<br>"
                          f"<b>Publisher:</b> {publisher}<br>"
                          f"<b>URL:</b> <a href='{article_url}' target='_blank'>Click here to view the article</a><br><br>"
                          f"<b>Summary:</b><br>{summary}")

# Link the dropdown widget to the update function
dropdown.observe(update_summary, names='value')

# Display the widgets
display(dropdown, summary_html)


  for result in search.results():
Your max_length is set to 150, but your input_length is only 111. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)


Dropdown(description='Article:', options=('2024-06-22 - Bone Fracture Classification using Transfer Learning (…

HTML(value='', description='Summary:', layout=Layout(height='250px', width='100%'), placeholder='Summary will …