## Research Blogger: Automated Paper Summarization and Social Media Posting 
### This script fetches the latest AI research paper from arXiv, generates a blog post summary, creates a GitHub Pages post, and shares it on Threads.

In [3]:
import arxiv
import openai
import requests
import base64
import time
from datetime import datetime, timedelta, timezone
from IPython.display import display, Markdown

# Configuration
THREADS_USER_ID = 'your-Threads-user-id'
THREADS_ACCESS_TOKEN = "your-Threads-access-token"
APP_SECRET = 'your-Threads-secret-key'
OPENAI_API_KEY = "your-openAI-key"
GITHUB_TOKEN = "your-github-access-token"
GITHUB_REPO = "your-github-username/repo-name"
GITHUB_PAGES_SITE = 'username.github.io/repo-name'  # e.g. porkpy.github.io/research_blogger

# Set up OpenAI API client
openai.api_key = OPENAI_API_KEY

def fetch_latest_paper(category):
    """
    Fetches the latest paper from arXiv in the specified category.
    
    Args:
    category (str): The arXiv category to search in (e.g., "cs.AI" for Artificial Intelligence)
    
    Returns:
    arxiv.Result or None: The latest paper if found within the last week, otherwise None
    """
    client = arxiv.Client()
    last_week = datetime.now(timezone.utc) - timedelta(days=7)
    search = arxiv.Search(
        query=f"cat:{category}",
        max_results=1,
        sort_by=arxiv.SortCriterion.SubmittedDate
    )
    results = list(client.results(search))
    if results and results[0].published.replace(tzinfo=timezone.utc) > last_week:
        return results[0]
    return None

def generate_blog_post(paper):
    """
    Generates a blog post summary of the given paper using OpenAI's GPT model.
    
    Args:
    paper (arxiv.Result): The paper to summarize
    
    Returns:
    str or None: The generated blog post content, or None if generation fails
    """
    authors = ', '.join([author.name for author in paper.authors])
    prompt = f"""Write an engaging blog post about the following scientific paper:

Title: {paper.title}
Authors: {authors}
Abstract: {paper.summary}

The blog post should:
1. Explain the main findings in simple terms
2. Discuss potential real-world implications
3. Be engaging and accessible to a general audience
4. Be around 300-400 words long

Blog Post:"""

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that writes engaging blog posts about scientific papers."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.7
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error generating blog post: {e}")
        return None

def generate_threads_post(paper, blog_post_url):
    """
    Generates a short post for Threads about the given paper.
    
    Args:
    paper (arxiv.Result): The paper to create a post about
    blog_post_url (str): The URL of the full blog post
    
    Returns:
    str or None: The generated Threads post content, or None if generation fails
    """
    prompt = f"""Create a short, engaging post for Threads (max 500 characters) about this scientific paper:
    Title: {paper.title}
    
    Include a brief highlight and end with "Read more: [URL]"
    """
    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that creates engaging social media posts about scientific papers."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100,
            temperature=0.7
        )
        return f"{response.choices[0].message.content.strip()} Read more: {blog_post_url}"
    except Exception as e:
        print(f"Error generating Threads post: {e}")
        return None

def create_media_container(access_token, user_id, text):
    """
    Creates a media container for a Threads post.
    
    Args:
    access_token (str): Threads API access token
    user_id (str): Threads user ID
    text (str): Content of the post
    
    Returns:
    dict: JSON response from the Threads API
    """
    url = f"https://graph.threads.net/v1.0/{user_id}/threads"
    params = {
        "media_type": "TEXT",
        "text": text,
        "access_token": access_token
    }
    response = requests.post(url, params=params)
    print(f"Create Media Container Status Code: {response.status_code}")
    return response.json()

def publish_thread(access_token, user_id, creation_id):
    """
    Publishes a thread on Threads.
    
    Args:
    access_token (str): Threads API access token
    user_id (str): Threads user ID
    creation_id (str): ID of the media container to publish
    
    Returns:
    dict: JSON response from the Threads API
    """
    url = f"https://graph.threads.net/v1.0/{user_id}/threads_publish"
    params = {
        "creation_id": creation_id,
        "access_token": access_token
    }
    response = requests.post(url, params=params)
    print(f"Publish Thread Status Code: {response.status_code}")
    return response.json()

def post_to_threads(text):
    """
    Posts content to Threads.
    
    Args:
    text (str): Content to post
    
    Returns:
    bool: True if posting was successful, False otherwise
    """
    try:
        container = create_media_container(THREADS_ACCESS_TOKEN, THREADS_USER_ID, text)
        if 'id' in container:
            print("Waiting 30 seconds before publishing...")
            time.sleep(30)
            publish_result = publish_thread(THREADS_ACCESS_TOKEN, THREADS_USER_ID, container['id'])
            if 'id' in publish_result:
                print("Successfully posted to Threads!")
                return True
        print("Failed to post to Threads.")
        return False
    except Exception as e:
        print(f"Error posting to Threads: {e}")
        return False

def create_github_blog_post(title, content, date):
    """
    Creates a new blog post on GitHub Pages.
    
    Args:
    title (str): Title of the blog post
    content (str): Content of the blog post
    date (str): Date of the blog post in YYYY-MM-DD format
    
    Returns:
    tuple: (bool, str) - Success status and URL of the created post
    """
    file_name = f"{date}-{title.lower().replace(' ', '-')[:50]}.md"
    file_content = f"""---
layout: post
title: "{title}"
date: {date} {datetime.now().strftime('%H:%M:%S +0000')}
categories: [blog, AI, research]
---
{content}
"""
    encoded_content = base64.b64encode(file_content.encode("utf-8")).decode("utf-8")
    url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/_posts/{file_name}"
    headers = {
        "Authorization": f"token {GITHUB_TOKEN}",
        "Accept": "application/vnd.github.v3+json"
    }
    
    # Check if file already exists
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        print(f"Blog post already exists: {file_name}")
        return False, ""

    # File doesn't exist, create new file
    data = {
        "message": f"Add new blog post: {title}",
        "content": encoded_content
    }

    response = requests.put(url, headers=headers, json=data)
    if response.status_code != 201:
        print(f"GitHub API Error: {response.status_code}")
        print(f"Response content: {response.text}")
        return False, ""
    
    return True, f"https://{GITHUB_PAGES_SITE}/{date.replace('-', '/')}/{title.lower().replace(' ', '-')[:50]}"

def main():
    """
    Main function to orchestrate the entire process:
    1. Fetch the latest AI paper from arXiv
    2. Generate a blog post summary
    3. Create a GitHub Pages post
    4. Share on Threads
    """
    category = "cs.AI"  # Category for Artificial Intelligence
    try:
        # Fetch the latest paper
        paper = fetch_latest_paper(category)
        if not paper:
            print(f"No recent papers found in category: {category}")
            return

        display(Markdown(f"## Processing: {paper.title}"))
        
        # Generate blog post
        blog_post = generate_blog_post(paper)
        if not blog_post:
            print(f"Failed to generate blog post for: {paper.title}")
            return

        display(Markdown(f"### Original Paper: [{paper.entry_id}]({paper.entry_id})"))
        display(Markdown(blog_post))
        
        # Save to GitHub
        date = datetime.now().strftime("%Y-%m-%d")
        success, post_url = create_github_blog_post(paper.title, blog_post, date)
        
        if success:
            print(f"Successfully created blog post on GitHub: {post_url}")
        else:
            print("Failed to create blog post on GitHub or post already exists.")
            # Generate a generic URL for existing posts
            post_url = f"https://{GITHUB_PAGES_SITE}/{date.replace('-', '/')}/{paper.title.lower().replace(' ', '-')[:50]}"

        # Generate and post to Threads, regardless of GitHub success
        threads_post = generate_threads_post(paper, post_url)
        if threads_post:
            display(Markdown(f"### Threads Post:\n{threads_post}"))
            if post_to_threads(threads_post):
                print("Successfully posted to Threads!")
            else:
                print("Failed to post to Threads.")
        else:
            print("Failed to generate Threads post.")

    except Exception as e:
        print(f"An error occurred: {e}")
        raise  # This will display the full error traceback

if __name__ == "__main__":
    main()

## Processing: Multi-View and Multi-Scale Alignment for Contrastive Language-Image Pre-training in Mammography

### Original Paper: [http://arxiv.org/abs/2409.18119v1](http://arxiv.org/abs/2409.18119v1)

Are you ready to revolutionize mammography with cutting-edge technology? A recent scientific paper by Yuexi Du, John Onofrey, and Nicha C. Dvornek has unveiled a groundbreaking approach to enhance contrastive language-image pre-training (CLIP) specifically for mammography. Let's dive into the exciting world of medical image analysis and explore the implications of this innovative research.

In simple terms, the researchers have developed a method called Multi-View and Multi-Scale Alignment (MaMA) to improve the performance of CLIP in analyzing mammography images. Mammograms, which are X-ray images of the breast, present unique challenges such as limited labeled data, high-resolution images with small areas of interest, and data imbalance. To tackle these issues, the MaMA method leverages the multi-view nature of mammography and incorporates a specialized local alignment module to focus on detailed features in high-resolution images.

But what does this mean for real-world applications? The implications are profound. By enhancing the capabilities of CLIP in mammography analysis, healthcare professionals can potentially improve early detection of breast cancer and other abnormalities. With more accurate and efficient image analysis tools, radiologists and clinicians can make better-informed decisions, leading to earlier interventions and improved patient outcomes.

Furthermore, the MaMA method offers a parameter-efficient fine-tuning approach, allowing for effective utilization of large language models pre-trained with medical knowledge. This approach addresses the data limitations in mammography and demonstrates superior performance compared to existing methods, even with a significantly smaller model size.

Imagine a future where mammograms are analyzed with unprecedented accuracy and efficiency, thanks to advanced AI technologies like MaMA. This research not only pushes the boundaries of medical imaging but also highlights the potential of AI in revolutionizing healthcare diagnostics.

In conclusion, the study by Du, Onofrey, and Dvornek opens up new possibilities for enhancing mammography analysis through multi-view and multi-scale alignment techniques. As we look towards a future where technology plays an increasingly vital role in healthcare, this research serves as a beacon of hope for more effective and accessible diagnostic tools in the fight against breast cancer and other diseases. Exciting times lie ahead in the intersection of AI and medical imaging, where innovation paves the way for a healthier tomorrow.

Blog post already exists: 2024-09-28-multi-view-and-multi-scale-alignment-for-contrasti.md
Failed to create blog post on GitHub or post already exists.


### Threads Post:
🔬✨ Exciting new research alert! A study on enhancing contrastive language-image pre-training in mammography through multi-view and multi-scale alignment has just been published. 📸🔍 Dive into the details to learn how this innovative approach could revolutionize early breast cancer detection. Read more: [URL] #BreastCancer #MedicalImaging #ResearchDiscovery 🌟📚 Read more: https://porkpy.github.io/research_blogger/2024/09/28/multi-view-and-multi-scale-alignment-for-contrasti

Create Media Container Status Code: 200
Waiting 30 seconds before publishing...
Publish Thread Status Code: 200
Successfully posted to Threads!
Successfully posted to Threads!
