In [None]:
import html
import json
import random
import re
from urllib.parse import quote

import requests

# Europe PMC REST search: core records for the journal
# "Eur Heart J Imaging Methods Pract", up to 1000 per page, sorted by
# P_PDATE_D descending, returned as JSON.
url = 'https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=JOURNAL%3A"Eur%20Heart%20J%20Imaging%20Methods%20Pract"&resultType=core&pageSize=1000&sort=P_PDATE_D desc&format=json'

# Fetch data from the API. The timeout stops the cell from hanging forever
# when the service does not answer.
print("Fetching data from API...")
response = requests.get(url, timeout=30)
print("Response received with status code:", response.status_code)
# Fail here on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()

# Extract relevant article details; a response without results yields [].
articles = data.get('resultList', {}).get('result', [])
print(f"Number of articles fetched: {len(articles)}")

def generate_social_media_post(title, first_author_lastname):
    """
    Generate an engaging, professional, and scientific social media post.

    One of the templates below is picked at random; each template mentions
    the title once (in single quotes), the first author, and #EHJIMP once.

    Args:
        title (str): The title of the article.
        first_author_lastname (str): The last name of the first author.

    Returns:
        str: A generated social media post.
    """
    # Every entry must end with a comma: two adjacent string literals are
    # silently concatenated by Python into a single, double-length template.
    templates = [
        f"'{title}' by {first_author_lastname} et al. brings a fresh perspective to cardiovascular imaging. Read it now in #EHJIMP!",
        f"Explore '{title}' by {first_author_lastname} et al., redefining innovation in cardiovascular research. Discover more in #EHJIMP.",
        f"'{title}'—an essential read from {first_author_lastname} et al., showcasing cutting-edge science in #EHJIMP!",
        f"Innovative research alert: '{title}' by {first_author_lastname} et al. is now available in #EHJIMP.",
        f"Leading the way in cardiovascular imaging: '{title}' by {first_author_lastname} et al., featured in #EHJIMP.",
        f"Explore '{title}', the latest in groundbreaking research by {first_author_lastname} et al. Read it in #EHJIMP!",
        f"Don't miss the study '{title}' by {first_author_lastname} et al., a key contribution to cardiovascular science. Now in #EHJIMP!",
        f"{first_author_lastname} et al. present impactful findings in '{title}'. Read this important study in #EHJIMP.",
        f"New in #EHJIMP: '{title}' by {first_author_lastname} et al., offering novel insights into imaging science.",
        f"Highlighting '{title}'—{first_author_lastname} et al. reveal transformative discoveries in #EHJIMP.",
        f"Discover how '{title}' by {first_author_lastname} et al. is shaping the future of imaging research. Read in #EHJIMP!",
        f"Exciting research from {first_author_lastname} et al.: '{title}' explores new dimensions in cardiovascular imaging. Featured in #EHJIMP.",
        f"Explore the paradigm-shifting study '{title}' by {first_author_lastname} et al., now in #EHJIMP!",
        f"Revolutionizing cardiovascular imaging: {first_author_lastname} et al. unveil '{title}' in #EHJIMP.",
        f"Stay at the forefront of cardiovascular science: Read '{title}' by {first_author_lastname} et al., now in #EHJIMP!",
        f"Discover cutting-edge insights in '{title}'. {first_author_lastname} et al. present their latest findings in #EHJIMP!",
        f"The latest research by {first_author_lastname} et al. explores '{title}'. Dive deeper in #EHJIMP!",
        f"Explore the groundbreaking study '{title}' by {first_author_lastname} et al., now featured in #EHJIMP.",
        f"Exciting developments in cardiovascular imaging: '{title}' by {first_author_lastname} et al. Read more in #EHJIMP!",
        f"'{title}'—a must-read study by {first_author_lastname} et al., pushing the boundaries of knowledge in #EHJIMP!",
        f"Delve into the latest advancements in '{title}'. {first_author_lastname} et al. share groundbreaking findings in #EHJIMP!",
        f"New insights unveiled by {first_author_lastname} et al. in '{title}'. Discover the details in #EHJIMP!",
        f"Check out the innovative study '{title}' by {first_author_lastname} et al., now spotlighted in #EHJIMP.",
        f"Significant strides in cardiovascular imaging: '{title}' authored by {first_author_lastname} et al. Explore more in #EHJIMP!",
        f"'{title}'—a pivotal contribution by {first_author_lastname} et al., expanding horizons in #EHJIMP!",
        f"Uncover pioneering research in '{title}' by {first_author_lastname} et al. Read more in #EHJIMP!",
        f"Discover how {first_author_lastname} et al. are advancing the field with '{title}', now featured in #EHJIMP.",
        f"The study '{title}' by {first_author_lastname} et al. offers fresh perspectives in cardiovascular imaging. Explore it in #EHJIMP!",
        f"Breaking new ground in imaging science: '{title}' by {first_author_lastname} et al., highlighted in #EHJIMP.",
        f"Transformative research '{title}' by {first_author_lastname} et al. is shaping the future of cardiovascular imaging. Learn more in #EHJIMP!",
    ]
    return random.choice(templates)

# Static page header: stylesheet, the client-side filter/copy helpers and the
# editors' banners. Backslashes meant for the JavaScript regex are doubled
# because this is a regular (non-raw) Python string.
html_content = '''

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=0.8">
    <title>Codex Editoris | The EHJIMP Edition</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            line-height: 1.2;
            margin: 5px;
            padding: 15px;
        }
        .article {
            margin-top: 15px;
            margin-bottom: 15px;
            padding: 20px;
            border-radius: 0;
            box-shadow: 0px 0px 15px rgba(0, 0, 0, 0.5), -5px -5px 15px rgba(255, 255, 255, 0.7);
        }
        h1 {
            color: #056608;
            padding: 0px;
            margin-bottom: 10px;
            font-size: 1.8em;
            font-weight: bold;
        }
        h2 {
            color: #056608;
            margin: 0;
            padding: 10px;
            font-size: 1.2em;
        }
        .button-download {
            background-color: #b30018;
            margin: 5px;
            padding: 5px;
            border-radius: 0;
            text-decoration: none;
            color: white;
            font-size: 0.9em;
            white-space: nowrap;
            cursor: pointer;
        }
        .details, .meta {
            margin: 10px;
            font-size: 0.9em;
            color: #555;
        }
        .meta {
            display: flex;
            gap: 15px;
            flex-wrap: wrap;
        }
        .meta strong {
            font-weight: bold;
        }
        .author-row {
            display: inline-flex;
            align-items: center;
            gap: 5px;
            flex-wrap: nowrap;
            margin: 5px;
        }
        .author-name {
            white-space: nowrap;
        }
        .button {
            background-color: #b30018;
            padding: 5px;
            text-decoration: none;
            color: black;
            font-size: 0.9em;
            white-space: nowrap;
            cursor: pointer;
        }
        .linkedin {
            background-color: #0077b5;
            padding:5px;
            color: white;
            cursor: pointer;
        }
        .x-button {
            background-color: #000;
            padding: 5px;
            color: white;
            cursor: pointer;
        }
        .open-access {
            background-color: #d4edda;
            color: #155724;
            padding: 5px;
            border-radius: 4px;
            font-size: 0.8em;
            display: inline-block;
            margin: 5px;
        }
        .copy-button {
            background-color: #b30018;
            color: white;
            font-weight: bold;
            padding: 5px;
            border: none;
            cursor: pointer;
        }
        a {
            color: #b30018;
            text-decoration: underline;
        }
        .abstract-header {
            font-weight: bold;
            display: inline;
            margin: 0;
            color: #056608;
        }
        .abstract-content {
            margin: 10px;
            padding: 10px;
            display: inline;
            font-size: 0.8em;
        }
        .abstract-container {
            background-color: #f5f5f5;
            padding: 10px;
            margin-top: 10px;
            font-size: 0.8em;
        }
        .cc-container {
            background-color: transparent;
            margin-top: 20px;
            margin-bottom: 20px;
            font-size: 0.7em;
        }
        .embed-container {
            display: flex;
            gap: 5px;
            margin: 5px;
        }
        .embed-container > div {
            flex: 1;
            padding: 5px;
            background-color: transparent;
        }
        .social-post {
            background-color: transparent;
            padding: 10px;
            margin: 5px;
            font-size: 1em;
            color: #056608;
            border: 1px gray;
            font-weight: bold;
            transition: transform 0.8s ease-in-out; /* smooth transition for scaling */
        }
        .social-post:hover {
            transform: scale(1.01);
        }
        .full-container {
            background-color: transparent;
            padding: 20px;
            margin: 15px;
            font-size: 1em;
            color: #056608;
            box-shadow: 0px 0px 15px rgba(0, 0, 0, 0.5), -5px -5px 15px rgba(255, 255, 255, 0.7);
        }
        .editors-container {
            background-color: transparent;
            padding: 20px;
            margin: 15px;
            font-size: 1em;
            color: #056608;
            box-shadow: 0px 0px 15px rgba(0, 0, 0, 0.5), -5px -5px 15px rgba(255, 255, 255, 0.7);
            transition: transform 0.8s ease-in-out; /* smooth transition for scaling */
        }
        .editors-container:hover {
            transform: scale(1.01);
        }
        .search-container {
            margin: 10px;
        }
        .search-container input {
            margin: 3px;
            padding: 5px;
            font-size: 1em;
        }
        .search-container button {
            margin-top: 10px;
            margin-bottom: 10px;
            background-color: #056608;
            padding: 5px;
            font-size: 0.9em;
            color: white;
            font-weight: bold;
            cursor: pointer;
            border: none;
        }
    </style>
    <script>
        function filterArticles() {
            const keywordInput = document.getElementById('keyword').value.toLowerCase();
            const startDate = document.getElementById('start-date').value;
            const endDate = document.getElementById('end-date').value;
            const articles = document.querySelectorAll('.article');

            articles.forEach(article => {
                const abstract = article.querySelector('.abstract-content').textContent.toLowerCase();
                const keywords = article.querySelector('.details').textContent.toLowerCase();
                const title = article.querySelector('h2').textContent.toLowerCase();
                // Capture only the YYYY-MM-DD date, not the PMID/PMCID text that follows it.
                const pubDate = article.querySelector('.meta').textContent.match(/Published on\\s+(\\d{4}-\\d{2}-\\d{2})/i)?.[1];

                let matchesKeyword = abstract.includes(keywordInput) || keywords.includes(keywordInput) || title.includes(keywordInput);
                let matchesDate = true;

                if (startDate || endDate) {
                    matchesDate = Boolean(pubDate)
                        && (!startDate || pubDate >= startDate)
                        && (!endDate || pubDate <= endDate);
                }

                if (matchesKeyword && matchesDate) {
                    article.style.display = '';
                } else {
                    article.style.display = 'none';
                }
            });
        }

        function resetFilters() {
            document.getElementById('keyword').value = '';
            document.getElementById('start-date').value = '';
            document.getElementById('end-date').value = '';
            const articles = document.querySelectorAll('.article');
            articles.forEach(article => {
                article.style.display = '';
            });
        }

        function copyToClipboard(text) {
            navigator.clipboard.writeText(text).then(() => {
                alert('Copied to clipboard!');
            });
        }
    </script>
    <script async src="https://d1bxh8uas1mnw7.cloudfront.net/assets/embed.js"></script>
</head>
<body>
    <h1>Codex Editoris | EHJIMP edition</h1>
    <p>Codex Editoris is an automated dataset built on the DEITY Framework to enhance semantic annotation of publications datasets.</p>
    <div class="social-post">Thanks to @Alessia @Alex @Aldo @Ali @Chris @Ruben @Edoardo @Iva @Mahmoud @Hany @Absamea @Nazar & @Julia. </div>
    <p>| S. Anwer © The Adimension 2024.</p>
    <div class="editors-container">
        <p id="x-accounts-editors">@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX</p>
        <button class="copy-button" onclick="copyToClipboard(document.getElementById('x-accounts-editors').textContent)">Copy Editors @ X</button>
    </div>
        <div class="editors-container">
        <p id="LIN-accounts-editors">📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging.</p>
        <button class="copy-button" onclick="copyToClipboard(document.getElementById('LIN-accounts-editors').textContent)">Copy Editors @ LinkedIn</button>
    </div>
    <div class="search-container">
        <label for="keyword">Search:</label>
        <input type="text" id="keyword" placeholder="Enter keywords...">
        <label for="start-date">From:</label>
        <input type="date" id="start-date">
        <label for="end-date">To:</label>
        <input type="date" id="end-date">
        <button onclick="filterArticles()">Filter</button>
        <button onclick="resetFilters()">Reset</button>
    </div>

'''

# Generate HTML content for each article
for article in articles:
    journal_info = article.get('journalInfo', {})

    # Log all journal titles for debugging
    journal = journal_info.get('journal', {}).get('title', '')
    print(f"Journal title fetched: {journal}")

    # Check if the journal title matches using regex for flexibility.
    # NOTE(review): this keeps only "The International Journal of
    # Cardiovascular Imaging", while the page title and hashtags say EHJIMP —
    # confirm the filter matches the journal requested from the API.
    if not re.search(r"the international journal of cardiovascular imaging", journal, re.IGNORECASE):
        print(f"Skipping article with journal title: {journal}")
        continue

    title = article.get('title', 'No Title')
    abstract = article.get('abstractText', 'No Abstract Available').replace('\n', ' ').strip()
    keywords = ', '.join(article.get('keywordList', {}).get('keyword', []))
    doi = article.get('doi', None)
    doi_url = f'https://doi.org/{doi}' if doi else '#'
    links = article.get('fullTextUrlList', {}).get('fullTextUrl', [])
    issue = journal_info.get('issue', 'N/A')
    volume = journal_info.get('volume', 'N/A')
    pub_date = journal_info.get('printPublicationDate', 'Unknown Date')
    pmid = article.get('pmid', 'N/A')
    pmcid = article.get('pmcid', 'N/A')

    print(f"Processing article: {title}")

    # Check for open access
    is_open_access = article.get('isOpenAccess', 'N') == 'Y'
    open_access_badge = '<span class="open-access">Open Access</span>' if is_open_access else ''

    # Extract authors
    authors = article.get('authorList', {}).get('author', [])
    first_author_lastname = authors[0].get('lastName', 'Unknown') if authors else 'Unknown'
    author_cards = []
    for author in authors:
        first_name = author.get('firstName', '')
        last_name = author.get('lastName', '')
        full_name = f"{first_name} {last_name}".strip()
        if not full_name:
            # Nothing to show or search for; skip instead of emitting an
            # empty row with buttons that search for "".
            continue
        name_query = quote(full_name)
        linkedin_link = f'https://www.linkedin.com/search/results/people/?keywords={name_query}'
        x_link = f'https://twitter.com/search?q={name_query}'

        author_cards.append(f'''
        <span class="author-row">
            <span class="author-name">{html.escape(full_name)}</span>
            <a href="{linkedin_link}" target="_blank" class="button linkedin">In</a>
            <a href="{x_link}" target="_blank" class="button x-button">X</a>
        </span>
        ''')
    author_rows = ''.join(author_cards)

    # Generate PDF link: first full-text entry flagged as a PDF, else '#'.
    pdf_url = next(
        (entry['url'] for entry in links if entry.get('documentStyle') == 'pdf'), '#'
    )

    # Generate social media post
    social_post = generate_social_media_post(title, first_author_lastname)

    # Values that land inside HTML attributes must be attribute-escaped.
    # Values passed to copyToClipboard() are first encoded as a JavaScript
    # string literal (json.dumps) and then attribute-escaped, so quotes,
    # backticks or "${" in a title can no longer break the onclick handler.
    doi_href = html.escape(doi_url, quote=True)
    pdf_href = html.escape(pdf_url, quote=True)
    doi_url_js = html.escape(json.dumps(doi_url), quote=True)
    social_post_js = html.escape(json.dumps(social_post), quote=True)
    pmid_attr = html.escape(str(pmid), quote=True)
    doi_attr = html.escape(doi or '', quote=True)  # empty rather than "None"

    # Title and abstract are inserted as delivered by the API so that any
    # inline markup they carry keeps rendering.
    html_content += f'''
    <div class="article">
        <h2>{title}</h2>
        <div class="meta">
            {open_access_badge}
            <strong>Issue</strong> {issue} <strong>Vol</strong> {volume} <strong>Published on</strong> {pub_date} <strong>PMID</strong> {pmid} <strong>PMCID</strong> {pmcid}
            <a href="{pdf_href}" target="_blank" class="button-download">Download PDF</a>
        </div>
        <div class="details">
            <strong>Keywords:</strong> {keywords}<br>
            <strong>DOI:</strong> <a href="{doi_href}" target="_blank">{html.escape(doi_url)}</a> <button class="copy-button" onclick="copyToClipboard({doi_url_js})">Copy DOI</button>
        </div>
        <div class="details">
            <strong>Authors:</strong> {author_rows}
        </div>
        <div class="abstract-container">
            <span class="abstract-header">Abstract:</span>
            <span class="abstract-content">{abstract}</span>
        </div>
        <div class="social-post">
            <strong>Social Media Post:</strong><br>
            {social_post} <button class="copy-button" onclick="copyToClipboard({social_post_js})">Copy Text</button>
        </div>
        <div class="embed-container">
            <div>
                <span class="__dimensions_badge_embed__" data-pmid="{pmid_attr}" data-legend="always"></span>
                <script async src="https://badge.dimensions.ai/badge.js" charset="utf-8"></script>
            </div>
            <div>
                <div data-badge-details="right" data-badge-type="medium-donut" data-doi="{doi_attr}" data-legend="always" data-condensed="true" data-hide-no-mentions="false" class="altmetric-embed"></div>
            </div>
        </div>
    </div>
    '''

# Close the <body> and <html> elements opened by the page header.
html_content = html_content + '''
</body>
</html>
'''

# Write the finished page to disk as UTF-8 and report progress around it.
output_file = 'Codex_Editoris_EHJIMP.html'
print("Saving the HTML file...")
with open(output_file, mode='w', encoding='utf-8') as out_handle:
    out_handle.write(html_content)

print(f"HTML file '{output_file}' generated successfully.")

Fetching data from API...
Response received with status code: 200
Number of articles fetched: 300
Journal title fetched: The international journal of cardiovascular imaging
Processing article: Visualization of complex mitral valve anatomy using real-time three-dimensional computer graphics.
Journal title fetched: The international journal of cardiovascular imaging
Processing article: Monitoring ATTR cardiomyopathy - addressing an unmet need by multimodality imaging.
Journal title fetched: The international journal of cardiovascular imaging
Processing article: Validation of a hand-held ultrasound device in the evaluation of aortic stenosis.
Journal title fetched: The international journal of cardiovascular imaging
Processing article: Myocardial ischaemia following COVID-19: a cardiovascular magnetic resonance study.
Journal title fetched: The international journal of cardiovascular imaging
Processing article: The relationship between symptoms and regurgitant severity in primary mitral r

# Python on Google Colab

In [None]:
pip install requests beautifulsoup4 pillow



In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Color palette: display name -> hex code. The pairs are listed in the order
# in which the palette image draws its bands.
colors = dict(
    [
        ("Dark Red", "#8C1B24"),
        ("Bright Red", "#BF1F1F"),
        ("Deep Maroon", "#592323"),
        ("Dark Charcoal", "#2E2B2B"),
        ("Warm Gray", "#D9D6D6"),
        ("Stone Gray", "#B0AFAF"),
        ("Ivory", "#F0F0F0"),
    ]
)

# Origin prepended to the site-relative hrefs found on the scraped pages.
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
# Modality tag -> title pattern that triggers it (compiled once, not per card).
_MODALITY_PATTERNS = (
    ("Echo", re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE)),
    ("CT", re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE)),
    ("MPI/SPECT", re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE)),
    ("CMR", re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE)),
)


def _parse_article_card(item):
    """Convert one search-result ``<li>`` element into an article dict."""
    from urllib.parse import urljoin

    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    href = link_tag.get('href') if link_tag else None
    link = urljoin(BASE_URL, href) if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    authors = author_tag.get_text(strip=True).split(", ") if author_tag else []
    authors = [name.strip() for name in authors if name.strip()]
    if authors:
        first_author = authors[0]
        last_author = authors[-1] if len(authors) > 1 else "None"
        first_author_last_name = first_author.split()[-1]
    else:
        # Do not derive a surname from the placeholder text (it used to
        # come out as "listed").
        first_author = last_author = "No authors listed"
        first_author_last_name = "Unknown"

    image_url = None
    picture_tag = item.find('picture')
    if picture_tag:
        source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
        srcset = source_tag.get('srcset', '') if source_tag else ''
        candidates = [entry for entry in srcset.split(", ") if entry.strip()]
        if candidates:
            # Prefer the second srcset candidate, as before; fall back to the
            # only one instead of raising IndexError.
            chosen = candidates[1] if len(candidates) > 1 else candidates[0]
            # Swap the "w<digits>h<digits>" size token for "lw800".
            image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0])

    entitlement_tag = item.find('div', class_="app-entitlement__text")
    access = "Open" if entitlement_tag and "open" in entitlement_tag.get_text(strip=True).lower() else "Partial"
    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

    keywords = [tag for tag, pattern in _MODALITY_PATTERNS if pattern.search(title)]

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': image_url,
        'access': access,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': publication_date,
        'first_author_last_name': first_author_last_name,
    }


def fetch_articles(base_url):
    """Scrape every page of a search-result listing, following "next" links.

    Args:
        base_url (str): URL of the first result page.

    Returns:
        list[dict]: One dict per result card with the keys ``title``,
        ``link``, ``doi_link``, ``first_author``, ``last_author``,
        ``image_url``, ``access``, ``keywords``, ``publication_date`` and
        ``first_author_last_name``. If a page cannot be fetched, the
        articles collected so far are returned.
    """
    from urllib.parse import urljoin

    articles = []
    visited = set()  # guards against a "next" link that loops back
    current_url = base_url

    while current_url and current_url not in visited:
        visited.add(current_url)
        print(f"Scraping URL: {current_url}")
        try:
            response = requests.get(current_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Keep what was already scraped rather than parsing an error page
            # or losing earlier pages to an exception.
            print(f"Request failed for {current_url}: {exc}")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article_card(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        next_href = next_button.get('href') if next_button else None
        if next_href:
            current_url = urljoin(BASE_URL, next_href)
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with varied introductory phrases
def prepare_post_text(article):
    """Build the social-media post text for one article.

    Args:
        article (dict): Needs the keys ``title``, ``first_author_last_name``,
            ``doi_link`` and ``keywords`` (a comma-separated string).

    Returns:
        str: A randomly chosen opening line followed by the DOI link, a blank
        line, then the topic hashtag and the editors' handles.
    """
    title = article['title']
    first_author_last_name = article['first_author_last_name']
    doi_link = article['doi_link']
    keywords = article['keywords']

    def mentions(terms):
        # Whole-word match: a plain substring test let "CT" match inside
        # "MPI/SPECT", which tagged nuclear articles with the CT hashtag.
        return any(re.search(rf"\b{re.escape(term)}\b", keywords) for term in terms)

    # Map keywords to topic-specific hashtag; the first matching rule wins.
    hashtag_rules = (
        ("#EchoFirst", ("Echo", "Echocardiography", "Cardiac Echo", "Ultrasound")),
        ("#YesCCT", ("CT", "Computed Tomography", "Cat Scan", "CT Scan")),
        ("#ThinkPET #CVNuc", ("PET", "Nuclear", "Nuclear Medicine", "MPI", "SPECT",
                              "Myocardial Perfusion", "Myocardial Perfusion Imaging")),
        ("#WhyCMR", ("CMR", "Cardiac MRI", "MRI", "Magnetic Resonance", "Cardiac Magnetic Resonance")),
    )
    topic_hashtag = next(
        (hashtag for hashtag, terms in hashtag_rules if mentions(terms)),
        "#CVImaging",
    )

    twitter_handles = ("@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon "
                       "@SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri "
                       "@MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX")

    first_line_templates = [
        f"🚀 Exploring {title} - {first_author_last_name} et al. provide insights on:",
        f"🔍 {title} by {first_author_last_name} et al. highlights:",
        f"🌟 Discover key findings in {title} - presented by {first_author_last_name} et al.:",
        f"📊 New perspectives in {title}, shared by {first_author_last_name} et al.:",
        f"💡 {first_author_last_name} et al. on {title}:"
    ]
    post_message = f"{random.choice(first_line_templates)} {doi_link}"
    post_footer = f"{topic_hashtag} {twitter_handles}"
    post_text = f"{post_message}\n\n{post_footer}"

    return post_text

# Step 3: Generate Color Palette Image and Encode to Base64
def generate_palette_image():
    """Draw the color palette as stacked, labelled bands.

    Returns:
        str: The 800x400 PNG as a ``data:image/png;base64,...`` URI.
    """
    canvas_w, canvas_h = 800, 400
    band_h = canvas_h // len(colors)
    # Bands dark enough to need a light label.
    light_label_bands = ("Dark Red", "Bright Red", "Deep Maroon", "Dark Charcoal")

    swatch = Image.new("RGB", (canvas_w, canvas_h), colors["Ivory"])
    pen = ImageDraw.Draw(swatch)

    for band_index, (color_name, hex_code) in enumerate(colors.items()):
        top = band_index * band_h
        pen.rectangle([0, top, canvas_w, top + band_h], fill=hex_code)
        if color_name in light_label_bands:
            label_color = colors["Ivory"]
        else:
            label_color = colors["Dark Charcoal"]
        caption = f"{color_name} ({hex_code})"
        pen.text((10, top + band_h // 2 - 10), caption, fill=label_color)

    # Serialize to PNG in memory, then wrap the bytes in a data URI.
    png_buffer = io.BytesIO()
    swatch.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
    return "data:image/png;base64," + encoded

def generate_html(articles, base64_image):
    """Render the articles as ``generated_posts.html`` in the working directory.

    Args:
        articles (list[dict]): Each dict must provide ``access``,
            ``keywords``, ``publication_date``, ``first_author``,
            ``last_author``, ``doi_link`` and ``image_url``, plus whatever
            ``prepare_post_text`` reads.
        base64_image (str): Image source (a data URI) shown at the top.

    Returns:
        None. The page is written to disk and a confirmation is printed.
    """
    # Function-scope imports keep this cell self-contained.
    from html import escape
    from json import dumps
    from urllib.parse import quote

    def js_arg(value):
        # JavaScript string literal, escaped for use inside an HTML attribute,
        # so quotes in scraped text cannot break the onclick handlers.
        return escape(dumps(value), quote=True)

    def people_search_urls(name):
        # Percent-encode the whole name, not just its spaces.
        query = quote(str(name))
        return (
            f"https://www.linkedin.com/search/results/people/?keywords={query}",
            f"https://x.com/search?q={query}&f=user",
        )

    palette_src = escape(base64_image, quote=True)
    html_parts = [f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.6; }}
            .post {{ margin: 20px 0; padding: 15px; border: 1px solid #ccc; border-radius: 8px; }}
            .post img {{ max-width: 50%; margin-top: 10px; }}
            .post-text {{ font-size: 16px; margin-top: 10px; color: blue; }}
            h3, h4 {{ margin: 10px 0; font-size: 1.1em; }}
            p {{ margin: 5px 0; }}
            .button-row {{ display: flex; gap: 10px; margin-top: 10px; }}
            .copy-button {{ background-color: #4CAF50; color: white; padding: 5px 10px; border: none; cursor: pointer; border-radius: 5px; }}
            .copy-button:hover {{ background-color: #45a049; }}
        </style>
        <script>
            function copyToClipboard(content, message) {{
                navigator.clipboard.writeText(content).then(function() {{
                    alert(message + ' copied to clipboard!');
                }}).catch(function(err) {{
                    console.error('Could not copy text: ', err);
                }});
            }}
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div><img src="{palette_src}" alt="Color Palette"></div>
    """]

    for idx, article in enumerate(articles):
        post_text = prepare_post_text(article)
        first_author = article['first_author']
        last_author = article['last_author'] if article['last_author'] else 'None'
        first_author_linkedin, first_author_x = people_search_urls(first_author)
        last_author_linkedin, last_author_x = people_search_urls(last_author)

        # Only show the image and its copy button when a URL was scraped;
        # otherwise the page would contain <img src="None">.
        image_url = article['image_url']
        if image_url:
            image_block = f"""
            <img src="{escape(str(image_url), quote=True)}" alt="Article Image">
            <div class="button-row">
                <button class="copy-button" onclick="copyToClipboard({js_arg(image_url)}, 'Media Link')">Copy Media Link</button>
            </div>"""
        else:
            image_block = ""

        html_parts.append(f"""
        <div class="post">
            <h3>Information</h3>
            <p><strong>Access:</strong> {escape(str(article['access']))}</p>
            <h4>Category</h4>
            <p>{escape(str(article['keywords']))}</p>
            <h4>Date</h4>
            <p>{escape(str(article['publication_date']))}</p>
            <h4>First Author</h4>
            <p>{escape(str(first_author))}</p>
            <div class="button-row">
                <button class="copy-button" onclick="window.open({js_arg(first_author_linkedin)}, '_blank')">First Author LinkedIn</button>
                <button class="copy-button" onclick="window.open({js_arg(first_author_x)}, '_blank')">First Author X</button>
            </div>
            <h4>Last Author</h4>
            <p>{escape(str(last_author))}</p>
            <div class="button-row">
                <button class="copy-button" onclick="window.open({js_arg(last_author_linkedin)}, '_blank')">Last Author LinkedIn</button>
                <button class="copy-button" onclick="window.open({js_arg(last_author_x)}, '_blank')">Last Author X</button>
            </div>
            <h4>The Post</h4>
            <div class="post-text" id="postText{idx}">{escape(post_text)}</div>
            <div class="button-row">
                <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post Text</button>
                <button class="copy-button" onclick="copyToClipboard({js_arg(article['doi_link'])}, 'DOI Link')">Copy DOI Link</button>
            </div>{image_block}
        </div>
        """)

    html_parts.append("</body></html>")

    # The page declares UTF-8 and the posts contain emoji, so the encoding is
    # set explicitly instead of relying on the platform default.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(html_parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Scrape the listing, build the palette image and write the posts page.

    The start URL is a Springer Link search for articles of journal id 10554
    dated from 2024 onward, newest first.
    """
    base_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    articles = fetch_articles(base_url)
    base64_image = generate_palette_image()
    generate_html(articles, base64_image)


# Still runs when the cell or script is executed directly, but importing this
# code no longer starts a scrape as a side effect.
if __name__ == "__main__":
    main()


Scraping URL: https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=2
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=3
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=4
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sor

In [None]:
!pip freeze > requirements.txt


## New version

In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Color palette: human-readable color name -> hex string.
# NOTE(review): the functions in this cell hard-code "#8C1B24" in their inline
# styles rather than reading this dict — confirm whether `colors` is still needed.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Site origin that fetch_articles() prepends to the hrefs it scrapes
# (article links and the "next page" pagination link).
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
# (label, compiled pattern) pairs used to tag a title with imaging modalities.
# Compiled once here rather than on every scraped item.
_MODALITY_PATTERNS = (
    ("Echo", re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE)),
    ("CT", re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE)),
    ("MPI/SPECT", re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE)),
    ("CMR", re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE)),
)

# Seconds to wait for the server before giving up on a page request.
_REQUEST_TIMEOUT = 30


def _detect_keywords(title):
    """Return the modality labels whose pattern matches ``title``, in fixed order."""
    return [label for label, pattern in _MODALITY_PATTERNS if pattern.search(title)]


def _extract_image_url(item):
    """Return the rewritten image URL for one result card, or None if it has none."""
    picture_tag = item.find('picture')
    if not picture_tag:
        return None
    source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
    if not source_tag or 'srcset' not in source_tag.attrs:
        return None
    candidates = source_tag['srcset'].split(", ")
    # The second srcset candidate is the one used; fall back to the first when
    # only one is listed instead of raising IndexError.
    candidate = candidates[1] if len(candidates) > 1 else candidates[0]
    return re.sub(r'w\d+h\d+', 'lw800', candidate.split(" ")[0])


def _parse_article(item):
    """Build the article dict for one ``<li>`` result card."""
    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    # .get() so an anchor without an href is treated as "no link" (no KeyError).
    href = link_tag.get('href') if link_tag else None
    link = BASE_URL + href if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    if author_tag:
        authors = author_tag.get_text(strip=True).split(", ")
        first_author = authors[0]
        last_author = authors[-1] if len(authors) > 1 else "None"
        name_parts = first_author.split()
        first_author_last_name = name_parts[-1] if name_parts else "Unknown"
    else:
        first_author = last_author = "No authors listed"
        # The placeholder is not a real name, so no surname is derived from it.
        first_author_last_name = "Unknown"

    access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    keywords = _detect_keywords(title)

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': _extract_image_url(item),
        'access': "Open Access" if access_tag else None,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': date_tag.get_text(strip=True) if date_tag else "No date available",
        'first_author_last_name': first_author_last_name,
    }


def fetch_articles(base_url):
    """
    Scrape every page of a search-results listing into a list of article dicts.

    Starting at ``base_url``, each page is downloaded and parsed, and the
    "next" pagination link is followed until none is found. If a page comes
    back with an HTTP error status, scraping stops and the articles collected
    so far are returned.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per ``<li>`` in the results list, with the keys
        ``title``, ``link``, ``doi_link``, ``first_author``, ``last_author``,
        ``image_url``, ``access``, ``keywords``, ``publication_date`` and
        ``first_author_last_name``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(current_url, timeout=_REQUEST_TIMEOUT)
        if not response.ok:
            # Do not parse an error page as if it were a results page.
            print(f"Request failed with HTTP status {response.status_code}; stopping pagination.")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """
    Assemble the four copyable text segments for one article.

    Args:
        article (dict): Must provide ``title``, ``first_author_last_name``
            and ``doi_link``.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``. The
        two account strings are fixed and do not depend on the article.
    """
    # Segment 1: headline sentence built from the title and first author's surname
    headline = "🚀 Exploring {} - {} et al. provide insights on:\n".format(
        article['title'], article['first_author_last_name']
    )

    # Segment 2: DOI link, passed through unchanged
    doi = article['doi_link']

    # Segment 3A: fixed list of X handles and hashtags
    x_handles = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"

    # Segment 3B: fixed list of LinkedIn names
    linkedin_names = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    return headline, doi, x_handles, linkedin_names

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """
    Render the articles as a standalone HTML page and write it to disk.

    Each article becomes a collapsible card holding the post text, the DOI
    link, the X and LinkedIn account lists (each with a copy-to-clipboard
    button) and the article image. The page is written as UTF-8 to
    ``generated_posts.html`` in the current working directory.

    Args:
        articles (list[dict]): Article dicts providing ``title``, ``access``,
            ``publication_date`` and ``image_url``, plus the keys read by
            ``prepare_post_segments``.

    Returns:
        None
    """
    import html  # local import: only needed here, to escape scraped text

    page_head = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.6; }
            .post { margin: 20px 0; padding: 15px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 20px; }
            .button-row { display: flex; gap: 10px; margin-top: 10px; }
            .copy-button { padding: 5px 10px; border: none; cursor: pointer; border-radius: 0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial "display: none" comes from the stylesheet, so the inline
                // style is empty on the first click; test the computed value instead.
                if (window.getComputedStyle(content).display === "none") {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">

            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
        </div>
    """

    # Collect fragments and join once instead of growing a string with +=.
    parts = [page_head]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        # Scraped values are untrusted text: escape them before embedding in
        # markup so characters such as "<" or "&" cannot break the page.
        title = html.escape(str(article['title']))
        published = html.escape(str(article['publication_date']))
        post_text = html.escape(str(post_text))
        doi_link = html.escape(str(doi_link))
        x_accounts = html.escape(str(x_accounts))
        linkedin_accounts = html.escape(str(linkedin_accounts))

        oa_badge = (
            "<span class='open-access-label' style='background-color: #8C1B24; color: white; padding: 2px 8px; margin-left: 15px; border-radius: 5px;'>Open Access</span>"
            if article['access'] == "Open Access" else ""
        )

        # Omit the <img> entirely when no image was scraped, instead of
        # emitting src="None".
        image_url = article['image_url']
        image_tag = (
            f'<img id="image{idx}" src="{html.escape(str(image_url))}" alt="Article Image" style="width: 100%; transition: all 0.3s ease;">'
            if image_url else ""
        )

        parts.append(f"""
        <div class="post">
            <h2 style="color: #8C1B24; font-weight: bold; cursor: pointer; display: flex; align-items: center;" onclick="toggleCollapse('content{idx}')">
                {title}
                {oa_badge}
                <span style="margin-left: 15px;">- Published on {published}</span>
                <span id="indicator{idx}" style="margin-left: auto;">[+]</span>
            </h2>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container" style="display: flex; align-items: flex-start; gap: 20px;">
                    <div class="post-details" style="flex: 1;">
                        <h3 style="color: #8C1B24; font-weight: bold;">1. Post</h3>
                        <div class="post-text" id="postText{idx}">{post_text}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">1. Copy Post</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">2. DOI</h3>
                        <div class="doi-link" id="doiLink{idx}">{doi_link}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">2. Copy DOI</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">3A. X Accounts</h3>
                        <div class="x-accounts" id="xAccounts{idx}">{x_accounts}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">3A. Copy X accounts</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">3B. LinkedIn Accounts</h3>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{linkedin_accounts}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">3B. Copy LinkedIn accounts</button>
                        </div>
                    </div>
                    <div class="post-image" style="flex: 0 0 20%;">
                        {image_tag}
                    </div>
                </div>
            </div>
        </div>
        """)

    parts.append("</body></html>")

    # Explicit UTF-8: the page declares charset UTF-8 and contains emoji, which
    # a platform-default encoding may not be able to encode.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Fetch the article list from the Springer search URL and hand it to generate_html()."""
    search_url = (
        'https://link.springer.com/search'
        '?new-search=true&facet-journal-id=10554&query=*&content-type=article'
        '&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    )
    generate_html(fetch_articles(search_url))

main()


In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Color palette: human-readable color name -> hex string.
# NOTE(review): the functions in this cell hard-code "#8C1B24" in their styles
# rather than reading this dict — confirm whether `colors` is still needed.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Site origin that fetch_articles() prepends to the hrefs it scrapes
# (article links and the "next page" pagination link).
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
# (label, compiled pattern) pairs used to tag a title with imaging modalities.
# Compiled once here rather than on every scraped item.
_MODALITY_PATTERNS = (
    ("Echo", re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE)),
    ("CT", re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE)),
    ("MPI/SPECT", re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE)),
    ("CMR", re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE)),
)

# Seconds to wait for the server before giving up on a page request.
_REQUEST_TIMEOUT = 30


def _detect_keywords(title):
    """Return the modality labels whose pattern matches ``title``, in fixed order."""
    return [label for label, pattern in _MODALITY_PATTERNS if pattern.search(title)]


def _extract_image_url(item):
    """Return the rewritten image URL for one result card, or None if it has none."""
    picture_tag = item.find('picture')
    if not picture_tag:
        return None
    source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
    if not source_tag or 'srcset' not in source_tag.attrs:
        return None
    candidates = source_tag['srcset'].split(", ")
    # The second srcset candidate is the one used; fall back to the first when
    # only one is listed instead of raising IndexError.
    candidate = candidates[1] if len(candidates) > 1 else candidates[0]
    return re.sub(r'w\d+h\d+', 'lw800', candidate.split(" ")[0])


def _parse_article(item):
    """Build the article dict for one ``<li>`` result card."""
    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    # .get() so an anchor without an href is treated as "no link" (no KeyError).
    href = link_tag.get('href') if link_tag else None
    link = BASE_URL + href if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    if author_tag:
        authors = author_tag.get_text(strip=True).split(", ")
        first_author = authors[0]
        last_author = authors[-1] if len(authors) > 1 else "None"
        name_parts = first_author.split()
        first_author_last_name = name_parts[-1] if name_parts else "Unknown"
    else:
        first_author = last_author = "No authors listed"
        # The placeholder is not a real name, so no surname is derived from it.
        first_author_last_name = "Unknown"

    access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    keywords = _detect_keywords(title)

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': _extract_image_url(item),
        'access': "Open Access" if access_tag else None,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': date_tag.get_text(strip=True) if date_tag else "No date available",
        'first_author_last_name': first_author_last_name,
    }


def fetch_articles(base_url):
    """
    Scrape every page of a search-results listing into a list of article dicts.

    Starting at ``base_url``, each page is downloaded and parsed, and the
    "next" pagination link is followed until none is found. If a page comes
    back with an HTTP error status, scraping stops and the articles collected
    so far are returned.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per ``<li>`` in the results list, with the keys
        ``title``, ``link``, ``doi_link``, ``first_author``, ``last_author``,
        ``image_url``, ``access``, ``keywords``, ``publication_date`` and
        ``first_author_last_name``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(current_url, timeout=_REQUEST_TIMEOUT)
        if not response.ok:
            # Do not parse an error page as if it were a results page.
            print(f"Request failed with HTTP status {response.status_code}; stopping pagination.")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """
    Assemble the four copyable text segments for one article.

    Args:
        article (dict): Must provide ``title``, ``first_author_last_name``
            and ``doi_link``.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``. The
        two account strings are fixed and do not depend on the article.
    """
    # Segment 1: headline sentence built from the title and first author's surname
    headline = "🚀 Exploring {} - {} et al. provide insights on:\n".format(
        article['title'], article['first_author_last_name']
    )

    # Segment 2: DOI link, passed through unchanged
    doi = article['doi_link']

    # Segment 3A: fixed list of X handles and hashtags
    x_handles = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"

    # Segment 3B: fixed list of LinkedIn names
    linkedin_names = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    return headline, doi, x_handles, linkedin_names

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """
    Render the articles as a standalone HTML page and write it to disk.

    Each article becomes a card with its title, open-access label,
    publication date and thumbnail, plus a collapsible section holding the
    post text, DOI link, X and LinkedIn account lists (each with a
    copy-to-clipboard button) and the full image with copy/save buttons. The
    page is written as UTF-8 to ``generated_posts.html`` in the current
    working directory.

    Args:
        articles (list[dict]): Article dicts providing ``title``, ``access``,
            ``publication_date`` and ``image_url``, plus the keys read by
            ``prepare_post_segments``.

    Returns:
        None
    """
    import html  # local import: only needed here, to escape scraped text

    page_head = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.6; }
            .post { margin: 20px 0; padding: 15px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 20px; }
            .button-row { display: flex; gap: 10px; margin-top: 10px; }
            .copy-button { padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }
            .copy-button:hover { background-color: #f0f0f0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
            .expand-button { color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }
            .post-image-thumbnail { flex: 0 0 20%; }
            .post-image-full { width: 100%; transition: all 0.3s ease; margin-top: 10px; }
            h2 { font-size: 1.2em; }
            .open-access-label, .publication-date { display: block; font-size: 0.9em; font-weight: bold; color: #8C1B24; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial "display: none" comes from the stylesheet, so the inline
                // style is empty on the first click; test the computed value instead.
                if (window.getComputedStyle(content).display === "none") {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            function saveImage(imgId) {
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
        </div>
        <div class="articles">
    """

    # Collect fragments and join once instead of growing a string with +=.
    parts = [page_head]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        # Scraped values are untrusted text: escape them before embedding in
        # markup so characters such as "<" or "&" cannot break the page.
        title = html.escape(str(article['title']))
        published = html.escape(str(article['publication_date']))
        access_label = html.escape(str(article['access'])) if article['access'] else ''
        post_text = html.escape(str(post_text))
        doi_link = html.escape(str(doi_link))
        x_accounts = html.escape(str(x_accounts))
        linkedin_accounts = html.escape(str(linkedin_accounts))

        # Without a scraped image, omit the <img> tags (no src="None") and the
        # media buttons, which would otherwise point at a missing element.
        image_url = article['image_url']
        if image_url:
            safe_url = html.escape(str(image_url))
            thumbnail_tag = f'<img id="thumbnail{idx}" src="{safe_url}" alt="Article Thumbnail">'
            media_block = f"""<img id="image{idx}" src="{safe_url}" alt="Full Article Image">
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                            <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                        </div>"""
        else:
            thumbnail_tag = ""
            media_block = ""

        parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{title}</h2>
            </div>
            <div class="open-access-label">{access_label}</div>
            <div class="publication-date">- Published on {published}</div>
            <div class="post-image-thumbnail">
                {thumbnail_tag}
            </div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container" style="display: flex; align-items: flex-start; gap: 20px;">
                    <div class="post-details" style="flex: 1;">
                        <h3 style="color: #8C1B24; font-weight: bold;">1. Post</h3>
                        <div class="post-text" id="postText{idx}">{post_text}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">1. Copy Post</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">2. DOI</h3>
                        <div class="doi-link" id="doiLink{idx}">{doi_link}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">2. Copy DOI</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">3A. X Accounts</h3>
                        <div class="x-accounts" id="xAccounts{idx}">{x_accounts}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">3A. Copy X accounts</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">3B. LinkedIn Accounts</h3>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{linkedin_accounts}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">3B. Copy LinkedIn accounts</button>
                        </div>
                    </div>
                    <div class="post-image-full">
                        {media_block}
                    </div>
                </div>
            </div>
        </div>
        """)

    parts.append("</div></body></html>")

    # Explicit UTF-8: the page declares charset UTF-8 and contains emoji, which
    # a platform-default encoding may not be able to encode.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Fetch the article list from the Springer search URL and hand it to generate_html()."""
    search_url = (
        'https://link.springer.com/search'
        '?new-search=true&facet-journal-id=10554&query=*&content-type=article'
        '&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    )
    generate_html(fetch_articles(search_url))

main()


In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Color palette: human-readable color name -> hex string.
# NOTE(review): fetch_articles() and prepare_post_segments() in this cell do
# not read this dict — confirm whether `colors` is used further down.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Site origin that fetch_articles() prepends to the hrefs it scrapes
# (article links and the "next page" pagination link).
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
# (label, compiled pattern) pairs used to tag a title with imaging modalities.
# Compiled once here rather than on every scraped item.
_MODALITY_PATTERNS = (
    ("Echo", re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE)),
    ("CT", re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE)),
    ("MPI/SPECT", re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE)),
    ("CMR", re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE)),
)

# Seconds to wait for the server before giving up on a page request.
_REQUEST_TIMEOUT = 30


def _detect_keywords(title):
    """Return the modality labels whose pattern matches ``title``, in fixed order."""
    return [label for label, pattern in _MODALITY_PATTERNS if pattern.search(title)]


def _extract_image_url(item):
    """Return the rewritten image URL for one result card, or None if it has none."""
    picture_tag = item.find('picture')
    if not picture_tag:
        return None
    source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
    if not source_tag or 'srcset' not in source_tag.attrs:
        return None
    candidates = source_tag['srcset'].split(", ")
    # The second srcset candidate is the one used; fall back to the first when
    # only one is listed instead of raising IndexError.
    candidate = candidates[1] if len(candidates) > 1 else candidates[0]
    return re.sub(r'w\d+h\d+', 'lw800', candidate.split(" ")[0])


def _parse_article(item):
    """Build the article dict for one ``<li>`` result card."""
    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    # .get() so an anchor without an href is treated as "no link" (no KeyError).
    href = link_tag.get('href') if link_tag else None
    link = BASE_URL + href if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    if author_tag:
        authors = author_tag.get_text(strip=True).split(", ")
        first_author = authors[0]
        last_author = authors[-1] if len(authors) > 1 else "None"
        name_parts = first_author.split()
        first_author_last_name = name_parts[-1] if name_parts else "Unknown"
    else:
        first_author = last_author = "No authors listed"
        # The placeholder is not a real name, so no surname is derived from it.
        first_author_last_name = "Unknown"

    access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    keywords = _detect_keywords(title)

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': _extract_image_url(item),
        'access': "Open Access" if access_tag else None,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': date_tag.get_text(strip=True) if date_tag else "No date available",
        'first_author_last_name': first_author_last_name,
    }


def fetch_articles(base_url):
    """
    Scrape every page of a search-results listing into a list of article dicts.

    Starting at ``base_url``, each page is downloaded and parsed, and the
    "next" pagination link is followed until none is found. If a page comes
    back with an HTTP error status, scraping stops and the articles collected
    so far are returned.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per ``<li>`` in the results list, with the keys
        ``title``, ``link``, ``doi_link``, ``first_author``, ``last_author``,
        ``image_url``, ``access``, ``keywords``, ``publication_date`` and
        ``first_author_last_name``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(current_url, timeout=_REQUEST_TIMEOUT)
        if not response.ok:
            # Do not parse an error page as if it were a results page.
            print(f"Request failed with HTTP status {response.status_code}; stopping pagination.")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """
    Assemble the four copyable text segments for one article.

    Args:
        article (dict): Must provide ``title``, ``first_author_last_name``
            and ``doi_link``.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``. The
        two account strings are fixed and do not depend on the article.
    """
    # Segment 1: headline sentence built from the title and first author's surname
    headline = "🚀 Exploring {} - {} et al. provide insights on:\n".format(
        article['title'], article['first_author_last_name']
    )

    # Segment 2: DOI link, passed through unchanged
    doi = article['doi_link']

    # Segment 3A: fixed list of X handles and hashtags
    x_handles = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"

    # Segment 3B: fixed list of LinkedIn names
    linkedin_names = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    return headline, doi, x_handles, linkedin_names

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """Render the articles as a standalone HTML page of ready-to-copy posts.

    Each article becomes a collapsible card showing its title, open-access
    label, publication date and thumbnail, plus four copyable segments (post
    text, DOI link, X accounts, LinkedIn accounts) obtained from
    ``prepare_post_segments`` and, when an image was scraped, the full image
    with copy/save buttons.

    All scraped values are HTML-escaped before being inserted into the
    markup, and the image tags are omitted when an article has no image URL.

    Args:
        articles (list[dict]): Article records. The keys 'title', 'access',
            'publication_date' and 'image_url' are read here;
            ``prepare_post_segments`` reads further keys.

    Returns:
        None. The page is written, UTF-8 encoded, to 'generated_posts.html'
        in the current working directory.
    """
    # Function-scope import so this cell does not rely on earlier cells.
    from html import escape

    def esc(value):
        """HTML-escape *value* (quotes included) for text or attribute use."""
        return escape(str(value))

    # Static page head. It stays an f-string so the doubled braces in the CSS
    # and JavaScript collapse to single braces in the output.
    page_parts = [f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.6; }}
            .post {{ margin: 20px 0; padding: 15px; border: 1px solid #ccc; border-radius: 8px; }}
            .filter-bar {{ margin-bottom: 20px; }}
            .button-row {{ display: flex; gap: 10px; margin-top: 10px; }}
            .copy-button {{ padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }}
            .copy-button:hover {{ background-color: #f0f0f0; }}
            .linkedin-button {{ background-color: #0072b1; color: white; }}
            .x-button {{ background-color: #f5f5dc; color: black; border: 1px solid black; }}
            .collapsible-content {{ display: none; transition: all 0.3s ease; }}
            .expand-button {{ color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }}
            .post-image-thumbnail {{ flex: 0 0 20%; }}
            .post-image-full {{ width: 100%; transition: all 0.3s ease; margin-top: 10px; }}
            h2 {{ font-size: 1.2em; }}
            .open-access-label, .publication-date {{ display: block; font-size: 0.9em; font-weight: bold; color: #8C1B24; }}
        </style>
        <script>
            function toggleCollapse(contentId) {{
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial hidden state comes from the stylesheet, so the inline
                // style is empty on the first click; test the computed style instead.
                if (window.getComputedStyle(content).display === "none") {{
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                }} else {{
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }}
            }}

            function copyToClipboard(content, message) {{
                navigator.clipboard.writeText(content).then(function() {{
                    alert(message + ' copied to clipboard!');
                }}).catch(function(err) {{
                    console.error('Could not copy text: ', err);
                }});
            }}

            function saveImage(imgId) {{
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }}
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
        </div>
        <div class="articles">
    """]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        access_text = esc(article['access']) if article['access'] else ''

        image_url = article['image_url']
        if image_url:
            image_src = esc(image_url)
            thumbnail = f'<img id="thumbnail{idx}" src="{image_src}" alt="Article Thumbnail">'
            full_image = f"""<div class="post-image-full">
                        <img id="image{idx}" src="{image_src}" alt="Full Article Image">
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                            <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                        </div>
                    </div>"""
        else:
            # No image was scraped: leave the tags out rather than emit src="None".
            thumbnail = full_image = ''

        # The copy buttons read innerText, which returns the unescaped text,
        # so escaping here does not alter what lands on the clipboard.
        page_parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{esc(article['title'])}</h2>
            </div>
            <div class="open-access-label">{access_text}</div>
            <div class="publication-date">- Published on {esc(article['publication_date'])}</div>
            <div class="post-image-thumbnail">
                {thumbnail}
            </div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container" style="display: flex; align-items: flex-start; gap: 20px;">
                    <div class="post-details" style="flex: 1;">
                        <h3 style="color: #8C1B24; font-weight: bold;">1. Post</h3>
                        <div class="post-text" id="postText{idx}">{esc(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">1. Copy Post</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">2. DOI</h3>
                        <div class="doi-link" id="doiLink{idx}">{esc(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">2. Copy DOI</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">3A. X Accounts</h3>
                        <div class="x-accounts" id="xAccounts{idx}">{esc(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">3A. Copy X accounts</button>
                        </div>

                        <h3 style="color: #8C1B24; font-weight: bold;">3B. LinkedIn Accounts</h3>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{esc(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">3B. Copy LinkedIn accounts</button>
                        </div>
                    </div>
                    {full_image}
                </div>
            </div>
        </div>
        """)

    page_parts.append("</div></body></html>")

    # The page declares UTF-8 and contains emoji, so write it as UTF-8
    # regardless of the platform's default encoding.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(page_parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Scrape the Springer Link search listing and write the posts page.

    The hard-coded search URL is handed to ``fetch_articles`` and the
    resulting article list is passed straight to ``generate_html``.
    """
    search_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    generate_html(fetch_articles(search_url))

main()

In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Named hex colors of the project palette. None of the functions defined in
# this cell look entries up by name; the "Dark Red" value (#8C1B24) also
# appears as a literal in the CSS emitted by generate_html.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Scheme and host that fetch_articles prepends to the hrefs it scrapes
# (article links and the "next" pagination link) to build full URLs.
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
def fetch_articles(base_url):
    """Scrape article metadata from a paginated Springer Link search listing.

    Starting at ``base_url``, each results page is downloaded and parsed, and
    the "next" pagination link is followed until none is found.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per result item with the keys 'title', 'link',
        'doi_link', 'first_author', 'last_author', 'image_url', 'access',
        'keywords', 'publication_date' and 'first_author_last_name'.
        If a page cannot be downloaded (network error, timeout or HTTP error
        status), a message is printed and the articles collected so far are
        returned.
    """
    # Title patterns and the category label each one assigns, in output order.
    keyword_rules = (
        (r'\b(Echocardiography|Echo|Cardiac Echo)\b', "Echo"),
        (r'\b(CT|Computed Tomography|Cat Scan)\b', "CT"),
        (r'\b(PET|Nuclear|MPI|SPECT)\b', "MPI/SPECT"),
        (r'\b(CMR|Cardiac MRI|MRI)\b', "CMR"),
    )

    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        try:
            # A timeout keeps one stalled connection from hanging the run, and
            # the status check stops an error page from being parsed as an
            # empty result list.
            response = requests.get(current_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Request failed for {current_url}: {exc}")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        items = article_container.find_all('li') if article_container else []
        for item in items:
            title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
            title = title_tag.get_text(strip=True) if title_tag else "No title found"

            # Tolerate an anchor without an href instead of raising KeyError.
            link_tag = item.find('a', class_='app-card-open__link')
            href = link_tag.get('href') if link_tag else None
            link = BASE_URL + href if href else None
            doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

            author_tag = item.find('span', attrs={"data-test": "authors"})
            if author_tag:
                authors = author_tag.get_text(strip=True).split(", ")
                first_author = authors[0]
                last_author = authors[-1] if len(authors) > 1 else "None"
                name_parts = first_author.split()
            else:
                first_author = last_author = "No authors listed"
                # The placeholder is not a name, so it must not supply a
                # "last name" (it used to yield "listed").
                name_parts = []
            first_author_last_name = name_parts[-1] if name_parts else "Unknown"

            image_url = None
            picture_tag = item.find('picture')
            source_tag = (
                picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
                if picture_tag else None
            )
            if source_tag and source_tag.get('srcset'):
                candidates = source_tag['srcset'].split(", ")
                # Use the second srcset candidate when there is one; fall back
                # to the only candidate rather than raising IndexError.
                chosen = candidates[1] if len(candidates) > 1 else candidates[0]
                image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0])

            # Presence of the open-access label marks the article as Open Access.
            access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
            access = "Open Access" if access_tag else None

            date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
            publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

            keywords = [
                label for pattern, label in keyword_rules
                if re.search(pattern, title, re.IGNORECASE)
            ]

            articles.append({
                'title': title,
                'link': link,
                'doi_link': doi_link,
                'first_author': first_author,
                'last_author': last_author,
                'image_url': image_url,
                'access': access,
                'keywords': ', '.join(keywords) if keywords else "None",
                'publication_date': publication_date,
                'first_author_last_name': first_author_last_name
            })

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """Assemble the four copy-ready segments of a social-media post.

    Args:
        article (dict): Article record. The keys 'title',
            'first_author_last_name' and 'doi_link' are read.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``.
        The two account strings are fixed and identical for every article.
    """
    # Segment 1: headline sentence naming the article and its first author.
    headline = "🚀 Exploring {title} - {surname} et al. provide insights on:\n".format(
        title=article['title'],
        surname=article['first_author_last_name'],
    )

    # Segments 2, 3A and 3B: the article's DOI link followed by the two fixed
    # account lists (X handles, then LinkedIn names).
    return (
        headline,
        article['doi_link'],
        "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX",
        "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging.",
    )

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """Render the articles as a standalone HTML page of ready-to-copy posts.

    Each article becomes a collapsible card showing its title, open-access
    label and publication date, with LinkedIn / X search buttons for the
    first and last author, the category keywords and four copyable segments
    (post text, DOI link, X accounts, LinkedIn accounts) obtained from
    ``prepare_post_segments``.

    All scraped values are HTML-escaped before being inserted into the
    markup, and author names are percent-encoded before being placed in the
    search URLs. The open-access label is emitted only for articles that
    have one.

    Args:
        articles (list[dict]): Article records. The keys 'title', 'access',
            'publication_date', 'first_author', 'last_author' and 'keywords'
            are read here; ``prepare_post_segments`` reads further keys.

    Returns:
        None. The page is written, UTF-8 encoded, to 'generated_posts.html'
        in the current working directory.
    """
    # Function-scope imports so this cell does not rely on earlier cells.
    from html import escape
    from urllib.parse import quote

    def esc(value):
        """HTML-escape *value* (quotes included) for text or attribute use."""
        return escape(str(value))

    # Static page head. It stays an f-string so the doubled braces in the CSS
    # and JavaScript collapse to single braces in the output.
    page_parts = [f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.4; }}
            .post {{ margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }}
            .filter-bar {{ margin-bottom: 10px; }}
            .button-row {{ display: flex; gap: 5px; margin-top: 5px; }}
            .copy-button, .search-button {{ padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }}
            .copy-button:hover, .search-button:hover {{ background-color: #f0f0f0; }}
            .linkedin-button {{ background-color: #0072b1; color: white; }}
            .x-button {{ background-color: #f5f5dc; color: black; border: 1px solid black; }}
            .collapsible-content {{ display: none; transition: all 0.3s ease; }}
            .expand-button {{ color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }}
            h2 {{ font-size: 1.2em; color: #8C1B24; }}
            .open-access-label {{ background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }}
            .publication-date {{ font-size: 0.9em; color: black; }}
        </style>
        <script>
            function toggleCollapse(contentId) {{
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial hidden state comes from the stylesheet, so the inline
                // style is empty on the first click; test the computed style instead.
                if (window.getComputedStyle(content).display === "none") {{
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                }} else {{
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }}
            }}

            function copyToClipboard(content, message) {{
                navigator.clipboard.writeText(content).then(function() {{
                    alert(message + ' copied to clipboard!');
                }}).catch(function(err) {{
                    console.error('Could not copy text: ', err);
                }});
            }}
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
        </div>
        <div class="articles">
    """]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        # Percent-encode the names: an apostrophe would otherwise terminate
        # the JavaScript string inside onclick, and '&' or non-ASCII
        # characters would corrupt the query string.
        first_author_query = quote(article['first_author'], safe='')
        last_author_query = quote(article['last_author'], safe='')
        first_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={first_author_query}"
        last_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={last_author_query}"
        first_author_x = f"https://x.com/search?q={first_author_query}&f=user"
        last_author_x = f"https://x.com/search?q={last_author_query}&f=user"

        # An empty label would still render as a coloured bar, so emit the
        # element only when the article is open access.
        access = article['access']
        access_label = f'<div class="open-access-label">{esc(access)}</div>' if access else ''

        # The copy buttons read innerText, which returns the unescaped text,
        # so escaping here does not alter what lands on the clipboard.
        page_parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{esc(article['title'])}</h2>
            </div>
            {access_label}
            <div class="publication-date">- Published on {esc(article['publication_date'])}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{esc(article['first_author'])}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{esc(first_author_linkedin)}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{esc(first_author_x)}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{esc(article['last_author'])}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{esc(last_author_linkedin)}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{esc(last_author_x)}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span>{esc(article['keywords'])}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{esc(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{esc(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{esc(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{esc(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    page_parts.append("</div></body></html>")

    # The page declares UTF-8 and contains emoji, so write it as UTF-8
    # regardless of the platform's default encoding.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(page_parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Scrape the Springer Link search listing and write the posts page.

    The hard-coded search URL is handed to ``fetch_articles`` and the
    resulting article list is passed straight to ``generate_html``.
    """
    search_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    generate_html(fetch_articles(search_url))

main()


In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Named hex colors of the project palette. None of the functions defined in
# this cell look entries up by name; the "Dark Red" value (#8C1B24) also
# appears as a literal in the CSS emitted by generate_html.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Scheme and host that fetch_articles prepends to the hrefs it scrapes
# (article links and the "next" pagination link) to build full URLs.
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
def fetch_articles(base_url):
    """Scrape article metadata from a paginated Springer Link search listing.

    Starting at ``base_url``, each results page is downloaded and parsed, and
    the "next" pagination link is followed until none is found.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per result item with the keys 'title', 'link',
        'doi_link', 'first_author', 'last_author', 'image_url', 'access',
        'keywords', 'publication_date' and 'first_author_last_name'.
        If a page cannot be downloaded (network error, timeout or HTTP error
        status), a message is printed and the articles collected so far are
        returned.
    """
    # Title patterns and the category label each one assigns, in output order.
    keyword_rules = (
        (r'\b(Echocardiography|Echo|Cardiac Echo)\b', "Echo"),
        (r'\b(CT|Computed Tomography|Cat Scan)\b', "CT"),
        (r'\b(PET|Nuclear|MPI|SPECT)\b', "MPI/SPECT"),
        (r'\b(CMR|Cardiac MRI|MRI)\b', "CMR"),
    )

    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        try:
            # A timeout keeps one stalled connection from hanging the run, and
            # the status check stops an error page from being parsed as an
            # empty result list.
            response = requests.get(current_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Request failed for {current_url}: {exc}")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        items = article_container.find_all('li') if article_container else []
        for item in items:
            title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
            title = title_tag.get_text(strip=True) if title_tag else "No title found"

            # Tolerate an anchor without an href instead of raising KeyError.
            link_tag = item.find('a', class_='app-card-open__link')
            href = link_tag.get('href') if link_tag else None
            link = BASE_URL + href if href else None
            doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

            author_tag = item.find('span', attrs={"data-test": "authors"})
            if author_tag:
                authors = author_tag.get_text(strip=True).split(", ")
                first_author = authors[0]
                last_author = authors[-1] if len(authors) > 1 else "None"
                name_parts = first_author.split()
            else:
                first_author = last_author = "No authors listed"
                # The placeholder is not a name, so it must not supply a
                # "last name" (it used to yield "listed").
                name_parts = []
            first_author_last_name = name_parts[-1] if name_parts else "Unknown"

            image_url = None
            picture_tag = item.find('picture')
            source_tag = (
                picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
                if picture_tag else None
            )
            if source_tag and source_tag.get('srcset'):
                candidates = source_tag['srcset'].split(", ")
                # Use the second srcset candidate when there is one; fall back
                # to the only candidate rather than raising IndexError.
                chosen = candidates[1] if len(candidates) > 1 else candidates[0]
                image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0])

            # Presence of the open-access label marks the article as Open Access.
            access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
            access = "Open Access" if access_tag else None

            date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
            publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

            keywords = [
                label for pattern, label in keyword_rules
                if re.search(pattern, title, re.IGNORECASE)
            ]

            articles.append({
                'title': title,
                'link': link,
                'doi_link': doi_link,
                'first_author': first_author,
                'last_author': last_author,
                'image_url': image_url,
                'access': access,
                'keywords': ', '.join(keywords) if keywords else "None",
                'publication_date': publication_date,
                'first_author_last_name': first_author_last_name
            })

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """Assemble the four copy-ready segments of a social-media post.

    Args:
        article (dict): Article record. The keys 'title',
            'first_author_last_name' and 'doi_link' are read.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``.
        The two account strings are fixed and identical for every article.
    """
    # Segment 1: headline sentence naming the article and its first author.
    headline = "🚀 Exploring {title} - {surname} et al. provide insights on:\n".format(
        title=article['title'],
        surname=article['first_author_last_name'],
    )

    # Segments 2, 3A and 3B: the article's DOI link followed by the two fixed
    # account lists (X handles, then LinkedIn names).
    return (
        headline,
        article['doi_link'],
        "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX",
        "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging.",
    )

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """Render the articles as a standalone HTML page of ready-to-copy posts.

    Each article becomes a collapsible card showing its title, open-access
    label and publication date, with LinkedIn / X search buttons for the
    first and last author, the category keywords and four copyable segments
    (post text, DOI link, X accounts, LinkedIn accounts) obtained from
    ``prepare_post_segments``.

    All scraped values are HTML-escaped before being inserted into the
    markup, and author names are percent-encoded before being placed in the
    search URLs. The open-access label is emitted only for articles that
    have one.

    Args:
        articles (list[dict]): Article records. The keys 'title', 'access',
            'publication_date', 'first_author', 'last_author' and 'keywords'
            are read here; ``prepare_post_segments`` reads further keys.

    Returns:
        None. The page is written, UTF-8 encoded, to 'generated_posts.html'
        in the current working directory.
    """
    # Function-scope imports so this cell does not rely on earlier cells.
    from html import escape
    from urllib.parse import quote

    def esc(value):
        """HTML-escape *value* (quotes included) for text or attribute use."""
        return escape(str(value))

    # Static page head. It stays an f-string so the doubled braces in the CSS
    # and JavaScript collapse to single braces in the output.
    page_parts = [f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.4; }}
            .post {{ margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }}
            .filter-bar {{ margin-bottom: 10px; }}
            .button-row {{ display: flex; gap: 5px; margin-top: 5px; }}
            .copy-button, .search-button {{ padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }}
            .copy-button:hover, .search-button:hover {{ background-color: #f0f0f0; }}
            .linkedin-button {{ background-color: #0072b1; color: white; }}
            .x-button {{ background-color: #f5f5dc; color: black; border: 1px solid black; }}
            .collapsible-content {{ display: none; transition: all 0.3s ease; }}
            .expand-button {{ color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }}
            h2 {{ font-size: 1.2em; color: #8C1B24; }}
            .open-access-label {{ background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }}
            .publication-date {{ font-size: 0.9em; color: black; }}
        </style>
        <script>
            function toggleCollapse(contentId) {{
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial hidden state comes from the stylesheet, so the inline
                // style is empty on the first click; test the computed style instead.
                if (window.getComputedStyle(content).display === "none") {{
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                }} else {{
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }}
            }}

            function copyToClipboard(content, message) {{
                navigator.clipboard.writeText(content).then(function() {{
                    alert(message + ' copied to clipboard!');
                }}).catch(function(err) {{
                    console.error('Could not copy text: ', err);
                }});
            }}
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
        </div>
        <div class="articles">
    """]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        # Percent-encode the names: an apostrophe would otherwise terminate
        # the JavaScript string inside onclick, and '&' or non-ASCII
        # characters would corrupt the query string.
        first_author_query = quote(article['first_author'], safe='')
        last_author_query = quote(article['last_author'], safe='')
        first_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={first_author_query}"
        last_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={last_author_query}"
        first_author_x = f"https://x.com/search?q={first_author_query}&f=user"
        last_author_x = f"https://x.com/search?q={last_author_query}&f=user"

        # An empty label would still render as a coloured bar, so emit the
        # element only when the article is open access.
        access = article['access']
        access_label = f'<div class="open-access-label">{esc(access)}</div>' if access else ''

        # The copy buttons read innerText, which returns the unescaped text,
        # so escaping here does not alter what lands on the clipboard.
        page_parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{esc(article['title'])}</h2>
            </div>
            {access_label}
            <div class="publication-date">- Published on {esc(article['publication_date'])}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{esc(article['first_author'])}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{esc(first_author_linkedin)}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{esc(first_author_x)}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{esc(article['last_author'])}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{esc(last_author_linkedin)}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{esc(last_author_x)}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span>{esc(article['keywords'])}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{esc(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{esc(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{esc(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{esc(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    page_parts.append("</div></body></html>")

    # The page declares UTF-8 and contains emoji, so write it as UTF-8
    # regardless of the platform's default encoding.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(page_parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Scrape the articles behind the Springer Link search URL and render them to HTML."""
    search_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    # Fetch first, then hand the resulting list straight to the renderer.
    generate_html(fetch_articles(search_url))

# Run the scrape-and-render pipeline as soon as this cell is executed.
main()


In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Define color palette
# Maps a human-readable colour name to its hex code.
# NOTE(review): none of the functions defined in this cell read this dict;
# the CSS in generate_html hard-codes "#8C1B24" instead.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Define the base URL for constructing full links
# fetch_articles prepends this to the href values it scrapes (article links
# and the "next page" link).
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
def fetch_articles(base_url):
    """Scrape article metadata from paginated Springer Link search results.

    Starting at ``base_url``, each results page is downloaded and parsed, and
    the "next" pagination link is followed until none is found.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per result with the keys 'title', 'link',
        'doi_link', 'first_author', 'last_author', 'image_url', 'access',
        'keywords', 'publication_date' and 'first_author_last_name'.
        Missing fields are filled with placeholder strings or ``None``.

    Raises:
        requests.RequestException: If a page cannot be downloaded
            (connection error or timeout).
    """
    # (pattern, label) pairs matched against the title; the order here is the
    # order in which labels appear in the 'keywords' field.
    category_patterns = [
        (r'\b(Echocardiography|Echo|Cardiac Echo)\b', "Echo"),
        (r'\b(CT|Computed Tomography|Cat Scan)\b', "CT"),
        (r'\b(PET|Nuclear|MPI|SPECT)\b', "MPI/SPECT"),
        (r'\b(CMR|Cardiac MRI|MRI)\b', "CMR"),
    ]

    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(current_url, timeout=30)
        if not response.ok:
            # Do not try to parse an HTTP error page as search results.
            print(f"Request failed with status code {response.status_code}; stopping.")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        items = article_container.find_all('li') if article_container else []
        for item in items:
            title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
            title = title_tag.get_text(strip=True) if title_tag else "No title found"

            link_tag = item.find('a', class_='app-card-open__link')
            # .get() tolerates an anchor that has no href attribute.
            href = link_tag.get('href') if link_tag else None
            link = BASE_URL + href if href else None
            doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

            author_tag = item.find('span', attrs={"data-test": "authors"})
            if author_tag:
                authors = author_tag.get_text(strip=True).split(", ")
                first_author = authors[0]
                last_author = authors[-1] if len(authors) > 1 else "None"
            else:
                first_author = last_author = "No authors listed"

            image_url = None
            picture_tag = item.find('picture')
            source_tag = (
                picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
                if picture_tag else None
            )
            srcset = source_tag.get('srcset') if source_tag else None
            if srcset:
                candidates = srcset.split(", ")
                # Prefer the second srcset candidate, but fall back to the
                # only one instead of raising IndexError.
                chosen = candidates[1] if len(candidates) > 1 else candidates[0]
                # Swap the fixed-size token in the URL for the 800px-wide variant.
                image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0])

            # Open Access status is signalled by the presence of the label span.
            access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
            access = "Open Access" if access_tag else None

            date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
            publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

            keywords = [
                label
                for pattern, label in category_patterns
                if re.search(pattern, title, re.IGNORECASE)
            ]

            articles.append({
                'title': title,
                'link': link,
                'doi_link': doi_link,
                'first_author': first_author,
                'last_author': last_author,
                'image_url': image_url,
                'access': access,
                'keywords': ', '.join(keywords) if keywords else "None",
                'publication_date': publication_date,
                'first_author_last_name': first_author.split()[-1] if first_author else "Unknown"
            })

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """Build the four text segments shown for one article.

    Args:
        article (dict): Must provide 'title', 'first_author_last_name' and
            'doi_link'.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``. The
        two account strings are fixed and do not depend on the article.
    """
    # Fixed tag lists (segments 3A and 3B); identical for every article.
    handles_for_x = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"
    names_for_linkedin = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    # Segment 1: headline built from the title and the first author's surname.
    headline = f"🚀 Exploring {article['title']} - {article['first_author_last_name']} et al. provide insights on:\n"

    # Segment 2 (the DOI link) is passed through unchanged.
    return headline, article['doi_link'], handles_for_x, names_for_linkedin

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """Write the articles to ``generated_posts.html`` as collapsible cards.

    Each card shows the title, Open Access label and publication date, and
    expands to reveal author search buttons, the category, the post text,
    the DOI, the account lists and the article image, each with copy buttons.
    The filter-bar controls are emitted without event handlers, so they do
    not filter anything in this version.

    All scraped values are HTML-escaped before being inserted, and author
    names are URL-quoted before being placed in the search links.

    Args:
        articles: Iterable of article dicts with the keys 'title',
            'first_author', 'last_author', 'keywords', 'access',
            'publication_date' and 'image_url', plus whatever
            ``prepare_post_segments`` reads.

    Returns:
        None. The page is written, UTF-8 encoded, to ``generated_posts.html``
        in the current working directory.
    """
    # Function-scope imports keep this notebook cell self-contained.
    from html import escape
    from urllib.parse import quote

    def esc(value):
        """Return ``value`` as text safe for HTML content and attributes."""
        return escape(str(value))

    def search_links(name):
        """Return attribute-safe (LinkedIn, X) people-search URLs for ``name``."""
        # quote() also encodes apostrophes, which would otherwise terminate
        # the JavaScript string literal inside the onclick attribute.
        term = quote(name, safe="")
        return (
            esc(f"https://www.linkedin.com/search/results/people/?keywords={term}"),
            esc(f"https://x.com/search?q={term}&f=user"),
        )

    # Static page header: a plain string, since nothing is interpolated here.
    page_parts = ["""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.4; }
            .post { margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 10px; }
            .button-row { display: flex; gap: 5px; margin-top: 5px; }
            .copy-button, .search-button { padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }
            .copy-button:hover, .search-button:hover { background-color: #f0f0f0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
            .expand-button { color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }
            h2 { font-size: 1.2em; color: #8C1B24; }
            .open-access-label { background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }
            .publication-date { font-size: 0.9em; color: black; }
            .post-image { width: 50%; margin-top: 10px; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // Use the computed style: the initial "display: none" comes from the
                // stylesheet, so content.style.display is empty until it is first set.
                if (window.getComputedStyle(content).display === "none") {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            function saveImage(imgId) {
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
        </div>
        <div class="articles">
    """]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)
        first_author_linkedin, first_author_x = search_links(article['first_author'])
        last_author_linkedin, last_author_x = search_links(article['last_author'])

        title = esc(article['title'])
        access_label = esc(article['access'] or '')
        publication_date = esc(article['publication_date'])
        first_author = esc(article['first_author'])
        last_author = esc(article['last_author'])
        keywords = esc(article['keywords'])

        if article['image_url']:
            media_html = f"""<img id="image{idx}" src="{esc(article['image_url'])}" alt="Full Article Image">
                            <div class="button-row">
                                <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                                <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                            </div>"""
        else:
            # Avoid emitting src="None" when no image was scraped.
            media_html = "<span>No image available</span>"

        page_parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{title}</h2>
            </div>
            <div class="open-access-label">{access_label}</div>
            <div class="publication-date">- Published on {publication_date}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{first_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{first_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{first_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{last_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{last_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{last_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span>{keywords}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{esc(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{esc(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{esc(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{esc(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>

                        <h4>4. Media</h4>
                        <div class="post-image">
                            {media_html}
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    page_parts.append("</div></body></html>")

    # The page declares UTF-8 and contains emoji, so the encoding must be explicit;
    # the platform default is not UTF-8 everywhere.
    with open("generated_posts.html", "w", encoding="utf-8") as file:
        file.write("".join(page_parts))
    print("HTML file 'generated_posts.html' has been generated successfully.")

# Main function
def main():
    """Scrape the articles behind the Springer Link search URL and render them to HTML."""
    search_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    # Fetch first, then hand the resulting list straight to the renderer.
    generate_html(fetch_articles(search_url))

# Run the scrape-and-render pipeline as soon as this cell is executed.
main()

# STABLE - pre r2, without JavaScript filter

In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Define color palette
# Maps a human-readable colour name to its hex code.
# NOTE(review): none of the functions defined in this cell read this dict;
# the CSS in generate_html hard-codes "#8C1B24" instead.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Define the base URL for constructing full links
# fetch_articles prepends this to the href values it scrapes (article links
# and the "next page" link).
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
def fetch_articles(base_url):
    """Scrape article metadata from paginated Springer Link search results.

    Starting at ``base_url``, each results page is downloaded and parsed, and
    the "next" pagination link is followed until none is found.

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per result with the keys 'title', 'link',
        'doi_link', 'first_author', 'last_author', 'image_url', 'access',
        'keywords', 'publication_date' and 'first_author_last_name'.
        Missing fields are filled with placeholder strings or ``None``.

    Raises:
        requests.RequestException: If a page cannot be downloaded
            (connection error or timeout).
    """
    # (pattern, label) pairs matched against the title; the order here is the
    # order in which labels appear in the 'keywords' field.
    category_patterns = [
        (r'\b(Echocardiography|Echo|Cardiac Echo)\b', "Echo"),
        (r'\b(CT|Computed Tomography|Cat Scan)\b', "CT"),
        (r'\b(PET|Nuclear|MPI|SPECT)\b', "MPI/SPECT"),
        (r'\b(CMR|Cardiac MRI|MRI)\b', "CMR"),
    ]

    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(current_url, timeout=30)
        if not response.ok:
            # Do not try to parse an HTTP error page as search results.
            print(f"Request failed with status code {response.status_code}; stopping.")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        items = article_container.find_all('li') if article_container else []
        for item in items:
            title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
            title = title_tag.get_text(strip=True) if title_tag else "No title found"

            link_tag = item.find('a', class_='app-card-open__link')
            # .get() tolerates an anchor that has no href attribute.
            href = link_tag.get('href') if link_tag else None
            link = BASE_URL + href if href else None
            doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

            author_tag = item.find('span', attrs={"data-test": "authors"})
            if author_tag:
                authors = author_tag.get_text(strip=True).split(", ")
                first_author = authors[0]
                # Drop leading dots from the last name in the list.
                # str.lstrip takes a character set, so '.' is equivalent to
                # the former '...'.
                last_author = authors[-1].lstrip('.') if len(authors) > 1 else "None"
            else:
                first_author = last_author = "No authors listed"

            image_url = None
            picture_tag = item.find('picture')
            source_tag = (
                picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
                if picture_tag else None
            )
            srcset = source_tag.get('srcset') if source_tag else None
            if srcset:
                candidates = srcset.split(", ")
                # Prefer the second srcset candidate, but fall back to the
                # only one instead of raising IndexError.
                chosen = candidates[1] if len(candidates) > 1 else candidates[0]
                # Swap the fixed-size token in the URL for the 800px-wide variant.
                image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0])

            # Open Access status is signalled by the presence of the label span.
            access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
            access = "Open Access" if access_tag else None

            date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
            publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

            keywords = [
                label
                for pattern, label in category_patterns
                if re.search(pattern, title, re.IGNORECASE)
            ]

            articles.append({
                'title': title,
                'link': link,
                'doi_link': doi_link,
                'first_author': first_author,
                'last_author': last_author,
                'image_url': image_url,
                'access': access,
                'keywords': ', '.join(keywords) if keywords else "None",
                'publication_date': publication_date,
                'first_author_last_name': first_author.split()[-1] if first_author else "Unknown"
            })

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        if next_button and 'href' in next_button.attrs:
            current_url = BASE_URL + next_button['href']
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """Build the four text segments shown for one article.

    Args:
        article (dict): Must provide 'title', 'first_author_last_name' and
            'doi_link'.

    Returns:
        tuple: ``(post_text, doi_link, x_accounts, linkedin_accounts)``. The
        two account strings are fixed and do not depend on the article.
    """
    # Fixed tag lists (segments 3A and 3B); identical for every article.
    handles_for_x = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"
    names_for_linkedin = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    # Segment 1: headline built from the title and the first author's surname.
    headline = f"🚀 Exploring {article['title']} - {article['first_author_last_name']} et al. provide insights on:\n"

    # Segment 2 (the DOI link) is passed through unchanged.
    return headline, article['doi_link'], handles_for_x, names_for_linkedin

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """Write the articles to ``EHJIMP_codex_editoris.html`` as filterable cards.

    Each card shows the title, Open Access label and publication date, and
    expands to reveal author search buttons, the category, the post text,
    the DOI, the account lists and the article image, each with copy buttons.
    The filter bar hides cards client-side by date range, category and
    Open Access status.

    All scraped values are HTML-escaped before being inserted, and author
    names are URL-quoted before being placed in the search links.

    Args:
        articles: Iterable of article dicts with the keys 'title',
            'first_author', 'last_author', 'keywords', 'access',
            'publication_date' and 'image_url', plus whatever
            ``prepare_post_segments`` reads.

    Returns:
        None. The page is written, UTF-8 encoded, to
        ``EHJIMP_codex_editoris.html`` in the current working directory.
    """
    # Function-scope imports keep this notebook cell self-contained.
    from html import escape
    from urllib.parse import quote

    def esc(value):
        """Return ``value`` as text safe for HTML content and attributes."""
        return escape(str(value))

    def search_links(name):
        """Return attribute-safe (LinkedIn, X) people-search URLs for ``name``."""
        # quote() also encodes apostrophes, which would otherwise terminate
        # the JavaScript string literal inside the onclick attribute.
        term = quote(name, safe="")
        return (
            esc(f"https://www.linkedin.com/search/results/people/?keywords={term}"),
            esc(f"https://x.com/search?q={term}&f=user"),
        )

    page_parts = ["""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.4; }
            .post { margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 10px; }
            .button-row { display: flex; gap: 5px; margin-top: 5px; }
            .copy-button, .search-button { padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }
            .copy-button:hover, .search-button:hover { background-color: #f0f0f0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
            .expand-button { color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }
            h2 { font-size: 1.2em; color: #8C1B24; }
            .open-access-label { background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }
            .publication-date { font-size: 0.9em; color: black; }
            .post-image { width: 50%; margin-top: 10px; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // Use the computed style: the initial "display: none" comes from the
                // stylesheet, so content.style.display is empty until it is first set.
                if (window.getComputedStyle(content).display === "none") {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            function saveImage(imgId) {
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }

            function toIsoDay(text) {
                // Normalise a date string to YYYY-MM-DD so that it compares correctly
                // with the values of the date inputs. Unparseable text is returned as is.
                if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}/.test(text)) {
                    return text.slice(0, 10);
                }
                const parsed = new Date(text);
                if (isNaN(parsed)) {
                    return text;
                }
                const month = String(parsed.getMonth() + 1).padStart(2, '0');
                const day = String(parsed.getDate()).padStart(2, '0');
                return parsed.getFullYear() + '-' + month + '-' + day;
            }

            function filterArticles() {
                const startDate = document.getElementById('date-start').value;
                const endDate = document.getElementById('date-end').value;
                const categories = Array.from(document.getElementById('category-filter').selectedOptions).map(option => option.value);
                const openAccessOnly = document.getElementById('open-access-filter').checked;

                document.querySelectorAll('.post').forEach(post => {
                    const publicationDate = toIsoDay(post.querySelector('.publication-date').textContent.replace('- Published on ', '').trim());
                    // Read the dedicated category span; the first span inside
                    // .post-details holds the first author's name.
                    const articleCategories = post.querySelector('.category').textContent.split(',').map(label => label.trim());
                    const isOpenAccess = post.querySelector('.open-access-label').textContent.trim() === 'Open Access';

                    let isVisible = true;

                    if (startDate && publicationDate < startDate) {
                        isVisible = false;
                    }

                    if (endDate && publicationDate > endDate) {
                        isVisible = false;
                    }

                    // Exact label match, so that "CT" does not match "MPI/SPECT".
                    if (categories.length > 0 && !categories.some(category => articleCategories.includes(category))) {
                        isVisible = false;
                    }

                    if (openAccessOnly && !isOpenAccess) {
                        isVisible = false;
                    }

                    post.style.display = isVisible ? 'block' : 'none';
                });
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start" onchange="filterArticles()">
            <input type="date" id="date-end" onchange="filterArticles()">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple onchange="filterArticles()">
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter" onchange="filterArticles()"> Open Access Only
            </label>
        </div>
        <div class="articles">
    """]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)
        first_author_linkedin, first_author_x = search_links(article['first_author'])
        last_author_linkedin, last_author_x = search_links(article['last_author'])

        title = esc(article['title'])
        access_label = esc(article['access'] or '')
        publication_date = esc(article['publication_date'])
        first_author = esc(article['first_author'])
        last_author = esc(article['last_author'])
        keywords = esc(article['keywords'])

        if article['image_url']:
            media_html = f"""<img id="image{idx}" src="{esc(article['image_url'])}" alt="Full Article Image">
                            <div class="button-row">
                                <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                                <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                            </div>"""
        else:
            # Avoid emitting src="None" when no image was scraped.
            media_html = "<span>No image available</span>"

        page_parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{title}</h2>
            </div>
            <div class="open-access-label">{access_label}</div>
            <div class="publication-date">- Published on {publication_date}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{first_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{first_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{first_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{last_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{last_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{last_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span class="category">{keywords}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{esc(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{esc(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{esc(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{esc(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>

                        <h4>4. Media</h4>
                        <div class="post-image">
                            {media_html}
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    page_parts.append("</div></body></html>")

    # The page declares UTF-8 and contains emoji, so the encoding must be explicit;
    # the platform default is not UTF-8 everywhere.
    with open("EHJIMP_codex_editoris.html", "w", encoding="utf-8") as file:
        file.write("".join(page_parts))
    print("HTML file 'EHJIMP_codex_editoris.html' has been generated successfully.")

# Main function
def main():
    """Scrape the articles behind the Springer Link search URL and render them to HTML."""
    search_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    # Fetch first, then hand the resulting list straight to the renderer.
    generate_html(fetch_articles(search_url))

# Run the scrape-and-render pipeline as soon as this cell is executed.
main()


Scraping URL: https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=2
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=3
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=4
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sor

In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Define color palette
# Maps a human-readable colour name to its hex code.
# NOTE(review): no reader of this dict is visible in this part of the cell —
# confirm it is still used before keeping it.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Define the base URL for constructing full links
# fetch_articles prepends this to the article hrefs it scrapes.
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
def fetch_articles(base_url):
    """
    Scrape a Springer Link search listing, following pagination, and collect article metadata.

    Scraping stops when there is no "next" link, when the next link points to a page
    that was already visited, or when a request fails (a message is printed and the
    articles collected so far are returned).

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per result item with the keys 'title', 'link', 'doi_link',
        'first_author', 'last_author', 'image_url', 'access', 'keywords',
        'publication_date' and 'first_author_last_name'.
    """
    from urllib.parse import urljoin  # local import: the cell's import list does not provide it

    articles = []
    visited = set()  # guards against a pagination link that loops back
    current_url = base_url

    while current_url and current_url not in visited:
        visited.add(current_url)
        print(f"Scraping URL: {current_url}")
        try:
            # A timeout keeps a stalled connection from hanging the notebook forever;
            # raise_for_status() avoids silently parsing an HTTP error page.
            response = requests.get(current_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Request failed for {current_url}: {exc}")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article_item(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        next_href = next_button.get('href') if next_button else None
        if next_href:
            # urljoin handles both relative and already-absolute hrefs.
            current_url = urljoin(BASE_URL, next_href)
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles


# (category label, compiled title pattern) pairs, checked in this order.
_KEYWORD_PATTERNS = (
    ("Echo", re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE)),
    ("CT", re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE)),
    ("MPI/SPECT", re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE)),
    ("CMR", re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE)),
)


def _detect_keywords(title):
    """Return the imaging-category labels whose pattern matches the title, as a list."""
    return [label for label, pattern in _KEYWORD_PATTERNS if pattern.search(title)]


def _parse_article_item(item):
    """Extract one article's metadata dict from a search-result <li> element."""
    from urllib.parse import urljoin

    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    href = link_tag.get('href') if link_tag else None  # .get: an <a> without href must not raise
    link = urljoin(BASE_URL, href) if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    if author_tag:
        authors = author_tag.get_text(strip=True).split(", ")
        first_author = authors[0]
        # Drop a leading ellipsis ("..." or "…") and the space that may follow it.
        last_author = authors[-1].lstrip('.…').strip() if len(authors) > 1 else "None"
        name_parts = first_author.split()
        first_author_last_name = name_parts[-1] if name_parts else "Unknown"
    else:
        first_author = last_author = "No authors listed"
        # Do not derive a surname from the placeholder text.
        first_author_last_name = "Unknown"

    image_url = None
    picture_tag = item.find('picture')
    if picture_tag:
        source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
        if source_tag and 'srcset' in source_tag.attrs:
            candidates = source_tag['srcset'].split(", ")
            # Prefer the second srcset candidate; fall back to the first when only one is listed.
            chosen = candidates[1] if len(candidates) > 1 else candidates[0]
            image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0]) or None

    # Updated code to check for Open Access status
    access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
    access = "Open Access" if access_tag else None

    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

    keywords = _detect_keywords(title)

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': image_url,
        'access': access,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': publication_date,
        'first_author_last_name': first_author_last_name,
    }

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """
    Assemble the copy-ready segments of a social media post for one article.

    Args:
        article (dict): Article dict; the keys 'title', 'first_author_last_name'
            and 'doi_link' are read.

    Returns:
        tuple: (post_text, doi_link, x_accounts, linkedin_accounts), all strings.
    """
    headline = article['title']
    surname = article['first_author_last_name']

    # Fixed mention lists, identical for every article.
    x_mentions = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"
    linkedin_mentions = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    return (
        f"🚀 Exploring {headline} - {surname} et al. provide insights on:\n",  # segment 1: post text
        article['doi_link'],                                                   # segment 2: DOI link
        x_mentions,                                                            # segment 3A: X accounts
        linkedin_mentions,                                                     # segment 3B: LinkedIn accounts
    )

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """
    Render the articles as a filterable HTML page and write it to
    'EHJIMP_codex_editoris.html' in the current working directory.

    All scraped text is HTML-escaped before insertion, author names are percent-encoded
    in the search URLs, and each post carries data-* attributes (ISO date, categories,
    open-access flag) that the page's JavaScript filters read.

    Args:
        articles (list[dict]): Article dicts as produced by fetch_articles(). The keys
            'title', 'first_author', 'last_author', 'keywords', 'access',
            'publication_date' and 'image_url' are read here; each dict is also
            passed to prepare_post_segments().

    Returns:
        None
    """
    from html import escape
    from urllib.parse import quote

    page_head = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.4; }
            .post { margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 10px; }
            .button-row { display: flex; gap: 5px; margin-top: 5px; }
            .copy-button, .search-button { padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }
            .copy-button:hover, .search-button:hover { background-color: #f0f0f0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
            .expand-button { color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }
            h2 { font-size: 1.2em; color: #8C1B24; }
            .open-access-label { background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }
            .publication-date { font-size: 0.9em; color: black; }
            .post-image { width: 50%; margin-top: 10px; }
            .reset-button { margin-left: 15px; padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; }
            .reset-button:hover { background-color: #f0f0f0; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial "display: none" comes from the stylesheet, so the inline
                // style is still empty on the first click; read the computed style instead.
                if (window.getComputedStyle(content).display === "none") {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            function saveImage(imgId) {
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }

            function filterArticles() {
                const startDate = document.getElementById('date-start').value;
                const endDate = document.getElementById('date-end').value;
                const categories = Array.from(document.getElementById('category-filter').selectedOptions).map(option => option.value);
                const openAccessOnly = document.getElementById('open-access-filter').checked;

                document.querySelectorAll('.post').forEach(post => {
                    // data-date is yyyy-mm-dd (the same format the date inputs return),
                    // so plain string comparison is chronological. Empty = unknown date.
                    const publicationDate = post.dataset.date || '';
                    const postCategories = (post.dataset.categories || '').split(', ');
                    const isOpenAccess = post.dataset.openAccess === 'true';

                    let isVisible = true;

                    if (startDate && (!publicationDate || publicationDate < startDate)) {
                        isVisible = false;
                    }

                    if (endDate && (!publicationDate || publicationDate > endDate)) {
                        isVisible = false;
                    }

                    if (categories.length > 0 && !categories.some(category => postCategories.includes(category))) {
                        isVisible = false;
                    }

                    if (openAccessOnly && !isOpenAccess) {
                        isVisible = false;
                    }

                    post.style.display = isVisible ? 'block' : 'none';
                });
            }

            function resetFilters() {
                document.getElementById('date-start').value = '';
                document.getElementById('date-end').value = '';
                document.getElementById('category-filter').selectedIndex = -1;
                document.getElementById('open-access-filter').checked = false;
                filterArticles();
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start" onchange="filterArticles()">
            <input type="date" id="date-end" onchange="filterArticles()">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple onchange="filterArticles()">
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter" onchange="filterArticles()"> Open Access Only
            </label>
            <button class="reset-button" onclick="resetFilters()">Reset Filters</button>
        </div>
        <div class="articles">
    """

    parts = [page_head]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        # Percent-encode the whole name (safe='') so apostrophes, ampersands etc. cannot
        # break out of the single-quoted JavaScript string in the onclick handlers.
        first_author_query = quote(str(article['first_author']), safe='')
        last_author_query = quote(str(article['last_author']), safe='')
        first_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={first_author_query}"
        last_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={last_author_query}"
        first_author_x = f"https://x.com/search?q={first_author_query}&f=user"
        last_author_x = f"https://x.com/search?q={last_author_query}&f=user"

        # Scraped text is untrusted: escape everything that lands in the markup.
        title = escape(str(article['title']))
        first_author = escape(str(article['first_author']))
        last_author = escape(str(article['last_author']))
        keywords = escape(str(article['keywords']))
        publication_date = escape(str(article['publication_date']))
        iso_date = _iso_publication_date(article['publication_date'])
        open_access_flag = 'true' if article['access'] else 'false'

        # Only render the badge when there is a label; an empty div would still be
        # painted with the badge background.
        if article['access']:
            access_html = f'<div class="open-access-label">{escape(str(article["access"]))}</div>'
        else:
            access_html = ''

        # Without an image URL the original emitted src="None"; show a placeholder instead.
        if article['image_url']:
            image_src = escape(str(article['image_url']))
            media_html = f"""<img id="image{idx}" src="{image_src}" alt="Full Article Image">
                            <div class="button-row">
                                <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                                <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                            </div>"""
        else:
            media_html = "<span>No image available</span>"

        parts.append(f"""
        <div class="post" data-date="{iso_date}" data-categories="{keywords}" data-open-access="{open_access_flag}">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{title}</h2>
            </div>
            {access_html}
            <div class="publication-date">- Published on {publication_date}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{first_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{first_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{first_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{last_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{last_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{last_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span>{keywords}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{escape(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{escape(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{escape(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{escape(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>

                        <h4>4. Media</h4>
                        <div class="post-image">
                            {media_html}
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    parts.append("</div></body></html>")

    # The page declares charset UTF-8 and contains emoji, so write UTF-8 explicitly
    # instead of relying on the platform default encoding.
    with open("EHJIMP_codex_editoris.html", "w", encoding="utf-8") as file:
        file.write("".join(parts))
    print("HTML file 'EHJIMP_codex_editoris.html' has been generated successfully.")


def _iso_publication_date(raw_date):
    """Return raw_date as 'YYYY-MM-DD', or '' when it matches none of the formats tried."""
    from datetime import datetime

    text = str(raw_date).strip()
    # NOTE(review): the exact date layout of the scraped listing is not visible in this
    # file, so several common layouts are tried — confirm against real scraped values.
    # %B/%b depend on the process locale (English month names assumed).
    for fmt in ("%d %B %Y", "%d %b %Y", "%B %d, %Y", "%Y-%m-%d"):
        try:
            return datetime.strptime(text, fmt).date().isoformat()
        except ValueError:
            continue
    return ""

# Main function
def main():
    """
    Run the pipeline: scrape the Springer Link search listing, then write the HTML page.

    The search URL restricts results to journal id 10554, articles dated from 2024,
    sorted newest first.
    """
    search_url = 'https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    generate_html(fetch_articles(search_url))

main()


Scraping URL: https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=2
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=3
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=4
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sor

In [None]:
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Define color palette
# Human-readable color names mapped to hex codes.
# NOTE(review): none of the functions visible in this cell read `colors`; the page CSS
# in generate_html() hard-codes "#8C1B24" directly. Confirm whether this is still needed.
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0"
}

# Define the base URL for constructing full links
# Used by fetch_articles() to turn the relative hrefs scraped from the listing
# (article links and the pagination "next" link) into absolute URLs.
BASE_URL = "https://link.springer.com"

# Step 1: Fetch and Parse the Website with Pagination
def fetch_articles(base_url):
    """
    Scrape a Springer Link search listing, following pagination, and collect article metadata.

    Scraping stops when there is no "next" link, when the next link points to a page
    that was already visited, or when a request fails (a message is printed and the
    articles collected so far are returned).

    Args:
        base_url (str): URL of the first search-results page.

    Returns:
        list[dict]: One dict per result item with the keys 'title', 'link', 'doi_link',
        'first_author', 'last_author', 'image_url', 'access', 'keywords',
        'publication_date' and 'first_author_last_name'.
    """
    from urllib.parse import urljoin  # local import: the cell's import list does not provide it

    articles = []
    visited = set()  # guards against a pagination link that loops back
    current_url = base_url

    while current_url and current_url not in visited:
        visited.add(current_url)
        print(f"Scraping URL: {current_url}")
        try:
            # A timeout keeps a stalled connection from hanging the notebook forever;
            # raise_for_status() avoids silently parsing an HTTP error page.
            response = requests.get(current_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Request failed for {current_url}: {exc}")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article_item(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        next_href = next_button.get('href') if next_button else None
        if next_href:
            # urljoin handles both relative and already-absolute hrefs.
            current_url = urljoin(BASE_URL, next_href)
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles


# (category label, compiled title pattern) pairs, checked in this order.
_KEYWORD_PATTERNS = (
    ("Echo", re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE)),
    ("CT", re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE)),
    ("MPI/SPECT", re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE)),
    ("CMR", re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE)),
)


def _detect_keywords(title):
    """Return the imaging-category labels whose pattern matches the title, as a list."""
    return [label for label, pattern in _KEYWORD_PATTERNS if pattern.search(title)]


def _parse_article_item(item):
    """Extract one article's metadata dict from a search-result <li> element."""
    from urllib.parse import urljoin

    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    href = link_tag.get('href') if link_tag else None  # .get: an <a> without href must not raise
    link = urljoin(BASE_URL, href) if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    if author_tag:
        authors = author_tag.get_text(strip=True).split(", ")
        first_author = authors[0]
        # Drop a leading ellipsis ("..." or "…") and the space that may follow it.
        last_author = authors[-1].lstrip('.…').strip() if len(authors) > 1 else "None"
        name_parts = first_author.split()
        first_author_last_name = name_parts[-1] if name_parts else "Unknown"
    else:
        first_author = last_author = "No authors listed"
        # Do not derive a surname from the placeholder text.
        first_author_last_name = "Unknown"

    image_url = None
    picture_tag = item.find('picture')
    if picture_tag:
        source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
        if source_tag and 'srcset' in source_tag.attrs:
            candidates = source_tag['srcset'].split(", ")
            # Prefer the second srcset candidate; fall back to the first when only one is listed.
            chosen = candidates[1] if len(candidates) > 1 else candidates[0]
            image_url = re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0]) or None

    # Updated code to check for Open Access status
    access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
    access = "Open Access" if access_tag else None

    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

    keywords = _detect_keywords(title)

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': image_url,
        'access': access,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': publication_date,
        'first_author_last_name': first_author_last_name,
    }

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """
    Assemble the copy-ready segments of a social media post for one article.

    The post text is drawn at random (module-level `random` state) from four
    phrasings built from the article title and first author's last name.

    Args:
        article (dict): Article dict; the keys 'title', 'first_author_last_name'
            and 'doi_link' are read.

    Returns:
        tuple: (post_text, doi_link, x_accounts, linkedin_accounts), all strings.
    """
    headline = article['title']
    surname = article['first_author_last_name']

    # Segment 1: candidate phrasings, one of which is picked below.
    candidates = [
        f"🚀 Exploring {headline} - {surname} et al. provide insights on:",
        f"🔍 Dive into the details of {headline} by {surname} et al.",
        f"✨ New research spotlight: {headline} - {surname} and colleagues share their findings.",
        f"📊 A fresh perspective on {headline} from {surname} et al.",
    ]
    chosen_text = random.choice(candidates)

    # Fixed mention lists, identical for every article.
    x_mentions = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"
    linkedin_mentions = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    # Segments 2, 3A and 3B follow the post text in the returned tuple.
    return chosen_text, article['doi_link'], x_mentions, linkedin_mentions

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """
    Render the articles as a filterable HTML page and write it to
    'EHJIMP_codex_editoris.html' in the current working directory.

    All scraped text is HTML-escaped before insertion, author names are percent-encoded
    in the search URLs, and each post carries data-* attributes (ISO date, categories,
    open-access flag) that the page's JavaScript filters read. Filters are applied when
    the "Search" button is pressed.

    Args:
        articles (list[dict]): Article dicts as produced by fetch_articles(). The keys
            'title', 'first_author', 'last_author', 'keywords', 'access',
            'publication_date' and 'image_url' are read here; each dict is also
            passed to prepare_post_segments().

    Returns:
        None
    """
    from html import escape
    from urllib.parse import quote

    page_head = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.4; }
            .post { margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 10px; }
            .button-row { display: flex; gap: 5px; margin-top: 5px; }
            .copy-button, .search-button { padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }
            .copy-button:hover, .search-button:hover { background-color: #f0f0f0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
            .expand-button { color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }
            h2 { font-size: 1.2em; color: #8C1B24; }
            .open-access-label { background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }
            .publication-date { font-size: 0.9em; color: black; }
            .post-image { width: 50%; margin-top: 10px; }
            .reset-button, .search-button { margin-left: 15px; padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; }
            .reset-button:hover, .search-button:hover { background-color: #f0f0f0; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial "display: none" comes from the stylesheet, so the inline
                // style is still empty on the first click; read the computed style instead.
                if (window.getComputedStyle(content).display === "none") {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            function saveImage(imgId) {
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }

            function filterArticles() {
                const startDate = document.getElementById('date-start').value;
                const endDate = document.getElementById('date-end').value;
                const categories = Array.from(document.getElementById('category-filter').selectedOptions).map(option => option.value);
                const openAccessOnly = document.getElementById('open-access-filter').checked;

                document.querySelectorAll('.post').forEach(post => {
                    // data-date is yyyy-mm-dd (the same format the date inputs return),
                    // so plain string comparison is chronological. Empty = unknown date.
                    const publicationDate = post.dataset.date || '';
                    const postCategories = (post.dataset.categories || '').split(', ');
                    const isOpenAccess = post.dataset.openAccess === 'true';

                    let isVisible = true;

                    if (startDate && (!publicationDate || publicationDate < startDate)) {
                        isVisible = false;
                    }

                    if (endDate && (!publicationDate || publicationDate > endDate)) {
                        isVisible = false;
                    }

                    if (categories.length > 0 && !categories.some(category => postCategories.includes(category))) {
                        isVisible = false;
                    }

                    if (openAccessOnly && !isOpenAccess) {
                        isVisible = false;
                    }

                    post.style.display = isVisible ? 'block' : 'none';
                });
            }

            function resetFilters() {
                document.getElementById('date-start').value = '';
                document.getElementById('date-end').value = '';
                document.getElementById('category-filter').selectedIndex = -1;
                document.getElementById('open-access-filter').checked = false;
                filterArticles();
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="date-start">Filter by Date:</label>
            <input type="date" id="date-start">
            <input type="date" id="date-end">
            <label for="category-filter" style="margin-left: 15px;">Filter by Category:</label>
            <select id="category-filter" multiple>
                <option value="Echo">Echo</option>
                <option value="CT">CT</option>
                <option value="CMR">CMR</option>
                <option value="MPI/SPECT">MPI/SPECT</option>
            </select>

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
            <button class="search-button" onclick="filterArticles()">Search</button>
            <button class="reset-button" onclick="resetFilters()">Reset Filters</button>
        </div>
        <div class="articles">
    """

    parts = [page_head]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)

        # Percent-encode the whole name (safe='') so apostrophes, ampersands etc. cannot
        # break out of the single-quoted JavaScript string in the onclick handlers.
        first_author_query = quote(str(article['first_author']), safe='')
        last_author_query = quote(str(article['last_author']), safe='')
        first_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={first_author_query}"
        last_author_linkedin = f"https://www.linkedin.com/search/results/people/?keywords={last_author_query}"
        first_author_x = f"https://x.com/search?q={first_author_query}&f=user"
        last_author_x = f"https://x.com/search?q={last_author_query}&f=user"

        # Scraped text is untrusted: escape everything that lands in the markup.
        title = escape(str(article['title']))
        first_author = escape(str(article['first_author']))
        last_author = escape(str(article['last_author']))
        keywords = escape(str(article['keywords']))
        publication_date = escape(str(article['publication_date']))
        iso_date = _iso_publication_date(article['publication_date'])
        open_access_flag = 'true' if article['access'] else 'false'

        # Only render the badge when there is a label; an empty div would still be
        # painted with the badge background.
        if article['access']:
            access_html = f'<div class="open-access-label">{escape(str(article["access"]))}</div>'
        else:
            access_html = ''

        # Without an image URL the original emitted src="None"; show a placeholder instead.
        if article['image_url']:
            image_src = escape(str(article['image_url']))
            media_html = f"""<img id="image{idx}" src="{image_src}" alt="Full Article Image">
                            <div class="button-row">
                                <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                                <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                            </div>"""
        else:
            media_html = "<span>No image available</span>"

        parts.append(f"""
        <div class="post" data-date="{iso_date}" data-categories="{keywords}" data-open-access="{open_access_flag}">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{title}</h2>
            </div>
            {access_html}
            <div class="publication-date">- Published on {publication_date}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{first_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{first_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{first_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{last_author}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{last_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{last_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span>{keywords}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{escape(post_text)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{escape(doi_link)}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{escape(x_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{escape(linkedin_accounts)}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>

                        <h4>4. Media</h4>
                        <div class="post-image">
                            {media_html}
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    parts.append("</div></body></html>")

    # The page declares charset UTF-8 and contains emoji, so write UTF-8 explicitly
    # instead of relying on the platform default encoding.
    with open("EHJIMP_codex_editoris.html", "w", encoding="utf-8") as file:
        file.write("".join(parts))
    print("HTML file 'EHJIMP_codex_editoris.html' has been generated successfully.")


def _iso_publication_date(raw_date):
    """Return raw_date as 'YYYY-MM-DD', or '' when it matches none of the formats tried."""
    from datetime import datetime

    text = str(raw_date).strip()
    # NOTE(review): the exact date layout of the scraped listing is not visible in this
    # file, so several common layouts are tried — confirm against real scraped values.
    # %B/%b depend on the process locale (English month names assumed).
    for fmt in ("%d %B %Y", "%d %b %Y", "%B %d, %Y", "%Y-%m-%d"):
        try:
            return datetime.strptime(text, fmt).date().isoformat()
        except ValueError:
            continue
    return ""

# Main function
def main():
    """Scrape the Springer search listing for journal id 10554 (2024 onwards) and write the HTML page."""
    # Same search URL as before, split across two literals for readability.
    search_url = (
        'https://link.springer.com/search?new-search=true&facet-journal-id=10554'
        '&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    )
    generate_html(fetch_articles(search_url))

main()


Scraping URL: https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=2
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=3
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=4
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sor

# Version 2 READY ❤

In [None]:
# @title
import random
import requests
from bs4 import BeautifulSoup
import re
from PIL import Image, ImageDraw
import io
import base64

# Site root that the scraper prepends to the relative hrefs it finds
BASE_URL = "https://link.springer.com"

# Color palette: human-readable name -> hex code
colors = {
    "Dark Red": "#8C1B24",
    "Bright Red": "#BF1F1F",
    "Deep Maroon": "#592323",
    "Dark Charcoal": "#2E2B2B",
    "Warm Gray": "#D9D6D6",
    "Stone Gray": "#B0AFAF",
    "Ivory": "#F0F0F0",
}

# Step 1: Fetch and Parse the Website with Pagination
# Seconds to wait for each page before requests raises a Timeout, so a stalled
# connection cannot hang the whole scrape.
_REQUEST_TIMEOUT = 30

# Title patterns paired with the label stored under 'keywords'.
# Compiled once here rather than for every search-result item.
_KEYWORD_PATTERNS = (
    (re.compile(r'\b(Echocardiography|Echo|Cardiac Echo)\b', re.IGNORECASE), "Echo"),
    (re.compile(r'\b(CT|Computed Tomography|Cat Scan)\b', re.IGNORECASE), "CT"),
    (re.compile(r'\b(PET|Nuclear|MPI|SPECT)\b', re.IGNORECASE), "MPI/SPECT"),
    (re.compile(r'\b(CMR|Cardiac MRI|MRI)\b', re.IGNORECASE), "CMR"),
)


def _extract_image_url(item):
    """Return the image URL of a search-result item with its size token rewritten to 'lw800', or None."""
    picture_tag = item.find('picture')
    if not picture_tag:
        return None
    source_tag = picture_tag.find('source', attrs={"media": "(min-width: 480px)"})
    if not source_tag or not source_tag.get('srcset'):
        return None
    candidates = source_tag['srcset'].split(", ")
    # Prefer the second srcset candidate as before, but fall back to the only
    # one present instead of raising IndexError.
    chosen = candidates[1] if len(candidates) > 1 else candidates[0]
    return re.sub(r'w\d+h\d+', 'lw800', chosen.split(" ")[0])


def _parse_article(item):
    """Build the article dict for one <li> element of the search-result list."""
    title_tag = item.find('h3', class_='app-card-open__heading', attrs={"data-test": "title"})
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    link_tag = item.find('a', class_='app-card-open__link')
    href = link_tag.get('href') if link_tag else None  # tolerate an <a> without href
    link = BASE_URL + href if href else None
    doi_link = link.replace("https://link.springer.com/article/", "https://www.doi.org/") if link else "No DOI link"

    author_tag = item.find('span', attrs={"data-test": "authors"})
    if author_tag:
        authors = author_tag.get_text(strip=True).split(", ")
        first_author = authors[0]
        # lstrip takes a character set, so '.' strips any run of leading dots.
        last_author = authors[-1].lstrip('.') if len(authors) > 1 else "None"
        name_parts = first_author.split()
    else:
        first_author = last_author = "No authors listed"
        # Do not derive a surname from the placeholder text (it would be "listed").
        name_parts = []
    first_author_last_name = name_parts[-1] if name_parts else "Unknown"

    # Presence of the oa-label span marks the article as Open Access.
    access_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "oa-label"})
    access = "Open Access" if access_tag else None

    date_tag = item.find('span', class_='c-meta__item', attrs={"data-test": "published"})
    publication_date = date_tag.get_text(strip=True) if date_tag else "No date available"

    keywords = [label for pattern, label in _KEYWORD_PATTERNS if pattern.search(title)]

    return {
        'title': title,
        'link': link,
        'doi_link': doi_link,
        'first_author': first_author,
        'last_author': last_author,
        'image_url': _extract_image_url(item),
        'access': access,
        'keywords': ', '.join(keywords) if keywords else "None",
        'publication_date': publication_date,
        'first_author_last_name': first_author_last_name,
    }


def fetch_articles(base_url):
    """Scrape a paginated Springer search listing and return its articles.

    Starting at ``base_url``, each page is downloaded and parsed, and the
    ``rel="next"`` pagination link is followed until none is left.

    Args:
        base_url (str): URL of the first search-result page.

    Returns:
        list[dict]: One dict per result item with the keys 'title', 'link',
        'doi_link', 'first_author', 'last_author', 'image_url', 'access',
        'keywords', 'publication_date' and 'first_author_last_name'.
        Missing fields hold placeholder strings or None.

    Raises:
        requests.RequestException: On connection errors or when a page does
            not answer within the timeout.
    """
    articles = []
    current_url = base_url

    while current_url:
        print(f"Scraping URL: {current_url}")
        response = requests.get(current_url, timeout=_REQUEST_TIMEOUT)
        if not response.ok:
            # An error page has no results or pagination; say so explicitly
            # and keep whatever was collected so far.
            print(f"Request failed with HTTP status {response.status_code}; stopping pagination.")
            break
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse and collect articles
        article_container = soup.find('ol', class_='u-list-reset', attrs={"data-test": "darwin-search"})
        if article_container:
            articles.extend(_parse_article(item) for item in article_container.find_all('li'))

        next_button = soup.find('a', class_='eds-c-pagination__link', rel='next')
        next_href = next_button.get('href') if next_button else None
        if next_href:
            current_url = BASE_URL + next_href
        else:
            print("No more pages to scrape.")
            current_url = None

    return articles

# Step 2: Prepare Post Text with Segments
def prepare_post_segments(article):
    """Assemble the four text segments of a social-media post for one article.

    Args:
        article (dict): Must provide 'title', 'first_author_last_name' and
            'doi_link'.

    Returns:
        tuple: (post_text, doi_link, x_accounts, linkedin_accounts), where
        post_text is one of four templates picked at random and the two
        account strings are fixed.
    """
    title = article['title']
    surname = article['first_author_last_name']

    # Segment 1: one randomly chosen opening line
    candidates = (
        f"🚀 Exploring {title} - {surname} et al. provide insights on:",
        f"🔍 Dive into the details of {title} by {surname} et al.",
        f"✨ New research spotlight: {title} - {surname} and colleagues share their findings.",
        f"📊 A fresh perspective on {title} from {surname} et al.",
    )
    chosen_text = random.choice(candidates)

    # Segments 3A / 3B: fixed account lists appended to every post
    x_handles = "@ The #EHJIMP ⭐ @chrisgraeni @RaberLorenz @domcbenz @AnnaGiuliaPavon @SheilaHegde @ShehabAnwer @CE_Guerreiro @ARrosendael @isaacshiri @MarcoGuglielmo @EdoardoConte16 @BFoldyna @albcipri6 #CardioX"
    linkedin_names = "📰 Prof. Dr. med. Christoph Gräni - Anna Giulia Pavon - Edoardo Conte MD FESC - Sheila Hegde - Alexander van Rosendael, MD PhD - Thomas Hellmut Schindler - Dominik C. Benz - Lorenz Räber, MD, PhD, - Borek Foldyna MD FSCCT - Isaac Shiri - Christopher Nguyen, Ph.D., FSCMR, FACC, - Shehab Anwer - Umang Gupta MD, MBA - Mihály Károlyi - Bruno B. - Hernán Mejía Rentería - Vineeta Ojha - Francesco Prati - Paul Schoenhagen - Andreas Schuster - Bradley Allen - Raymond Kwong - North American Society for Cardiovascular Imaging."

    # Segment 2 (the DOI link) is passed through unchanged
    return chosen_text, article['doi_link'], x_handles, linkedin_names

# Step 3: Generate HTML with Compact Design and Copy Buttons
def generate_html(articles):
    """Render the articles as a filterable HTML page and write it to disk.

    Each article becomes a collapsible card with author search buttons, the
    post segments from ``prepare_post_segments`` and copy-to-clipboard
    buttons. The page is written to 'EHJIMP_codex_editoris.html' in the
    current working directory.

    Args:
        articles (list[dict]): Article dicts as returned by
            ``fetch_articles`` ('title', 'first_author', 'last_author',
            'keywords', 'access', 'publication_date', 'image_url', plus the
            keys ``prepare_post_segments`` reads).

    Returns:
        None
    """
    # Function-scope imports keep this cell self-contained.
    from html import escape
    from urllib.parse import quote

    def text(value):
        """Escape a scraped value for use in HTML text or a quoted attribute."""
        return escape(str(value))

    def search_urls(name):
        """Return HTML-escaped (LinkedIn, X) people-search URLs for a name."""
        # Percent-encode everything unsafe (spaces, apostrophes, non-ASCII) so
        # the URL cannot terminate the single-quoted JS string in onclick.
        query = quote(str(name), safe='')
        linkedin_url = f"https://www.linkedin.com/search/results/people/?keywords={query}"
        x_url = f"https://x.com/search?q={query}&f=user"
        return escape(linkedin_url), escape(x_url)

    # Collect fragments and join once instead of growing a string with +=.
    page_parts = ["""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>The #EHJIMP Codex Operum</title>
        <style>
            body { font-family: Arial, sans-serif; line-height: 1.4; }
            .post { margin: 15px 0; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }
            .filter-bar { margin-bottom: 10px; }
            .button-row { display: flex; gap: 5px; margin-top: 5px; }
            .copy-button, .search-button { padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; border-radius: 0; }
            .copy-button:hover, .search-button:hover { background-color: #f0f0f0; }
            .linkedin-button { background-color: #0072b1; color: white; }
            .x-button { background-color: #f5f5dc; color: black; border: 1px solid black; }
            .collapsible-content { display: none; transition: all 0.3s ease; }
            .expand-button { color: black; font-weight: bold; margin-right: 10px; cursor: pointer; }
            h2 { font-size: 1.2em; color: #8C1B24; }
            .open-access-label { background-color: #8C1B24; color: white; padding: 2px 5px; border-radius: 4px; margin-right: 10px; }
            .publication-date { font-size: 0.9em; color: black; }
            .post-image { width: 50%; margin-top: 10px; }
            .reset-button, .search-button { margin-left: 15px; padding: 5px 10px; border: 1px solid #ccc; cursor: pointer; }
            .reset-button:hover, .search-button:hover { background-color: #f0f0f0; }
        </style>
        <script>
            function toggleCollapse(contentId) {
                const content = document.getElementById(contentId);
                const indicator = document.getElementById('indicator' + contentId.replace('content', ''));
                // The initial "display: none" comes from the stylesheet, so the inline
                // style is empty at first; read the computed value instead.
                const isHidden = window.getComputedStyle(content).display === "none";
                if (isHidden) {
                    content.style.display = "block";
                    indicator.innerText = "[-]";
                } else {
                    content.style.display = "none";
                    indicator.innerText = "[+]";
                }
            }

            function copyToClipboard(content, message) {
                navigator.clipboard.writeText(content).then(function() {
                    alert(message + ' copied to clipboard!');
                }).catch(function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            function saveImage(imgId) {
                const img = document.getElementById(imgId);
                const link = document.createElement('a');
                link.href = img.src;
                link.download = 'image.jpg';
                link.click();
            }

            function filterArticles() {
                const searchText = document.getElementById('search-text').value.toLowerCase();
                const openAccessOnly = document.getElementById('open-access-filter').checked;

                document.querySelectorAll('.post').forEach(post => {
                    const postText = post.innerText.toLowerCase();
                    const isOpenAccess = post.querySelector('.open-access-label').innerText === 'Open Access';

                    let isVisible = postText.includes(searchText);

                    if (openAccessOnly && !isOpenAccess) {
                        isVisible = false;
                    }

                    post.style.display = isVisible ? 'block' : 'none';
                });
            }

            function resetFilters() {
                document.getElementById('search-text').value = '';
                document.getElementById('open-access-filter').checked = false;
                filterArticles();
            }
        </script>
    </head>
    <body>
        <h1>The #EHJIMP Codex Operum</h1>
        <div class="filter-bar">
            <label for="search-text">Search:</label>
            <input type="text" id="search-text">

            <label style="margin-left: 15px;">
                <input type="checkbox" id="open-access-filter"> Open Access Only
            </label>
            <button class="search-button" onclick="filterArticles()">Search</button>
            <button class="reset-button" onclick="resetFilters()">Reset Filters</button>
        </div>
        <div class="articles">
    """]

    for idx, article in enumerate(articles):
        post_text, doi_link, x_accounts, linkedin_accounts = prepare_post_segments(article)
        first_author_linkedin, first_author_x = search_urls(article['first_author'])
        last_author_linkedin, last_author_x = search_urls(article['last_author'])

        # Escape every scraped or derived value before it enters the markup.
        title_html = text(article['title'])
        access_html = text(article['access']) if article['access'] else ''
        date_html = text(article['publication_date'])
        first_author_html = text(article['first_author'])
        last_author_html = text(article['last_author'])
        keywords_html = text(article['keywords'])
        post_html = text(post_text)
        doi_html = text(doi_link)
        x_html = text(x_accounts)
        linkedin_html = text(linkedin_accounts)

        # Only emit the <img> and its buttons when an image was found;
        # otherwise the page would request the literal URL "None".
        image_url = article.get('image_url')
        if image_url:
            image_src = text(image_url)
            media_html = f"""<img id="image{idx}" src="{image_src}" alt="Full Article Image">
                            <div class="button-row">
                                <button class="copy-button" onclick="copyToClipboard(document.getElementById('image{idx}').src, 'Media URL')">Copy Media</button>
                                <button class="copy-button" onclick="saveImage('image{idx}')">Save Media</button>
                            </div>"""
        else:
            media_html = "<span>No image available</span>"

        page_parts.append(f"""
        <div class="post">
            <div style="display: flex; align-items: center;">
                <span id="indicator{idx}" class="expand-button" onclick="toggleCollapse('content{idx}')">[+]</span>
                <h2>{title_html}</h2>
            </div>
            <div class="open-access-label">{access_html}</div>
            <div class="publication-date">- Published on {date_html}</div>

            <div id="content{idx}" class="collapsible-content">
                <div class="post-container">
                    <div class="post-details">
                        <h4>First Author</h4>
                        <div class="button-row">
                            <span>{first_author_html}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{first_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{first_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Last Author</h4>
                        <div class="button-row">
                            <span>{last_author_html}</span>
                            <button class="search-button linkedin-button" onclick="window.open('{last_author_linkedin}', '_blank')">LinkedIn</button>
                            <button class="search-button x-button" onclick="window.open('{last_author_x}', '_blank')">X</button>
                        </div>

                        <h4>Category</h4>
                        <span>{keywords_html}</span>

                        <h4>1. Post</h4>
                        <div class="post-text" id="postText{idx}">{post_html}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('postText{idx}').innerText, 'Post Text')">Copy Post</button>
                        </div>

                        <h4>2. DOI</h4>
                        <div class="doi-link" id="doiLink{idx}">{doi_html}</div>
                        <div class="button-row">
                            <button class="copy-button" onclick="copyToClipboard(document.getElementById('doiLink{idx}').innerText, 'DOI Link')">Copy DOI</button>
                        </div>

                        <h4>3A. X Accounts</h4>
                        <div class="x-accounts" id="xAccounts{idx}">{x_html}</div>
                        <div class="button-row">
                            <button class="copy-button x-button" onclick="copyToClipboard(document.getElementById('xAccounts{idx}').innerText, 'X Accounts')">Copy X accounts</button>
                        </div>

                        <h4>3B. LinkedIn Accounts</h4>
                        <div class="linkedin-accounts" id="linkedinAccounts{idx}">{linkedin_html}</div>
                        <div class="button-row">
                            <button class="copy-button linkedin-button" onclick="copyToClipboard(document.getElementById('linkedinAccounts{idx}').innerText, 'LinkedIn Accounts')">Copy LinkedIn accounts</button>
                        </div>

                        <h4>4. Media</h4>
                        <div class="post-image">
                            {media_html}
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

    page_parts.append("</div></body></html>")
    html_content = "".join(page_parts)

    # The page declares UTF-8 and the post text contains emoji, so write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open("EHJIMP_codex_editoris.html", "w", encoding="utf-8") as file:
        file.write(html_content)
    print("HTML file 'EHJIMP_codex_editoris.html' has been generated successfully.")

# Main function
def main():
    """Run the pipeline: scrape the search listing for journal id 10554, then build the HTML page."""
    # Query string is unchanged; only the literal is wrapped over two lines.
    start_url = (
        'https://link.springer.com/search?new-search=true&facet-journal-id=10554'
        '&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst'
    )
    scraped = fetch_articles(start_url)
    generate_html(scraped)

main()


Scraping URL: https://link.springer.com/search?new-search=true&facet-journal-id=10554&query=*&content-type=article&date=custom&dateFrom=2024&dateTo=&sortBy=newestFirst
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=2
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=3
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sortBy=newestFirst&search-within=Journal&facet-journal-id=10554&page=4
Scraping URL: https://link.springer.com/search?new-search=true&query=*&content-type=Article&featureFlags.show-entitlements=true&date=custom&dateFrom=2024&sor