In [1]:
import html
import os
import re
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

from deep_translator import GoogleTranslator
from dotenv import load_dotenv
from openai import OpenAI
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [2]:

# function to scrape FINEXTRA -> PAYMENTS -> COMPANY UPDATES
def scrape_finextra_payments():
    """Fetch the first listed story from Finextra's payments channel.

    Loads the channel page, follows the first ``h3.card-title a`` link and
    joins the stripped text of every ``.article-content p`` element found on
    the article page with single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success. If any step
        inside the ``try`` raises, the error is printed and ``None`` is
        returned. The browser is always closed before returning.
    """
    channel_url = "https://www.finextra.com/channel/payments"
    browser = initialize_driver()
    try:
        browser.get(channel_url)
        time.sleep(3)  # fixed pause after navigating to the listing

        # First headline link on the channel page.
        headline_link = browser.find_element(By.CSS_SELECTOR, "h3.card-title a")
        headline = headline_link.text.strip()
        link = headline_link.get_attribute("href")

        # Open the article itself and gather its paragraphs.
        browser.get(link)
        time.sleep(2)
        body_parts = []
        for node in browser.find_elements(By.CSS_SELECTOR, ".article-content p"):
            body_parts.append(node.text.strip())

        return {"title": headline, "content": " ".join(body_parts), "url": link}
    except Exception as exc:
        print(f"Error in Finextra payments: {exc}")
        return None
    finally:
        browser.quit()

# function to scrape FINEXTRA -> SECURITY -> COMPANY UPDATES
def scrape_finextra_security():
    """Fetch the first listed story from Finextra's security channel.

    Loads the channel page, follows the first ``h3.card-title a`` link and
    joins the stripped text of every ``.article-content p`` element on the
    article page with single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    listing_url = "https://www.finextra.com/channel/security"
    driver = initialize_driver()
    try:
        driver.get(listing_url)
        time.sleep(3)

        # Title text and target URL both come from the first headline link.
        top_link = driver.find_element(By.CSS_SELECTOR, "h3.card-title a")
        title, href = top_link.text.strip(), top_link.get_attribute("href")

        driver.get(href)
        time.sleep(2)
        paragraph_nodes = driver.find_elements(By.CSS_SELECTOR, ".article-content p")
        text = " ".join(node.text.strip() for node in paragraph_nodes)

        return {"title": title, "content": text, "url": href}
    except Exception as error:
        print(f"Error in Finextra security: {error}")
        return None
    finally:
        driver.quit()

# function to scrape PYMNTS B2B       
def scrape_pymnts_b2b():
    """Fetch the first listed story from the PYMNTS B2B topic page.

    The title is read from the ``title`` attribute of the first
    ``a.fw-bolder.fs-5.d-block.mb-2`` link; the content is the stripped text
    of every ``p`` inside the article container, joined with single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    topic_url = "https://www.pymnts.com/topic/b2b"
    browser = initialize_driver()
    scraped = None
    try:
        browser.get(topic_url)
        time.sleep(3)

        teaser = browser.find_element(By.CSS_SELECTOR, "a.fw-bolder.fs-5.d-block.mb-2")
        headline = teaser.get_attribute("title")
        target = teaser.get_attribute("href")

        browser.get(target)
        time.sleep(2)

        # Restrict the paragraph search to the article container.
        body = browser.find_element(By.CSS_SELECTOR, ".single.lh-article.mt-1.lnk-article")
        texts = [p.text.strip() for p in body.find_elements(By.CSS_SELECTOR, "p")]
        scraped = {"title": headline, "content": " ".join(texts), "url": target}
    except Exception as exc:
        print(f"Error in PYMNTS b2b: {exc}")
    finally:
        browser.quit()
    return scraped

# function to scrape PYMNTS -> RETAIL
def scrape_pymnts_retail():
    """Fetch the first listed story from the PYMNTS retail topic page.

    The title is read from the ``title`` attribute of the first
    ``a.fw-bolder.fs-5.d-block.mb-2`` link; the content is the stripped text
    of every ``p`` inside the article container, joined with single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    start_url = "https://www.pymnts.com/topic/retail"
    chrome = initialize_driver()
    try:
        chrome.get(start_url)
        time.sleep(3)

        lead_link = chrome.find_element(By.CSS_SELECTOR, "a.fw-bolder.fs-5.d-block.mb-2")
        story = {
            "title": lead_link.get_attribute("title"),
            "url": lead_link.get_attribute("href"),
        }

        chrome.get(story["url"])
        time.sleep(2)

        container = chrome.find_element(By.CSS_SELECTOR, ".single.lh-article.mt-1.lnk-article")
        pieces = []
        for para in container.find_elements(By.CSS_SELECTOR, "p"):
            pieces.append(para.text.strip())

        return {"title": story["title"], "content": " ".join(pieces), "url": story["url"]}
    except Exception as err:
        print(f"Error in PYMNTS retail: {err}")
        return None
    finally:
        chrome.quit()

# function to scrape PYMNTS -> FINTECH     
def scrape_pymnts_fintech():
    """Fetch the first listed story from the PYMNTS fintech topic page.

    The title is read from the ``title`` attribute of the first
    ``a.fw-bolder.fs-5.d-block.mb-2`` link; the content is the stripped text
    of every ``p`` inside the article container, joined with single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    page_url = "https://www.pymnts.com/topic/fintech"
    session = initialize_driver()
    outcome = None
    try:
        session.get(page_url)
        time.sleep(3)

        anchor = session.find_element(By.CSS_SELECTOR, "a.fw-bolder.fs-5.d-block.mb-2")
        name, address = anchor.get_attribute("title"), anchor.get_attribute("href")

        session.get(address)
        time.sleep(2)

        wrapper = session.find_element(By.CSS_SELECTOR, ".single.lh-article.mt-1.lnk-article")
        paras = wrapper.find_elements(By.CSS_SELECTOR, "p")
        joined = " ".join(map(lambda el: el.text.strip(), paras))

        outcome = {"title": name, "content": joined, "url": address}
    except Exception as problem:
        print(f"Error in PYMNTS fintech: {problem}")
    finally:
        session.quit()
    return outcome
          
# function to scrape PYMNTS -> CRYPTO
def scrape_pymnts_crypto():
    """Fetch the first listed story from the PYMNTS crypto topic page.

    The title is read from the ``title`` attribute of the first
    ``a.fw-bolder.fs-5.d-block.mb-2`` link; the content is the stripped text
    of every ``p`` inside the article container, joined with single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    index_url = "https://www.pymnts.com/topic/crypto"
    driver = initialize_driver()
    try:
        driver.get(index_url)
        time.sleep(3)

        first_link = driver.find_element(By.CSS_SELECTOR, "a.fw-bolder.fs-5.d-block.mb-2")
        story_title = first_link.get_attribute("title")
        story_url = first_link.get_attribute("href")

        driver.get(story_url)
        time.sleep(2)

        # Paragraphs are looked up inside the article container only.
        story_body = driver.find_element(By.CSS_SELECTOR, ".single.lh-article.mt-1.lnk-article")
        story_text = " ".join(
            block.text.strip()
            for block in story_body.find_elements(By.CSS_SELECTOR, "p")
        )

        return {"title": story_title, "content": story_text, "url": story_url}
    except Exception as failure:
        print(f"Error in PYMNTS crypto: {failure}")
        return None
    finally:
        driver.quit()

# function to scrape PAYPERS -> FRAUD AND CRIME
def scrape_the_paypers():
    """Fetch the first matching story from The Paypers' fraud/financial-crime category.

    Picks the first link whose ``href`` contains
    ``/digital-identity-security-online-fraud/``, takes the title from the
    ``<b>`` element inside that link, and joins the stripped text of every
    ``.content.regular_content p`` element on the article page.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    category_url = "https://www.thepaypers.com/categories/fraud-news-and-financial-crime-news"
    browser = initialize_driver()
    payload = None
    try:
        browser.get(category_url)
        time.sleep(3)

        story_link = browser.find_element(By.CSS_SELECTOR, "a[href*='/digital-identity-security-online-fraud/']")
        bold_title = story_link.find_element(By.TAG_NAME, "b")
        heading = bold_title.text.strip()
        destination = story_link.get_attribute("href")

        browser.get(destination)
        time.sleep(2)

        chunks = [
            elem.text.strip()
            for elem in browser.find_elements(By.CSS_SELECTOR, ".content.regular_content p")
        ]
        payload = {"title": heading, "content": " ".join(chunks), "url": destination}
    except Exception as exc:
        print(f"Error in The Paypers: {exc}")
    finally:
        browser.quit()
    return payload


# function to scrape PAYMENTS JOURNAL -> TOP NEWS
def scrape_payments_journal():
    """Fetch the first listed story from the Payments Journal home page.

    Follows the first ``h3.jeg_post_title a`` link and joins the stripped
    text of every ``.content-inner p`` element on the article page with
    single spaces.

    Returns:
        dict | None: ``{"title", "content", "url"}`` on success; ``None``
        (after printing the error) if any step inside the ``try`` raises.
        The browser is closed in every case.
    """
    url = "https://www.paymentsjournal.com"

    # Use the shared driver factory like every other scraper in this file.
    # A bare webdriver.Chrome() skipped its headless/no-sandbox options and
    # opened a visible browser window for this one source only.
    driver = initialize_driver()
    try:
        driver.get(url)
        time.sleep(3)

        # First headline link on the home page.
        article = driver.find_element(By.CSS_SELECTOR, "h3.jeg_post_title a")
        article_title = article.text.strip()
        article_url = article.get_attribute("href")

        # Open the article and collect its paragraphs.
        driver.get(article_url)
        time.sleep(2)
        paragraphs = driver.find_elements(By.CSS_SELECTOR, ".content-inner p")
        content = " ".join(p.text.strip() for p in paragraphs)

        return {"title": article_title, "content": content, "url": article_url}
    except Exception as e:
        print(f"Error in Payments Journal: {e}")
        return None
    finally:
        driver.quit()


# function to start Selenium
def initialize_driver():
    """Create a Chrome WebDriver with the flags every scraper here relies on.

    Chrome is started with ``--headless``, ``--disable-gpu``, ``--no-sandbox``
    and ``--disable-dev-shm-usage``. The chromedriver path is obtained from
    ``ChromeDriverManager().install()``.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    launch_flags = (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    )
    opts = Options()
    for flag in launch_flags:
        opts.add_argument(flag)

    chromedriver_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chromedriver_service, options=opts)


In [3]:
if __name__ == "__main__":
    # Scrapers in the order their output is printed; the trailing comment
    # names the topic each one covers.
    scrapers = (
        scrape_finextra_payments,  # payments
        scrape_pymnts_fintech,     # fintech
        scrape_finextra_security,  # security
        scrape_the_paypers,        # fraud
        scrape_payments_journal,   # fraud
        scrape_pymnts_b2b,         # b2b
        scrape_pymnts_crypto,      # crypto
        scrape_pymnts_retail,      # retail
    )
    results = [scrape() for scrape in scrapers]

    # A scraper returns None when it fails, so only truthy results are shown.
    for result in results:
        if not result:
            continue
        print(f"Title: {result['title']}\nURL: {result['url']}\nContent: {result['content']}\n")


Title: European Payments Council publishes 2025 payment scheme rulebooks
URL: https://www.finextra.com/pressarticle/103450/european-payments-council-publishes-2025-payment-scheme-rulebooks
Content: The European Payments Council (EPC) has released the five rulebooks for its 2025 payment schemes, along with the associated Implementation Guidelines (IGs). 1 Like 1 1 hour Be the first to comment This content is provided by an external author without editing by Finextra. It expresses the views and opinions of the author. These updates will come into effect on 5 October 2025, marking a departure from the traditional November implementation timeline. The October 2025 date aligns with the implementation schedule of Regulation (EU) 2024/886, also known as the Instant Payments Regulation (IPR), which amends Regulation (EU) 260/2012 (the SEPA Regulation).

This adjustment ensures consistency across all relevant EPC schemes, simplifying preparation for Payment Service Providers (PSPs), Clearing an

In [4]:
# Read API_KEY from the process environment, after load_dotenv() has had the
# chance to populate it from a local .env file.
load_dotenv()
api_key = os.getenv("API_KEY")

# The OpenAI client constructor is keyword-only: passing the key positionally
# (OpenAI(api_key)) raises TypeError before any request is made.
client = OpenAI(api_key=api_key)

#System prompt; currently in SPANISH 
def chat_with_system(system_prompt: str, user_prompt:str) -> str:
    """Send a system prompt plus a user prompt to the chat model.

    Args:
        system_prompt: Text sent with the ``system`` role.
        user_prompt: Text sent with the ``user`` role.

    Returns:
        The message content of the first choice in the response. If the
        call raises, the exception is not propagated; the string
        ``"Error: <exception text>"`` is returned instead.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error: {str(exc)}"

In [5]:
# function to translate
def translate_text(text, source_lang="en", target_lang="es"):
    """Translate ``text`` from one language to another with GoogleTranslator.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (default ``"en"``).
        target_lang: Language code to translate into (default ``"es"``).

    Returns:
        The translated text. If the translator raises, the error is printed
        and the original ``text`` is returned unchanged.
    """
    try:
        translator = GoogleTranslator(source=source_lang, target=target_lang)
        translated = translator.translate(text)
    except Exception as exc:
        print(f"Error traduciendo texto: {exc}")
        return text
    return translated
    
# function that summarises and creates the newsletter
def summarize_and_create_newsletter(articles_by_section):
    """Build the weekly newsletter as one HTML document.

    For every truthy article, the title is translated to Spanish with
    ``translate_text`` and a short Spanish summary is requested from
    ``chat_with_system``. Markdown bold markers (``**text**``) are stripped
    from the summary while keeping the emphasised text. Title, summary and
    URL are HTML-escaped before being inserted, since they originate from
    scraped pages and model output.

    Args:
        articles_by_section: Mapping of section heading to a list of article
            dicts with ``"title"``, ``"content"`` and ``"url"`` keys. Falsy
            entries (e.g. ``None`` from a failed scrape) are skipped. Section
            headings are supplied by the caller and inserted as-is.

    Returns:
        str: The complete HTML page (header, one ``<h2>`` per section with
        its articles, footer).
    """
    system_prompt = (
        "Eres un asistente experto en redactar resúmenes claros y concisos para newsletters. "
        "Además, eres un experto en la industria de sistemas de pagos."
    )

    # Page head with inline styles. The charset declaration tells browsers and
    # mail clients to decode the accented text and emoji as UTF-8.
    newsletter = """
    <html>
    <head>
        <meta charset="utf-8">
        <style>
            body { font-family: Arial, sans-serif; color: #333333; line-height: 1.6; }
            h1 { color: #0056b3; }
            h2 { color: #007bff; margin-top: 30px; }
            .article-title { font-size: 20px; font-weight: bold; margin-bottom: 10px; }
            .article-summary { font-size: 16px; margin-bottom: 10px; }
            .read-more { color: #007bff; text-decoration: none; font-weight: bold; }
            .separator { border-top: 1px solid #ddd; margin: 20px 0; }
            .footer { font-size: 14px; color: #777; margin-top: 40px; }
        </style>
    </head>
    <body>
        <h1>Insights Semanales: Innovación y Tendencias en Pagos</h1>
        <p>¡Bienvenido a tu resumen semanal! Aquí tienes las noticias más relevantes divididas por categoría.</p>
        <div class="separator"></div>
    """

    # One <h2> per section, followed by a card for each scraped article.
    for section, articles in articles_by_section.items():
        newsletter += f"<h2>{section}</h2>"

        for article in articles:
            if not article:
                continue  # failed scrape

            # Spanish title; keep the original one if the translator returns nothing.
            translated_title = (
                translate_text(article['title'], source_lang="en", target_lang="es")
                or article['title']
            )

            # User prompt carrying the article to summarise.
            user_prompt = (
                f"Resumen breve para una newsletter del siguiente artículo. Limítate a los puntos clave para mejorar la experiencia de lectura. "
                f"Redúcelo a 2-3 frases:\n\n"
                f"Título: {article['title']}\n"
                f"Contenido: {article['content']}\n\n"
                f"Resumen:"
            )

            # The model reply may be None; treat that as an empty summary.
            summary = chat_with_system(system_prompt, user_prompt) or ""

            # Drop the markdown bold markers but keep the words between them
            # (replacing the whole match with '' deleted the emphasised text).
            summary_cleaned = re.sub(r'\*\*(.*?)\*\*', r'\1', summary).strip()

            # Scraped/model text is untrusted: escape it before it becomes HTML.
            safe_title = html.escape(str(translated_title))
            safe_summary = html.escape(summary_cleaned)
            safe_url = html.escape(str(article['url']), quote=True)

            newsletter += f"""
                <h3 class="article-title">{safe_title}</h3>
                <p class="article-summary">{safe_summary}</p>
                <p><a href="{safe_url}" class="read-more">Leer más aquí</a></p>
                <div class="separator"></div>
                """

    # Footer and closing HTML.
    newsletter += """
        <p class="footer">Gracias por leernos. ¡Nos vemos la próxima semana con más actualizaciones!</p>
        <p class="footer">Atentamente, Eloy Vicente López.</p>
    </body>
    </html>
    """

    return newsletter

# saves the .html file
if __name__ == "__main__":
    # Newsletter sections in display order, each paired with the scrapers
    # that feed it. A scraper returns None when it fails. The headings were
    # mis-decoded (UTF-8 read as Mac Roman); the emoji and accents are restored.
    articles_by_section = {
        "🔍 Innovación en Pagos y Fintech": [
            scrape_finextra_payments(),
            scrape_pymnts_fintech()
        ],
        "🔐 Fraude y Seguridad": [
            scrape_finextra_security(),
            scrape_the_paypers(),
            scrape_payments_journal()
        ],
        "💼 Pagos Empresariales y Soluciones B2B": [
            scrape_pymnts_b2b()
        ],
        "🌐 Criptomonedas y Retail": [
            scrape_pymnts_crypto(),
            scrape_pymnts_retail()
        ]
    }

    # Generate the newsletter HTML.
    newsletter_content = summarize_and_create_newsletter(articles_by_section)

    # Save it as UTF-8 so the accented text and emoji survive the round trip.
    with open("newsletter3.html", "w", encoding="utf-8") as file:
        file.write(newsletter_content)

    print("Newsletter successfully generated :)")


Newsletter successfully generated :)
