In [1]:
import feedparser
from datetime import datetime
from openai import OpenAI

# --- CONFIG ---
RSS_URL = "https://www.abc.com.py/arc/outboundfeeds/rss/deportes/?outputType=xml"
MAX_NEWS = 5  # Cantidad de noticias a incluir en el resumen
OPENAI_API_KEY = "TU_API_KEY"  # Reemplazá con tu API key

# --- PASO 1: Leer RSS ---
feed = feedparser.parse(RSS_URL)

news_items = []
for entry in feed.entries[:MAX_NEWS]:
    news_items.append({
        "title": entry.title,
        "link": entry.link,
        "summary": entry.summary
    })

# --- PASO 2: Construir texto para resumen ---
text_for_summary = "Genera un resumen diario de noticias deportivas paraguayas para escucharlo en 5 minutos:\n\n"
for i, item in enumerate(news_items, 1):
    text_for_summary += f"{i}. {item['title']} - {item['summary']}\n\n"

# --- PASO 3: Generar resumen con ChatGPT ---
client = OpenAI(api_key=OPENAI_API_KEY)

response = client.chat.completions.create(
    model="gpt-5-mini",
    messages=[
        {"role": "system", "content": "Eres un asistente que resume noticias deportivas en formato de audio breve."},
        {"role": "user", "content": text_for_summary}
    ],
    max_output_tokens=500  # Ajustá según cuánto texto quieras
)

resumen_final = response.choices[0].message.content

# --- PASO 4: Mostrar resumen ---
print("=== RESUMEN DEPORTIVO ABC ===")
print(resumen_final)


TypeError: Completions.create() got an unexpected keyword argument 'max_output_tokens'

In [3]:
from ai4free import ThinkAnyAI

opengpt = ThinkAnyAI()

while True:
    prompt = input("You have to tell me a joke ")
    response_str = opengpt.chat(prompt)
    print(response_str)

ImportError: cannot import name 'AsyncProvider' from 'webscout.AIbase' (c:\Users\ThinkPad-PC\Documents\GitHub\AFIP-nuevo\env\Lib\site-packages\webscout\AIbase.py)

In [5]:
pip install feedparser transformers torch gTTS

^C
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting transformers
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
     ---------------------------------------- 0.0/44.0 kB ? eta -:--:--
     --------- ------------------------------ 10.2/44.0 kB ? eta -:--:--
     ----------------- -------------------- 20.5/44.0 kB 165.2 kB/s eta 0:00:01
     -------------------------- ----------- 30.7/44.0 kB 187.9 kB/s eta 0:00:01
     -------------------------------------- 44.0/44.0 kB 216.4 kB/s eta 0:00:00
Collecting torch
  Downloading torch-2.9.1-cp311-cp311-win_amd64.whl.metadata (30 kB)
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.20.1-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2025.11.3-cp311-cp311-win_amd64.whl.metadata (41 kB)
   

In [None]:
# =========================================
# Resumen diario de ABC Deportes + Audio
# =========================================

import feedparser
from transformers import pipeline
from gtts import gTTS
from datetime import datetime

# ------------------------
# CONFIGURACIÓN
# ------------------------
RSS_URL = "https://www.abc.com.py/arc/outboundfeeds/rss/deportes/?outputType=xml"
MAX_NEWS = 5                # Cantidad de noticias a incluir
RESUMEN_MAX_WORDS = 150      # Máximo de palabras para el resumen
OUTPUT_AUDIO = "resumen_diario.mp3"

# ------------------------
# PASO 1: Leer RSS
# ------------------------
feed = feedparser.parse(RSS_URL)

news_text = ""
for i, entry in enumerate(feed.entries[:MAX_NEWS], start=1):
    title = entry.title
    summary = entry.summary
    news_text += f"{i}. {title}. {summary}\n\n"

if not news_text:
    news_text = "No se encontraron noticias deportivas hoy."

# ------------------------
# PASO 2: Resumir con Hugging Face
# ------------------------
summarizer = pipeline("summarization", model="t5-small")

resumen = summarizer(news_text, max_length=RESUMEN_MAX_WORDS, min_length=50, do_sample=False)
resumen_final = resumen[0]['summary_text']

print("=== RESUMEN DEL DÍA ===")
print(resumen_final)

# ------------------------
# PASO 3: Convertir a audio con gTTS
# ------------------------
tts = gTTS(text=resumen_final, lang='es')
tts.save(OUTPUT_AUDIO)

print(f"\nArchivo de audio generado: {OUTPUT_AUDIO}")
print("¡Listo para escucharlo!")

In [6]:
import requests
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET

# ------------------------
# CONFIGURACIÓN
# ------------------------
RSS_URL = "https://www.abc.com.py/arc/outboundfeeds/rss/deportes/?outputType=xml"
OUTPUT_FILE = "abc_deportes_limpio.txt"

# ------------------------
# PASO 1: Descargar el RSS
# ------------------------
response = requests.get(RSS_URL)
if response.status_code != 200:
    raise Exception(f"Error al descargar RSS: {response.status_code}")

rss_content = response.content

# ------------------------
# PASO 2: Parsear XML
# ------------------------
root = ET.fromstring(rss_content)

# ------------------------
# PASO 3: Extraer y limpiar cada <item>
# ------------------------
items = root.findall(".//item")

with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    for i, item in enumerate(items, start=1):
        title = item.findtext("title") or "N/A"
        link = item.findtext("link") or "N/A"
        author = item.findtext("{http://purl.org/dc/elements/1.1/}creator") or "N/A"
        pubDate = item.findtext("pubDate") or "N/A"
        
        # Contenido HTML
        raw_content = item.findtext("{http://purl.org/rss/1.0/modules/content/}encoded") or ""
        soup = BeautifulSoup(raw_content, "html.parser")
        content_text = soup.get_text(separator=" ", strip=True)

        # Imagen si existe
        media = item.find("{http://search.yahoo.com/mrss/}content")
        if media is not None:
            image_url = media.attrib.get("url", "N/A")
            image_desc_tag = media.find("{http://search.yahoo.com/mrss/}description")
            image_desc = image_desc_tag.text if image_desc_tag is not None else "N/A"
        else:
            image_url = "N/A"
            image_desc = "N/A"

        # Escribir bloque
        f.write(f"=== NOTICIA {i} ===\n")
        f.write(f"Título: {title}\n")
        f.write(f"Autor: {author}\n")
        f.write(f"Fecha: {pubDate}\n")
        f.write(f"Link: {link}\n")
        f.write(f"Imagen URL: {image_url}\n")
        f.write(f"Imagen desc: {image_desc}\n")
        f.write("Contenido:\n")
        f.write(content_text.strip() + "\n")
        f.write("\n-----------------------------\n\n")

print(f"Archivo generado: {OUTPUT_FILE}")


Archivo generado: abc_deportes_limpio.txt
