<a href="https://colab.research.google.com/github/4L3M4R/cerbero/blob/main/cerbero-pre.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================
#             CERBERO PREMARKET
#   Descarga noticias y calcula sentiment
# ===============================================

import os
import pandas as pd
import feedparser
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import datetime
from urllib.parse import quote

# ===============================================
#             CONFIGURACIÓN
# ===============================================

# Asset list: every non-blank line of activos.txt is "SYMBOL:SOURCE:SEARCH NAME".
activos = {}
# Explicit encoding so parsing does not depend on the platform's default locale.
with open("activos.txt", "r", encoding="utf-8") as f:
    for line_no, raw_line in enumerate(f, start=1):
        line = raw_line.strip()
        if not line:
            # Blank lines (for example a trailing newline) carry no asset.
            continue
        # maxsplit=2 keeps any further ":" inside the search name instead of
        # breaking the three-way unpack below.
        fields = line.split(":", 2)
        if len(fields) != 3:
            raise ValueError(
                f"activos.txt line {line_no}: expected 'symbol:source:search_name', got {line!r}"
            )
        symbol, source, search_name = (field.strip() for field in fields)
        activos[symbol] = {
            "source": source.lower(),
            "search_name": search_name,
        }

# Sentiment initialisation.
# The tokenizer and the classification model are loaded from the same checkpoint id.
FINBERT_CHECKPOINT = "yiyanghkust/finbert-tone"

# NOTE(review): the analyzer below comes from the vaderSentiment package, not
# from nltk - confirm whether this NLTK lexicon download is actually required.
nltk.download('vader_lexicon')
vader_analyzer = SentimentIntensityAnalyzer()

finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

# ===============================================
#           FUNCIONES AUXILIARES
# ===============================================

def registrar_log(message, log_file="run_summary_pre.log"):
    """Append one timestamped line to a log file.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] message`` followed by a
    newline, where the timestamp is the current naive local time.

    Args:
        message: Text to record after the timestamp.
        log_file: Path of the log file; it is opened in append mode, so it is
            created if missing and never truncated.
    """
    stamp = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
    entry = f"[{stamp}] {message}\n"
    with open(log_file, mode="a") as handle:
        handle.write(entry)

def descargar_noticias_y_calcular_sentiment(symbol, search_name):
    """Download news headlines for one asset and record their sentiment.

    Queries the Google News RSS search feed for ``search_name``, scores each
    headline with the module-level VADER analyzer, passes the average scores
    to ``actualizar_sentimiento`` and writes the scored headlines to
    ``logs/{symbol}_nuevas_agregadas_{date}.csv``. A summary line is appended
    to the run log through ``registrar_log``.

    Args:
        symbol: Asset identifier, used in log messages and file names.
        search_name: Free-text query sent to the news feed (URL-quoted here).

    Returns:
        None. When the feed yields no usable headlines, only a log line is
        written and nothing else happens.
    """
    os.makedirs("logs", exist_ok=True)

    query = quote(search_name)
    feed = feedparser.parse(f"https://news.google.com/rss/search?q={query}")

    # Feed items are read with .get() so that a single item lacking an
    # optional field does not abort the whole run with AttributeError.
    # Items without a title are skipped because the title is what gets scored.
    noticias = [
        {
            "timestamp": entry.get("published"),
            "title": entry["title"],
            "link": entry.get("link"),
        }
        for entry in feed.entries
        if entry.get("title")
    ]
    df = pd.DataFrame(noticias)
    if df.empty:
        registrar_log(f"{symbol} - No se encontraron noticias")
        return

    # Per-headline sentiment: VADER compound score of the title.
    df["vader_sentiment"] = df["title"].apply(
        lambda title: vader_analyzer.polarity_scores(title)["compound"]
    )
    # FinBERT scoring is not implemented yet; the column is a constant placeholder.
    df["finbert_sentiment"] = 0

    # Averages over all downloaded headlines.
    vader_promedio = df["vader_sentiment"].mean()
    finbert_promedio = df["finbert_sentiment"].mean()

    # Push the averages into the symbol's data file.
    actualizar_sentimiento(symbol, vader_promedio, finbert_promedio)

    # Save the scored headlines, named after the current UTC date.
    # Timestamp.now(tz="UTC") replaces the deprecated Timestamp.utcnow().
    # NOTE(review): actualizar_sentimiento keys rows on the local date, which
    # can differ from this UTC date around midnight - confirm that is acceptable.
    today_str = pd.Timestamp.now(tz="UTC").date()
    df.to_csv(f"logs/{symbol}_nuevas_agregadas_{today_str}.csv", index=False)
    registrar_log(f"{symbol} - Guardadas {len(df)} noticias con sentiment")

def actualizar_sentimiento(symbol, vader_promedio, finbert_promedio):
    """Store today's average sentiment scores in the symbol's data file.

    Reads ``datos_{symbol}.txt`` as CSV, (re)builds a ``timestamp`` column of
    calendar dates, then either overwrites ``vader_sentiment`` and
    ``finbert_sentiment`` on the row(s) dated today or appends a new row for
    today, and finally writes the file back in place.

    Args:
        symbol: Asset identifier; selects the file ``datos_{symbol}.txt`` in
            the current working directory.
        vader_promedio: Value written to the ``vader_sentiment`` column.
        finbert_promedio: Value written to the ``finbert_sentiment`` column.

    Returns:
        None. If the data file does not exist a warning is printed and
        nothing is written.
    """
    # Local import: at module level the name ``datetime`` is the module, not the class.
    from datetime import datetime

    file_path = f"datos_{symbol}.txt"
    if not os.path.exists(file_path):
        print(f"⚠️ Archivo no encontrado: {file_path}")
        return

    df = pd.read_csv(file_path)
    print(f"✅ Archivo cargado correctamente: {file_path}")
    print("Columnas actuales:", df.columns.tolist())

    # Evaluate "today" once so every comparison below uses the same date.
    hoy = datetime.now().date()

    # --- Identify the date column (first case-insensitive match wins) ---
    date_col = next(
        (col for col in df.columns if col.lower() in ("timestamp", "date", "fecha", "day")),
        None,
    )

    if date_col is None:
        print("⚠️ No se encontró columna de fecha. Se usará la fecha actual.")
        # NOTE(review): this stamps every existing row with today's date, so
        # all rows get their sentiment overwritten below - confirm intended.
        df["timestamp"] = hoy
    else:
        fechas = pd.to_datetime(df[date_col], errors="coerce").dt.date
        if date_col != "timestamp" and "timestamp" in df.columns:
            # Rows appended by an earlier run carry their date only in
            # "timestamp" (their date_col cell is empty). Keep that date
            # instead of wiping it to NaT, otherwise today's row is never
            # found again and a duplicate is appended on every run.
            previas = pd.to_datetime(df["timestamp"], errors="coerce").dt.date
            faltantes = fechas.isna()
            fechas.loc[faltantes] = previas.loc[faltantes]
        df["timestamp"] = fechas

    # --- Look for the row that corresponds to today ---
    mask = df["timestamp"] == hoy

    if mask.any():
        print(f"🟢 Actualizando línea existente para {hoy} en {symbol}")
        df.loc[mask, "vader_sentiment"] = vader_promedio
        df.loc[mask, "finbert_sentiment"] = finbert_promedio
    else:
        print(f"🟡 No existe línea para {hoy}, agregando nueva.")
        nueva_fila = pd.DataFrame({
            "timestamp": [hoy],
            "vader_sentiment": [vader_promedio],
            "finbert_sentiment": [finbert_promedio],
        })
        df = pd.concat([df, nueva_fila], ignore_index=True)

    # --- Save the updated file ---
    df.to_csv(file_path, index=False)
    print(f"💾 Archivo actualizado y guardado: {file_path}")

# ===============================================
#           EJECUCIÓN PRINCIPAL
# ===============================================

# Run the premarket download and sentiment pass once for every configured asset.
for symbol in activos:
    info = activos[symbol]
    descargar_noticias_y_calcular_sentiment(symbol, info["search_name"])

print("Completed premarket")
