<a href="https://colab.research.google.com/github/4L3M4R/cerbero/blob/main/cerbero-pre.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================
#             CERBERO PREMARKET
#   Descarga noticias y calcula sentiment
# ===============================================

import os
import pandas as pd
import feedparser
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import datetime
from urllib.parse import quote

# ===============================================
#             CONFIGURACIÓN
# ===============================================

# Asset list: every non-empty line of activos.txt has the form
# "SYMBOL:SOURCE:SEARCH NAME". The result maps each symbol to its
# lower-cased source and the search name used for the news query.
activos = {}
with open("activos.txt", "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at the end of the file)
            # would otherwise fail the three-way unpack below.
            continue
        # Split at most twice so a search name may itself contain ":".
        parts = line.split(":", 2)
        if len(parts) != 3:
            raise ValueError(
                f"activos.txt line {line_no}: expected "
                f"'symbol:source:search_name', got {line!r}"
            )
        symbol, source, search_name = parts
        activos[symbol.strip()] = {
            "source": source.strip().lower(),
            "search_name": search_name.strip(),
        }

# Sentiment initialisation: one VADER analyzer plus the FinBERT tokenizer/model.
# NOTE(review): the analyzer is imported from the vaderSentiment package, not
# from nltk -- confirm whether the nltk lexicon download is still required.
nltk.download("vader_lexicon")
vader_analyzer = SentimentIntensityAnalyzer()

# The tokenizer and the classifier are loaded from the same checkpoint.
# NOTE(review): neither FinBERT object is referenced by the functions visible
# in this file; the finbert_sentiment column is currently a placeholder.
_FINBERT_CHECKPOINT = "yiyanghkust/finbert-tone"
finbert_tokenizer = AutoTokenizer.from_pretrained(_FINBERT_CHECKPOINT)
finbert_model = AutoModelForSequenceClassification.from_pretrained(_FINBERT_CHECKPOINT)

# ===============================================
#           FUNCIONES AUXILIARES
# ===============================================

def registrar_log(message, log_file="run_summary_pre.log"):
    """Append ``message`` to ``log_file`` as one timestamped line.

    Each line has the form ``[YYYY-MM-DD HH:MM:SS] message`` and uses the
    local wall-clock time. The file is created if it does not exist.
    """
    entry = f"[{datetime.datetime.now():%Y-%m-%d %H:%M:%S}] {message}\n"
    with open(log_file, "a") as handle:
        handle.write(entry)

def descargar_noticias_y_calcular_sentiment(symbol, search_name):
    """Fetch Google News headlines for an asset and score their sentiment.

    Queries the Google News RSS search feed for ``search_name``, scores each
    headline title with VADER (compound score), passes the mean scores to
    ``actualizar_sentimiento`` and writes the per-headline table to
    ``logs/<symbol>_nuevas_agregadas_<UTC date>.csv``.

    Args:
        symbol: Asset identifier; used in file names and log messages.
        search_name: Free-text query sent to the news search (URL-quoted here).

    Returns:
        None. When the feed has no entries, only a log line is written.
    """
    os.makedirs("logs", exist_ok=True)

    query = quote(search_name)
    feed = feedparser.parse(f"https://news.google.com/rss/search?q={query}")

    # Use .get() so an entry lacking a field (e.g. no publication date) is
    # kept with an empty value instead of aborting the run with AttributeError.
    noticias = [
        {
            "timestamp": entry.get("published", ""),
            "title": entry.get("title", ""),
            "link": entry.get("link", ""),
        }
        for entry in feed.entries
    ]
    df = pd.DataFrame(noticias)
    if df.empty:
        registrar_log(f"{symbol} - No se encontraron noticias")
        return

    df["vader_sentiment"] = df["title"].apply(
        lambda title: vader_analyzer.polarity_scores(title)["compound"]
    )
    # Placeholder: FinBERT scores are not computed yet, so the column is 0.
    df["finbert_sentiment"] = 0

    # Daily aggregates are the plain means over all fetched headlines.
    vader_promedio = df["vader_sentiment"].mean()
    finbert_promedio = df["finbert_sentiment"].mean()

    # Update the per-symbol <symbol>_datos.txt file.
    actualizar_sentimiento(symbol, vader_promedio, finbert_promedio)

    # Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), which is deprecated
    # in recent pandas releases; the resulting date is the same.
    fecha = pd.Timestamp.now(tz="UTC").date()
    df.to_csv(f"logs/{symbol}_nuevas_agregadas_{fecha}.csv", index=False)
    registrar_log(f"{symbol} - Guardadas {len(df)} noticias con sentiment")


def actualizar_sentimiento(symbol, vader, finbert=0):
    """Record today's (UTC) sentiment scores in ``<symbol>_datos.txt``.

    The file is tab-separated. If it does not exist it is created with the
    columns ``timestamp``, ``vader_sentiment`` and ``finbert_sentiment`` and a
    single row for today. Otherwise today's row is updated in place, or a new
    row is appended, and the whole table is written back; any other columns
    already in the file are kept.

    Args:
        symbol: Asset identifier; determines the file name.
        vader: VADER score to store for today.
        finbert: FinBERT score to store for today (defaults to 0).

    Returns:
        None.

    Raises:
        KeyError: If an existing file has no ``timestamp`` column.
    """
    hoy = pd.Timestamp.now(tz="UTC").date().strftime("%Y-%m-%d")
    filename = f"{symbol}_datos.txt"

    if not os.path.exists(filename):
        # Create the file with only the sentiment columns. It must be
        # tab-separated, because every later run reads and rewrites it with
        # sep="\t"; a comma-separated header made the next run fail with
        # KeyError: 'timestamp'.
        with open(filename, "w", encoding="utf-8") as f:
            f.write("timestamp\tvader_sentiment\tfinbert_sentiment\n")
            f.write(f"{hoy}\t{vader}\t{finbert}\n")
        return

    # Read the existing table.
    df = pd.read_csv(filename, sep="\t")
    if "timestamp" not in df.columns:
        # Files created by the earlier comma-separated writer parse as a
        # single column under sep="\t"; retry with the default comma.
        df = pd.read_csv(filename)
    # Timestamps are reduced to calendar dates; unparseable values become NaT.
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce").dt.date

    es_hoy = df["timestamp"].astype(str) == hoy
    if es_hoy.any():
        # A row for today already exists: overwrite its scores.
        df.loc[es_hoy, "vader_sentiment"] = vader
        df.loc[es_hoy, "finbert_sentiment"] = finbert
    else:
        # No row for today yet: append one.
        nueva = {"timestamp": hoy, "vader_sentiment": vader, "finbert_sentiment": finbert}
        df = pd.concat([df, pd.DataFrame([nueva])], ignore_index=True)

    df.to_csv(filename, sep="\t", index=False)
    registrar_log(f"{symbol} - Sentiment actualizado: Vader={vader}, FinBERT={finbert}")

# ===============================================
#           EJECUCIÓN PRINCIPAL
# ===============================================

# Run the download-and-score pipeline once per configured asset.
for symbol, info in activos.items():
    nombre_busqueda = info["search_name"]
    descargar_noticias_y_calcular_sentiment(symbol, nombre_busqueda)

print("Completed premarket")
