<a href="https://colab.research.google.com/github/4L3M4R/cerbero/blob/main/cerbero_post.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================
#             CERBERO POSTMARKET
#     Descarga precios, filtra, calcula features
# ===============================================

import os
import requests
import pandas as pd
import numpy as np
import yfinance as yf
from ta.volatility import AverageTrueRange, BollingerBands
from ta.momentum import ROCIndicator
from ta.trend import MACD
from ta.momentum import RSIIndicator
import datetime

# ===============================================
#             CONFIGURACIÓN Y PARÁMETROS
# ===============================================

# Asset list: every non-blank line of activos.txt is "symbol:source:search_name".
# Result: activos[symbol] = {"source": <lower-cased source>, "search_name": <name>}.
activos = {}
with open("activos.txt", "r") as f:
    for line in f:
        line = line.strip()
        # Blank lines (e.g. extra newlines left by an editor) describe no
        # asset and would otherwise break the three-way unpack below.
        if not line:
            continue
        # maxsplit=2 keeps any further ":" inside search_name instead of
        # failing the unpack. A line with fewer than three fields still raises
        # ValueError, so a malformed entry is never silently ignored.
        symbol, source, search_name = line.split(":", 2)
        activos[symbol.strip()] = {
            "source": source.strip().lower(),
            "search_name": search_name.strip()
        }

# General configuration: config.txt holds one "key: value" pair per line;
# lines that contain no ":" are ignored.
config = {}
with open("config.txt", "r") as f:
    for line in f:
        if ":" in line:
            # Split on the first ":" only, so a value that itself contains a
            # colon no longer breaks the two-way unpack.
            key, value = line.strip().split(":", 1)
            config[key.strip()] = value.strip()

# Download parameters, with the defaults used when a key is absent.
granularity = config.get("frecuencia", "1h")
limit = int(config.get("limit", "100"))
period = config.get("period", "5d")

# ===============================================
#           FUNCIONES AUXILIARES
# ===============================================

def registrar_log(message, log_file="run_summary_post.log"):
    """Append ``message`` to ``log_file`` as a single timestamped line.

    Each entry is written as ``[YYYY-MM-DD HH:MM:SS] message`` followed by a
    newline. The timestamp comes from the local clock, and the file is opened
    in append mode so earlier entries are preserved.

    Args:
        message: Text (or any formattable object) to record.
        log_file: Path of the log file to append to.
    """
    stamp = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
    entry = f"[{stamp}] {message}\n"
    with open(log_file, "a") as handle:
        handle.write(entry)

def cargar_df_existente(symbol):
    """Load the stored history for ``symbol`` and the dates it already covers.

    The history is read from the tab-separated file ``{symbol}_datos.txt`` in
    the current working directory. Its ``timestamp`` column is parsed to
    datetimes (unparseable values become ``NaT``) and stripped of any timezone.

    Args:
        symbol: Asset identifier used to build the file name.

    Returns:
        A ``(df, existing_dates)`` tuple: the loaded DataFrame and the set of
        calendar dates present in its ``timestamp`` column. When the file does
        not exist, an empty DataFrame and an empty set are returned.
    """
    path = f"{symbol}_datos.txt"
    if not os.path.exists(path):
        return pd.DataFrame(), set()

    historico = pd.read_csv(path, sep="\t")
    parsed = pd.to_datetime(historico["timestamp"], errors="coerce")
    historico["timestamp"] = parsed.dt.tz_localize(None)
    return historico, set(historico["timestamp"].dt.date)

def filtrar_nuevas_fechas(df_new, existing_dates):
    """Return the rows of ``df_new`` whose calendar date is not yet stored.

    Filtering is done by date, not by full timestamp: every row whose date
    appears in ``existing_dates`` is dropped, whatever its time of day.

    Side effects on ``df_new`` (mutated in place): its ``timestamp`` column is
    parsed to timezone-naive datetimes and a helper ``date_only`` column is
    added. The returned frame is an independent copy without ``date_only``.

    Args:
        df_new: Freshly downloaded rows with a ``timestamp`` column.
        existing_dates: Collection of ``datetime.date`` values already stored.

    Returns:
        DataFrame with only the rows for dates absent from ``existing_dates``.
    """
    parsed = pd.to_datetime(df_new["timestamp"], errors="coerce")
    df_new["timestamp"] = parsed.dt.tz_localize(None)
    df_new["date_only"] = df_new["timestamp"].dt.date

    already_stored = df_new["date_only"].isin(existing_dates)
    fresh_rows = df_new.loc[~already_stored]
    return fresh_rows.drop(columns=["date_only"], errors="ignore")

def save_df(df, symbol):
    """Write ``df`` to ``{symbol}_datos.txt`` and print a one-line summary.

    The file is tab-separated, written without the index, and overwrites any
    previous file of the same name in the current working directory.

    Args:
        df: DataFrame to persist.
        symbol: Asset identifier used to build the file name.
    """
    destino = f"{symbol}_datos.txt"
    total = len(df)
    df.to_csv(destino, sep="\t", index=False)
    print(f"Guardado en {destino} | Filas totales: {total}")

def descargar_datos_bitget(symbol, granularity, limit, timeout=30):
    """Download historical USDT-futures candles for ``symbol`` from Bitget.

    Any failure (network error, non-200 status, unparseable body, empty
    payload) is written to the run log and reported as ``None``, so one
    failing asset does not abort the processing of the others.

    Args:
        symbol: Bitget contract symbol.
        granularity: Candle size, passed through as the ``granularity`` query
            parameter.
        limit: Maximum number of candles requested.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame sorted by ``timestamp`` (timezone-aware UTC) with columns
        ``timestamp, open, high, low, close, volume, quoteVolume, symbol``,
        or ``None`` when nothing could be downloaded.
    """
    url = "https://api.bitget.com/api/v2/mix/market/history-candles"
    params = {"symbol": symbol, "productType": "USDT-FUTURES", "granularity": granularity, "limit": limit}

    # Without a timeout, requests can wait indefinitely on a stalled server.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        registrar_log(f"Error Bitget {symbol}: {exc}")
        return None

    if response.status_code != 200:
        registrar_log(f"Error Bitget {symbol}: {response.text}")
        return None

    # A 200 response with a non-JSON body raises a ValueError subclass.
    try:
        payload = response.json()
    except ValueError as exc:
        registrar_log(f"Error Bitget {symbol}: {exc}")
        return None

    data = payload.get("data", [])
    if not data:
        registrar_log(f"No hay datos Bitget {symbol}")
        return None

    df = pd.DataFrame(data, columns=["timestamp", "open", "high", "low", "close", "volume", "quoteVolume"])
    # Millisecond epochs do not fit in 32 bits; request int64 explicitly
    # because plain ``int`` can map to a 32-bit type on some platforms.
    df["timestamp"] = pd.to_datetime(df["timestamp"].astype("int64"), unit='ms', utc=True)
    df["symbol"] = symbol
    df = df.sort_values("timestamp")
    registrar_log(f"Descargados {len(df)} registros desde Bitget para {symbol}")
    return df

def descargar_datos_yf(symbol, interval, period, limit):
    """Download price history for ``symbol`` from Yahoo Finance.

    Args:
        symbol: Yahoo Finance ticker.
        interval: Bar size passed to ``yf.download``.
        period: Look-back window passed to ``yf.download``.
        limit: Maximum number of rows to keep.

    Returns:
        DataFrame with lower-cased price columns, a ``timestamp`` column taken
        from the download's index and a ``symbol`` column, or ``None`` when
        the download is empty (this is also written to the run log).
    """
    data = yf.download(tickers=symbol, interval=interval, period=period)
    if data.empty:
        registrar_log(f"No se encontraron datos Yahoo Finance {symbol}")
        return None

    # Multi-level columns are flattened to their first level.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = [col[0].lower() for col in data.columns]
    else:
        data.columns = [col.lower() for col in data.columns]

    # Name the index "timestamp" before turning it into a column. The index
    # label depends on the interval (e.g. "Date" vs "Datetime"), so renaming
    # only "Date" left intraday downloads without a timestamp column.
    data = data.rename_axis("timestamp").reset_index()
    data["symbol"] = symbol

    # Keep the most recent rows: dropping the newest bars would defeat an
    # incremental update.
    # NOTE(review): assumes yfinance returns rows oldest-first - confirm.
    data = data.tail(limit)
    registrar_log(f"Descargados {len(data)} registros desde Yahoo Finance para {symbol}")
    return data

# ===============================================
#           CÁLCULO DE INDICADORES
# ===============================================

def calcular_features(df):
    """Add derived price/volume features and technical indicators to ``df``.

    ``df`` is modified in place and also returned. The ``open``, ``high``,
    ``low``, ``close`` and ``volume`` columns are coerced to numeric first
    (unparseable values become NaN). Every remaining NaN in the frame,
    including the warm-up rows of rolling windows and indicators, is finally
    replaced by 0.

    NOTE(review): infinite values produced by a division by zero (e.g. a
    previous volume of 0) are not touched by the final ``fillna``.

    Args:
        df: Price history with ``open, high, low, close, volume`` columns,
            ordered so that consecutive rows are consecutive bars.

    Returns:
        The same DataFrame with the feature columns added or overwritten.
    """
    base_cols = ['open', 'high', 'low', 'close', 'volume']
    for name in base_cols:
        df[name] = pd.to_numeric(df[name], errors='coerce')

    open_, high, low, close, volume = (df[name] for name in base_cols)
    prev_close = close.shift(1)
    prev_volume = volume.shift(1)
    # High-low range as a percentage of the low; stored under two names.
    bar_range_pct = (high - low) / low * 100

    # --- Simple price / volume ratios (all expressed in percent) ---
    df['gap_apertura_pct'] = (open_ - prev_close) / prev_close * 100
    df['volatilidad_diaria'] = bar_range_pct
    df['vol_5d'] = df['volatilidad_diaria'].rolling(5).std()
    df['vol_10d'] = df['volatilidad_diaria'].rolling(10).std()
    df['vol_rel_5d'] = volume / volume.rolling(5).mean()
    df['return_pct'] = (close - prev_close) / prev_close * 100
    df['close_open_pct'] = (close - open_) / open_ * 100
    df['range_pct'] = bar_range_pct
    df['volume_change_pct'] = (volume - prev_volume) / prev_volume * 100

    # --- Indicators from the ``ta`` library ---
    df['rsi_14'] = RSIIndicator(close=df['close'], window=14).rsi()

    macd_ind = MACD(close=df['close'], window_slow=26, window_fast=12, window_sign=9)
    df['macd'] = macd_ind.macd()
    df['macd_signal'] = macd_ind.macd_signal()
    df['macd_diff'] = macd_ind.macd_diff()

    atr_ind = AverageTrueRange(high=df['high'], low=df['low'], close=df['close'], window=14)
    df['atr_14'] = atr_ind.average_true_range()

    df['momentum_12'] = ROCIndicator(close=df['close'], window=12).roc()

    bandas = BollingerBands(close=df['close'], window=20, window_dev=2)
    df['bb_upper'] = bandas.bollinger_hband()
    df['bb_lower'] = bandas.bollinger_lband()
    df['bb_pctb'] = bandas.bollinger_pband()

    df.fillna(0, inplace=True)
    return df

# ===============================================
#           PROCESAMIENTO POR ACTIVO
# ===============================================

def procesar_activo(symbol, source, search_name, granularity, period, limit):
    """Run the full update pipeline for one asset.

    Steps: load the stored history, download fresh rows (Bitget when
    ``source`` is ``"bitget"``, Yahoo Finance otherwise), archive the raw
    download under ``logs/``, keep only rows for dates not yet stored, archive
    those too, append them to the history, recompute all features and save
    the result. The function returns early, after logging, when nothing was
    downloaded or when no new rows remain.

    Args:
        symbol: Asset identifier; also used to build file names.
        source: Data source name; ``"bitget"`` selects Bitget.
        search_name: Accepted for interface compatibility; not used here.
        granularity: Bar size passed to the downloader.
        period: Look-back window (Yahoo Finance only).
        limit: Maximum number of rows to download.
    """
    df_existing, existing_dates = cargar_df_existente(symbol)

    if source == "bitget":
        df_new = descargar_datos_bitget(symbol, granularity, limit)
    else:
        df_new = descargar_datos_yf(symbol, granularity, period, limit)

    if df_new is None or df_new.empty:
        registrar_log(f"{symbol} - No se descargaron datos")
        return

    # Archive the raw download, stamped with the download time.
    os.makedirs("logs", exist_ok=True)
    df_new["download_date"] = pd.Timestamp.utcnow()
    raw_path = f"logs/{symbol}_descargadas_{pd.Timestamp.utcnow().date()}.csv"
    df_new.to_csv(raw_path, index=False)

    df_to_add = filtrar_nuevas_fechas(df_new, existing_dates)
    if df_to_add.empty:
        registrar_log(f"{symbol} - No hay nuevas filas")
        return

    # Archive the rows that are about to be appended.
    df_to_add["added_date"] = pd.Timestamp.utcnow()
    added_path = f"logs/{symbol}_nuevas_agregadas_{pd.Timestamp.utcnow().date()}.csv"
    df_to_add.to_csv(added_path, index=False)

    if df_existing.empty:
        df_final = df_to_add
    else:
        df_final = pd.concat([df_existing, df_to_add], ignore_index=True)

    df_final = calcular_features(df_final)
    save_df(df_final, symbol)

    registrar_log(f"{symbol} - Descargadas {len(df_new)} filas, agregadas {len(df_to_add)}, total histórico {len(df_final)}")

# ===============================================
#           EJECUCIÓN PRINCIPAL
# ===============================================

# Run the pipeline once for every configured asset, in dictionary order.
for symbol, info in activos.items():
    procesar_activo(
        symbol=symbol,
        source=info["source"],
        search_name=info["search_name"],
        granularity=granularity,
        period=period,
        limit=limit,
    )

print("Completed postmarket")
