In [1]:
import requests
import pandas as pd
import time

# Only organizations that have a ticker on Yahoo Finance.
# Keys are the author names sent to the models API; values are the ticker
# symbols attached to every model downloaded for that author.
ORGS_WITH_TICKER = {
    "google": "GOOGL",
    "facebook": "META",
    "microsoft": "MSFT",
    "Salesforce": "CRM"
}

# Endpoint queried to list models.
API_MODELS = "https://huggingface.co/api/models"
LIMIT = 1000  # Maximum per request; also used as the `skip` increment between pages

def fetch_all_models_for_org(org: str, timeout: float = 30.0) -> list:
    """Download every model entry the API lists for one author.

    Pages through ``API_MODELS`` using the ``limit``/``skip`` query
    parameters until a page comes back empty. The download is best-effort:
    on a non-200 response or a transport failure (connection error,
    timeout, ...) an error line is printed and whatever was collected so
    far is returned.

    Args:
        org: Author name sent as the ``author`` query parameter.
        timeout: Seconds to wait on each request before giving up.

    Returns:
        The decoded JSON items of all pages fetched, in request order.
    """
    print(f"üì• Cargando modelos de {org}...")
    models = []
    skip = 0
    while True:
        params = {
            "author": org,
            "limit": LIMIT,
            "skip": skip,
            "full": "true"
        }
        try:
            # Without an explicit timeout, requests waits indefinitely on a
            # stalled connection.
            response = requests.get(API_MODELS, params=params, timeout=timeout)
        except requests.RequestException:
            # Treat transport failures like a bad status so the pages
            # already collected are still returned.
            response = None
        if response is None or response.status_code != 200:
            print(f"‚ùå Error obteniendo datos de {org} en skip={skip}")
            break
        data = response.json()
        if not data:
            # An empty page marks the end of the listing.
            break
        models.extend(data)
        print(f"‚Üí Obtenidos {len(data)} modelos (total: {len(models)})")
        skip += LIMIT
        time.sleep(0.3)  # Brief pause to avoid hitting the rate limit
    return models

# Download the models of every organization and label each with its ticker.
all_models = []
for org, ticker in ORGS_WITH_TICKER.items():
    org_models = fetch_all_models_for_org(org)
    for model in org_models:
        model['ticker'] = ticker
    all_models.extend(org_models)

# Convert the JSON records to a DataFrame.
df_models = pd.json_normalize(all_models)
# If nothing was downloaded the frame has no columns, and de-duplicating on a
# missing column raises KeyError, so only do it when the key column exists.
if "modelId" in df_models.columns:
    df_models.drop_duplicates(subset="modelId", inplace=True)

# Save the models CSV without the list-valued "tags" column, which goes to its
# own file below. errors="ignore" tolerates the column being absent.
df_models.drop(columns=["tags"], errors="ignore").to_csv("models_filtered.csv", index=False)
print("‚úÖ models_filtered.csv creado.")

# Expand tags into one (modelId, tag) row per tag. Only the two needed columns
# are iterated; DataFrame.get falls back to an empty list when one is missing.
tags_rows = []
model_ids = df_models.get("modelId", [])
tag_lists = df_models.get("tags", [])
for model_id, tags in zip(model_ids, tag_lists):
    # Entries without a tag list (e.g. NaN) are skipped.
    if isinstance(tags, list):
        tags_rows.extend({"modelId": model_id, "tag": tag} for tag in tags)
# Explicit columns keep the CSV header even when no tags were collected.
df_tags = pd.DataFrame(tags_rows, columns=["modelId", "tag"])
df_tags.to_csv("tags_filtered.csv", index=False)
print("‚úÖ tags_filtered.csv creado.")


üì• Cargando modelos de google...
‚Üí Obtenidos 997 modelos (total: 997)
üì• Cargando modelos de facebook...
‚Üí Obtenidos 1000 modelos (total: 1000)
‚Üí Obtenidos 1000 modelos (total: 2000)
‚Üí Obtenidos 154 modelos (total: 2154)
üì• Cargando modelos de microsoft...
‚Üí Obtenidos 381 modelos (total: 381)
üì• Cargando modelos de Salesforce...
‚Üí Obtenidos 157 modelos (total: 157)
‚úÖ models_filtered.csv creado.
‚úÖ tags_filtered.csv creado.
