In [4]:
# run_model_local.py
import joblib
import pandas as pd
from features.features import extract_features

# === Paths and decision threshold ===
# Serialized model artifact loaded with joblib.
MODEL_PATH: str = "models/logreg_phishing_final.joblib"
# Input CSV; it must provide a "url" column.
DATA_PATH: str = "limpieza/phishing/scoring/dataset_es_signal_strong.csv"
# Output CSV that receives the input rows plus the prediction columns.
OUT_PATH: str = "predicciones_model_local.csv"
# Prototype threshold: probabilities >= this value are labelled 1.
THRESHOLD: float = 0.425

# === Load model ===
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only point MODEL_PATH at artifacts from a trusted source.
model_data = joblib.load(MODEL_PATH)
if isinstance(model_data, dict):
    # The artifact may be a dict wrapping the estimator under "model" or
    # "pipeline". Compare against None explicitly rather than chaining with
    # `or`, so the lookup does not depend on the estimator's truthiness.
    model = model_data.get("model")
    if model is None:
        model = model_data.get("pipeline")
    if model is None:
        # Fail here instead of printing a success message with model=None
        # and crashing later when predict_proba is called.
        raise KeyError(
            f"{MODEL_PATH} no contiene las claves 'model' ni 'pipeline'; "
            f"claves disponibles: {sorted(map(str, model_data))}"
        )
else:
    # Anything that is not a dict is used as the model directly.
    model = model_data

print("✅ Modelo cargado correctamente:", type(model))

# === Load dataset ===
df = pd.read_csv(DATA_PATH)

# Feature names used to build an all-zero placeholder row when extraction
# fails for a URL.
FALLBACK_FEATURE_KEYS = (
    "domain_length",
    "domain_entropy",
    "num_params",
    "trusted_path_token",
    "contains_percent",
    "contains_equal",
    "protocol",
    "suspicious_path_token",
    "free_hosting",
    "tld_group",
)

# === Extract features for every URL ===
# Exactly one entry is appended per row of df["url"], so the feature rows
# stay in the same order as the dataset.
features_list = []
for url in df["url"]:
    try:
        feats = extract_features(url)
    except Exception as e:
        # Best effort: report the failure and substitute a zero-filled row
        # (a fresh dict each time) instead of aborting the whole run.
        print(f"[WARN] Error procesando {url}: {e}")
        feats = dict.fromkeys(FALLBACK_FEATURE_KEYS, 0)
    features_list.append(feats)

X = pd.DataFrame(features_list)

# === Predictions ===
# Column index 1 of predict_proba is stored as the phishing probability.
# NOTE(review): this presumes the model's second class is "phishing" —
# confirm against the fitted model's classes_.
proba_matrix = model.predict_proba(X)
probs = proba_matrix[:, 1]
df["prob_model_phishing"] = probs

# Binary label: 1 where the probability reaches THRESHOLD, otherwise 0.
above_threshold = probs >= THRESHOLD
df["label_model"] = above_threshold.astype(int)

# === Save output ===
df.to_csv(OUT_PATH, index=False)
print(f"✅ Predicciones guardadas en: {OUT_PATH}")

# Show the first ten rows of the URL and prediction columns.
preview_columns = ["url", "prob_model_phishing", "label_model"]
print(df[preview_columns].head(10))


✅ Modelo cargado correctamente: <class 'sklearn.pipeline.Pipeline'>
✅ Predicciones guardadas en: predicciones_model_local.csv
                                                 url  prob_model_phishing  \
0  http://0c4d4e6.wcomhost.com/banco-santander/pa...             0.971697   
1  http://0c4d4e6.wcomhost.com/Banco-Santander/pa...             0.971697   
2  http://actividadesinusuales-santander-tarjetas...             0.999560   
3  http://alerta-caixabank1.serveirc.com/caixa-vb...             0.957273   
4  http://alerta-caixabank1.serveirc.com/caixa-vb...             0.957273   
5  http://alerta-caixabank1.serveirc.com/caixa-vb...             0.957273   
6  http://alsheharymedical.com.ye/retail.santande...             0.999829   
7             http://bancosantanderspain.blogspot.tw             0.987297   
8   http://bbva.aviso-vigente.com/tarjetas/alert.php             0.992809   
9              http://bbva-ingress-seger-2-z.hstn.me             0.932586   

   label_model  
0        