# Demo: TelecomAI en 5 minutos
Este notebook carga una muestra de datos, realiza preprocesamiento, genera una predicción rápida, visualiza importancias (coeficientes LR) y crea un snapshot estilo dashboard.

In [None]:
from pathlib import Path
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from data.preprocess import build_preprocessor

# Load a quick sample: ask the parser for only the first 1000 rows instead of
# reading the whole CSV and discarding the rest afterwards.
data_path = Path('users_behavior.csv')
df = pd.read_csv(data_path, nrows=1000)
features = ['calls','minutes','messages','mb_used']
target = 'is_ultra'
X = df[features]
y = df[target]
# Stratified 80/20 split with a fixed seed so the demo is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
# Last expression of the cell: displayed by the notebook as the cell output.
X_train.shape, X_test.shape

In [None]:
# Reuse the exported pipeline when the artifact exists; otherwise fit a quick
# logistic-regression baseline on the training split.
export_path = Path('models/model_v1.0.0.pkl')
if not export_path.exists():
    pre = build_preprocessor(features)
    clf = LogisticRegression(
        C=1.0,
        penalty='l2',
        solver='liblinear',
        class_weight='balanced',
    )
    pipe = Pipeline(steps=[('preprocess', pre), ('clf', clf)])
    pipe.fit(X_train, y_train)
else:
    pipe = joblib.load(export_path)

# Score a single held-out row: hard label plus, when the model exposes
# predict_proba, the second probability column.
sample = X_test.iloc[[0]]
pred = int(pipe.predict(sample)[0])
proba = None
if hasattr(pipe, 'predict_proba'):
    proba = float(pipe.predict_proba(sample)[0, 1])
# Last expression of the cell: displayed by the notebook as the cell output.
pred, proba

In [None]:
# Importances: logistic-regression coefficients of the 'clf' step.
# NOTE(review): coefficients are only comparable across features if the
# preprocessor standardizes the inputs; build_preprocessor is not visible
# here — confirm.
# A loaded artifact may be a bare estimator rather than a Pipeline; in that
# case inspect the object itself instead of failing on a missing attribute.
steps = getattr(pipe, 'named_steps', None)
clf = steps.get('clf') if steps is not None else pipe
coefs = getattr(clf, 'coef_', None)
if coefs is not None:
    weights = coefs[0]
    # The fitted preprocessor may emit a different number of columns than the
    # raw feature list (e.g. an exported model built with other settings).
    # Fall back to positional names rather than raising a length-mismatch
    # ValueError when building the Series.
    if len(weights) == len(features):
        names = list(features)
    else:
        names = [f'x{i}' for i in range(len(weights))]
    imp = pd.Series(weights, index=names).sort_values()
    plt.figure(figsize=(6,3))
    imp.plot(kind='barh')
    plt.title('Importancias (coeficientes LR)')
    plt.tight_layout()
    # Make sure the output directory exists before saving the figure.
    Path('artifacts').mkdir(exist_ok=True)
    plt.savefig('artifacts/feature_importance.png', dpi=120)
    plt.show()
else:
    print('Coeficientes no disponibles para el estimador actual')

In [None]:
# 'Dashboard' snapshot: histogram of the second predict_proba column over the
# test split, saved under artifacts/ and shown inline.
if not hasattr(pipe, 'predict_proba'):
    print('El pipeline no soporta predict_proba')
else:
    probs = pipe.predict_proba(X_test)[:, 1]
    fig, ax = plt.subplots(figsize=(6, 3))
    sns.histplot(probs, bins=20, kde=False, ax=ax)
    ax.set_xlabel('P(is_ultra=1)')
    ax.set_title('Distribución de probabilidades')
    fig.tight_layout()
    # Create the output directory on first use; no error if it already exists.
    out_dir = Path('artifacts')
    out_dir.mkdir(exist_ok=True)
    fig.savefig(out_dir / 'dashboard_snapshot.png', dpi=120)
    plt.show()