In [None]:

# Caso 1: Predicción de Churn en Telecomunicaciones

# --- Parte 1: Fundamento Teórico ---
"""
Problema de clasificación binaria: predecir si un cliente abandonará el servicio.
Modelos usados: Regresión Logística, Random Forest, XGBoost, AdaBoost.
Métricas: Accuracy, Recall, Precision, F1, AUC-ROC.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier

# --- Data loading ---
# Read the Telco churn CSV from the current working directory.
data = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")
# Coerce TotalCharges to numeric (entries that cannot be parsed become NaN),
# drop every row that holds a missing value, then discard the customerID column.
data = (
    data.assign(TotalCharges=pd.to_numeric(data['TotalCharges'], errors='coerce'))
        .dropna()
        .drop(columns=['customerID'])
)

# --- Preprocessing ---
# Columns recoded from 'Yes'/'No' to 1/0; Series.map turns any other value into NaN.
binary_cols = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn']
data = data.assign(
    **{flag: data[flag].map({'Yes': 1, 'No': 0}) for flag in binary_cols}
)
# One-hot encode the remaining non-numeric columns, dropping the first level of each.
data = pd.get_dummies(data, drop_first=True)

# --- Split ---
# Separate the Churn target from the feature columns.
y = data['Churn']
X = data.drop(columns='Churn')
# Stratified 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)
# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Models ---
# Candidate classifiers keyed by display name; the key is reused for `results`
# and for the label printed for each model.
# The three ensemble models are pinned to the same seed so the comparison is
# explicitly reproducible (XGBoost previously had no seed set).
# NOTE(review): `use_label_encoder` is deprecated in recent XGBoost releases and
# may only produce a warning there; confirm the installed version before
# removing it.
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(random_state=42),
    'XGBoost': XGBClassifier(
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=42,
    ),
    'AdaBoost': AdaBoostClassifier(random_state=42),
}

# Per model name: the classification report (as a dict) and the ROC AUC.
results = {}
for name, model in models.items():
    # fit() returns the estimator, so the prediction call is chained onto it.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    # Second predict_proba column is taken as the score for label 1 (churn).
    y_prob = model.predict_proba(X_test_scaled)[:, 1]
    results[name] = dict(
        report=classification_report(y_test, y_pred, output_dict=True),
        roc_auc=roc_auc_score(y_test, y_prob),
    )
    # Model header followed by the human-readable report, one per line.
    print(f"\nModelo: {name}", classification_report(y_test, y_pred), sep="\n")

# --- Comparison ---
# Collect each model's ROC AUC and show the values side by side as a bar chart.
roc_scores = {model_name: metrics['roc_auc'] for model_name, metrics in results.items()}
plt.bar(list(roc_scores), list(roc_scores.values()))
plt.title('Comparación de Modelos - AUC ROC')
plt.show()
