# Análisis de Telco Customer Churn

Notebook para exploración de datos y entrenamiento de modelos de Machine Learning

In [None]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by later cells
# (e.g. library deprecation or convergence warnings) - confirm that is intended.
warnings.filterwarnings(action='ignore')

# Load the dataset (path is relative to the current working directory)
df = pd.read_csv('../datasets/telco_churn.csv')

# Preview the first rows, then report the (rows, columns) shape
print(df.head(), f'\nDimensions: {df.shape}', sep='\n')

## Exploración de Datos

In [None]:
# Structural overview: column names, non-null counts and dtypes
df.info()

# Summary statistics (describe() covers numeric columns by default),
# separated from the info() output by blank lines
numeric_summary = df.describe()
print('\n')
print(numeric_summary)

In [None]:
# Churn analysis: absolute class counts and their relative frequencies
churn_counts = df['Churn'].value_counts()
churn_share = df['Churn'].value_counts(normalize=True)

print('Distribución de Churn:')
print(churn_counts)
print('\nProporción:')
print(churn_share)

## Preprocesamiento y Modelado

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Work on a copy so the raw `df` used by the exploration cells is left untouched
df_copy = df.copy()

# Parse TotalCharges as numeric; values that cannot be parsed become NaN, and
# dropna() then removes every row that has a NaN in any column
df_copy['TotalCharges'] = pd.to_numeric(df_copy['TotalCharges'], errors='coerce')
df_copy = df_copy.dropna()

# Remove the per-customer identifier before encoding: label-encoding an ID
# yields an arbitrary, near-unique integer that carries no signal and distorts
# distance-based models such as KNN.
# NOTE(review): assumes the identifier column is named `customerID`;
# errors='ignore' makes this a no-op if the column is absent.
df_copy = df_copy.drop(columns=['customerID'], errors='ignore')

# Integer-encode every object (string) column, the target included. The fitted
# encoders are kept so integer codes can be mapped back via inverse_transform().
label_encoders = {}
for col in df_copy.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df_copy[col] = le.fit_transform(df_copy[col])
    label_encoders[col] = le

X = df_copy.drop('Churn', axis=1)
y = df_copy['Churn']

# Split BEFORE scaling so the test rows never influence the scaler statistics
# (fitting the scaler on the full data leaks test information into training).
# stratify=y keeps the Churn class ratio the same in both partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardise with mean/std learned from the training partition only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the same train-fitted scaling, kept under its
# original name for any later cell that still reads `X_scaled`
X_scaled = scaler.transform(X)

print(f'Tamaño de entrenamiento: {X_train.shape}')
print(f'Tamaño de prueba: {X_test.shape}')

In [None]:
# Train logistic regression (fit() returns the estimator itself, so the
# construction and fitting can be chained)
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Evaluate on the held-out test partition
lr_pred = lr.predict(X_test)
lr_acc = accuracy_score(y_test, lr_pred)
lr_confusion = confusion_matrix(y_test, lr_pred)
lr_report = classification_report(y_test, lr_pred)

print(f'Logistic Regression Accuracy: {lr_acc:.4f}')
print('\nConfusion Matrix:')
print(lr_confusion)
print('\nClassification Report:')
print(lr_report)

In [None]:
# Train a k-nearest-neighbours classifier with k=5 (fit() returns the
# estimator itself, so construction and fitting can be chained)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Evaluate on the held-out test partition
knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_pred)
knn_confusion = confusion_matrix(y_test, knn_pred)

print(f'KNN (k=5) Accuracy: {knn_acc:.4f}')
print('\nConfusion Matrix:')
print(knn_confusion)