In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.utils import resample

# 1. Leer los datos
churnData = pd.read_csv('files_for_lab/Customer-Churn.csv')

# 2. Convertir 'TotalCharges' a tipo numérico y reemplazar valores nulos
churnData['TotalCharges'] = pd.to_numeric(churnData['TotalCharges'], errors='coerce')
churnData['TotalCharges'].fillna(0, inplace=True)

# 3. Seleccionar y escalar características
features = ['tenure', 'SeniorCitizen', 'MonthlyCharges', 'TotalCharges']
X = churnData[features]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. Dividir los datos en conjuntos de entrenamiento y prueba
y = churnData['Churn']
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 5. Construir y evaluar el modelo inicial
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy del modelo inicial: {accuracy:.2f}")

# 6. Manejo del desequilibrio de datos
class_counts = churnData['Churn'].value_counts()
majority_class = churnData[churnData['Churn'] == 'No']
minority_class = churnData[churnData['Churn'] == 'Yes']
minority_class_upsampled = resample(minority_class, replace=True, n_samples=len(majority_class), random_state=42)
balanced_data = pd.concat([majority_class, minority_class_upsampled])

X_balanced = balanced_data[features]
y_balanced = balanced_data['Churn']
X_train_balanced, X_test_balanced, y_train_balanced, y_test_balanced = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)

model.fit(X_train_balanced, y_train_balanced)
y_pred_balanced = model.predict(X_test_balanced)
accuracy_balanced = accuracy_score(y_test_balanced, y_pred_balanced)
print(f"Accuracy del modelo balanceado: {accuracy_balanced:.2f}")


Accuracy del modelo inicial: 0.81
Accuracy del modelo balanceado: 0.74
