In [None]:
# 📉 Customer Churn Prediction
Predicting customer churn using logistic regression and the Telco dataset.

In [None]:
This notebook demonstrates how to predict customer churn using a supervised machine learning model (logistic regression). We use the Telco Customer Churn dataset and walk through a complete ML workflow: data loading, preprocessing, model training, and evaluation.

In [None]:
# Install required packages if not available
!pip install scikit-learn pandas matplotlib seaborn --quiet

In [None]:
# Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Baixar o dataset automaticamente
!wget https://raw.githubusercontent.com/blastchar/telco-customer-churn/master/WA_Fn-UseC_-Telco-Customer-Churn.csv

# Leitura
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.head()

In [None]:
# Clean the raw frame before encoding.
#
# TotalCharges is coerced to a numeric dtype first. In the Telco CSV this column
# is read as text because a few rows hold blank strings; dropna() does not treat
# those as missing, and a text column would later be one-hot encoded value by
# value instead of being used as a number. errors='coerce' turns any entry that
# cannot be parsed into NaN, so the dropna() that follows removes those rows.
df = (
    df
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    .dropna()
    # customerID is removed from the frame; errors='ignore' keeps this cell
    # re-runnable once the column is already gone.
    .drop(columns='customerID', errors='ignore')
)

In [None]:
# Encode the categorical (object-dtype) columns:
#   - columns with exactly two distinct values are label-encoded in place;
#   - every other object column is one-hot encoded, dropping the first level.
object_cols = list(df.select_dtypes(include='object').columns)
binary_cols = [name for name in object_cols if df[name].nunique() == 2]
multi_cols = [name for name in object_cols if name not in binary_cols]

for name in binary_cols:
    df[name] = LabelEncoder().fit_transform(df[name])

# One call handles all remaining columns; dummy columns are appended in the
# same order as encoding them one at a time would produce.
if multi_cols:
    df = pd.get_dummies(df, columns=multi_cols, drop_first=True)

In [None]:
# Separate the feature matrix from the target column.
X = df.drop('Churn', axis=1)
y = df['Churn']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage), which makes the evaluation optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell still referring to X_scaled keeps working: the full
# feature matrix, scaled with the train-only statistics.
X_scaled = scaler.transform(X)

In [None]:
# Fit a logistic-regression classifier on the training split (solver capped at
# 1000 iterations), then predict labels for the held-out test split.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Compute both evaluation summaries for the test split, then print them.
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", conf_mat)
print("\nClassification Report:\n", report)

In [None]:
# Draw the confusion matrix as an annotated heatmap (integer cell labels, blue
# palette) and label it through the Axes object that seaborn returns.
ax = sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual")
plt.show()

In [None]:
### ✅ Conclusions

- Logistic Regression reached ~80% accuracy in predicting customer churn.
- Main preprocessing steps: label encoding, one-hot encoding, scaling.
- This approach can be improved with more advanced models, feature engineering, or hyperparameter tuning.

This project demonstrates a full ML pipeline suitable for production prototyping or portfolio use.