# Customer Churn Prediction
**Goal:** Predict which customers are likely to churn using logistic regression.

**Tools:** Python, pandas, matplotlib, seaborn, scikit-learn

**Dataset:** Telco Customer Churn (Kaggle)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Read the raw churn CSV (path is relative to the notebook's working directory)
# into `df`, the frame every later cell works from.
df = pd.read_csv(filepath_or_buffer="Customer_Churn_prediction.csv")

# Preview the first five rows as the cell's rich output.
df.head(n=5)

In [None]:
# Structural overview: info() prints its report (column dtypes, non-null counts)
# directly rather than returning it.
df.info()

# Summary statistics with describe()'s default settings; left as the last
# expression so the notebook renders the resulting table.
summary_stats = df.describe()
summary_stats

In [None]:
# EDA: three quick looks at how churn relates to the other columns.

# 1) Count of customers per value of the target column.
churn_ax = sns.countplot(x='Churn', data=df)
churn_ax.set_title("Churn Distribution")
plt.show()

# 2) Customer counts per contract type, split by churn.
contract_ax = sns.countplot(x='Contract', hue='Churn', data=df)
contract_ax.set_title("Contract Type vs Churn")
plt.show()

# 3) Spread of monthly charges within each churn group.
charges_ax = sns.boxplot(x='Churn', y='MonthlyCharges', data=df)
charges_ax.set_title("Monthly Charges by Churn")
plt.show()

In [None]:
# Preprocessing
# This cell is written to be safely re-runnable: running it a second time on the
# already-processed `df` leaves the data unchanged instead of raising or
# corrupting the target column.

# Drop the identifier column. errors='ignore' makes a re-run a no-op instead of
# a KeyError once the column is already gone.
df = df.drop(columns='customerID', errors='ignore')

# Parse TotalCharges as numbers; errors='coerce' turns entries that cannot be
# parsed into NaN, which are then filled with the column median.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].median())

# Encode the target as 1 (churned) / 0 (stayed). Only map while the column still
# holds the raw labels: mapping an already-numeric column would find no matching
# keys and silently replace every value with NaN.
if not pd.api.types.is_numeric_dtype(df['Churn']):
    df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Fail early, with a clear message, if any label was not 'Yes'/'No'.
assert df['Churn'].notna().all(), "Unexpected or missing values in 'Churn'"

# One-hot encode the remaining categorical columns; drop_first=True drops one
# level per feature so the dummy columns are not perfectly collinear.
df_encoded = pd.get_dummies(df, drop_first=True)

In [None]:
# Logistic Regression
# Separate the feature matrix from the binary churn target.
X = df_encoded.drop('Churn', axis=1)
y = df_encoded['Churn']

# Hold out 20% of the rows for evaluation. stratify=y keeps the proportion of
# churners the same in the train and test partitions, so the reported metrics
# are not skewed by an unlucky split of the classes.
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# NOTE(review): max_iter is well above scikit-learn's default, presumably so the
# solver converges on the unscaled features; confirm.
model = LogisticRegression(max_iter=3000)
model.fit(X_train, y_train)

# Hard class predictions (0/1) for the held-out rows.
y_pred = model.predict(X_test)

In [None]:
# Evaluation
# Compute each metric on the held-out predictions first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

## Business Insights
- Customers with month-to-month contracts and short tenure are the most likely to churn.
- The logistic regression model achieved ~80% accuracy on the held-out test set (20% of the data).
- The business can target these high-risk segments with retention strategies.

In [None]:
# Optional: Export predictions
# Pair each held-out true label with the model's prediction, one row per
# test-set customer.
prediction_columns = {'Actual': y_test, 'Predicted': y_pred}
output = pd.DataFrame(data=prediction_columns)

# index=False leaves the DataFrame index out of the written file.
output.to_csv(path_or_buf="churn_predictions.csv", index=False)

In [None]:
# Churn by Contract Type
# Uses `sns` and `plt` from the notebook's import cell at the top; they are not
# re-imported here so that all dependencies stay declared in one place.
ax = sns.countplot(data=df, x='Contract', hue='Churn')
ax.set(
    title='Churn by Contract Type',
    xlabel='Contract Type',
    ylabel='Customer Count',
)
# Tilt the x tick labels by 20 degrees.
plt.xticks(rotation=20)
plt.show()

In [None]:
# Tenure distribution by churn
# 30-bin histograms of tenure, one per churn value (hue), each with a KDE curve.
tenure_ax = sns.histplot(x='tenure', hue='Churn', data=df, bins=30, kde=True)
tenure_ax.set_title('Tenure Distribution by Churn')
plt.show()

In [None]:
# Monthly Charges by Churn
# Box plot of the monthly charge distribution for each churn value.
charges_ax = sns.boxplot(x='Churn', y='MonthlyCharges', data=df)
charges_ax.set_title('Monthly Charges vs Churn')
plt.show()