# Customer Churn Analysis
This project analyzes a telecom customer dataset to identify churn patterns and trains a logistic regression model to predict which customers will churn.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Read the churn CSV into a DataFrame and preview its first rows
df = pd.read_csv(filepath_or_buffer='data/telecom_customer_churn.csv')
df.head()

In [None]:
# Data cleaning, expressed as one chain so each step feeds the next
df = (
    # Discard every row that has at least one missing value
    df.dropna()
    # Parse TotalCharges as numbers; entries that cannot be parsed become NaN
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    # Replace the remaining NaNs with the mean of their (numeric) column
    .pipe(lambda frame: frame.fillna(frame.mean(numeric_only=True)))
    # Remove the customerID column
    .drop(columns=['customerID'])
)

In [None]:
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each so the resulting indicator columns are not redundant
df = pd.get_dummies(data=df, drop_first=True)
df.head()

In [None]:
# Feature selection and model training
# 'Churn_Yes' is the prediction target; every other column is used as a feature.
X = df.drop('Churn_Yes', axis=1)
y = df['Churn_Yes']

# Hold out 20% of the rows for evaluation. stratify=y keeps the proportion of
# churned vs. retained customers the same in the train and test partitions, so
# the reported metrics are not skewed by an unlucky split of the target classes.
# NOTE: stratification requires at least two samples of each class in y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# max_iter is set well above scikit-learn's default to give the solver more
# iterations to converge on these unscaled features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predicted labels for the held-out rows, consumed by the evaluation step.
y_pred = model.predict(X_test)

In [None]:
# Evaluate on the held-out set: the confusion matrix first, then the
# per-class precision / recall / F1 report
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))