In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

In [None]:
# Read the Telco churn CSV from the working directory and print a first
# overview: leading rows, column dtypes / non-null counts, and summary
# statistics of the numeric columns.
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

print("First 5 rows:", df.head(), sep="\n")

print("\nInfo:")
df.info()  # writes its report directly to stdout

print("\nDescription:", df.describe(), sep="\n")

In [None]:

# Drop the customerID column (presumably a per-row identifier rather than a
# feature -- confirm against the data dictionary). errors="ignore" keeps this
# cell re-runnable: without it a second run raises KeyError because the
# column is already gone.
df = df.drop(columns=["customerID"], errors="ignore")

# Convert TotalCharges to a numeric dtype; entries that cannot be parsed
# become NaN (errors="coerce") and the affected rows are then removed.
# Reassigning instead of using inplace=True keeps the data lineage explicit.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")
df = df.dropna(subset=["TotalCharges"])


# Sanity check: per-column count of remaining missing values.
print("Null values after cleaning:")
print(df.isnull().sum())

In [None]:

def plot_boxplot(df, column):
    """Draw and show a horizontal box plot for a single column.

    Args:
        df: DataFrame that contains the data to plot.
        column: Name of the column in ``df`` whose values are plotted.
    """
    _, ax = plt.subplots(figsize=(8, 5))
    values = df[column]
    sns.boxplot(x=values, ax=ax)
    ax.set_title(f'Boxplot for {column}')
    plt.show()

def plot_histogram(df, column):
    """Draw and show a 30-bin histogram with a KDE curve for a single column.

    Args:
        df: DataFrame that contains the data to plot.
        column: Name of the column in ``df`` whose values are plotted.
    """
    _, ax = plt.subplots(figsize=(8, 5))
    values = df[column]
    sns.histplot(values, bins=30, kde=True, ax=ax)
    ax.set_title(f'Histogram for {column}')
    plt.show()


# Quick visual check of two numeric features: the distribution of tenure
# and the spread of MonthlyCharges.
plot_histogram(df, column="tenure")
plot_boxplot(df, column="MonthlyCharges")

In [None]:
# Label-encode every object-dtype column of `df` in place, using a separate
# LabelEncoder per column. Each fitted encoder is stored in `encoders`, keyed
# by column name, so the category -> integer mapping can be re-applied to new
# data or inverted later. Re-fitting one shared encoder on every column would
# retain only the mapping of the last column it saw.
#
# Note: once the columns are encoded they are no longer object dtype, so
# re-running this cell alone leaves `encoders` empty; re-run from the data
# loading cell to rebuild it.
encoders = {}
for col in df.columns:
    if df[col].dtype == "object":
        le = LabelEncoder()  # after the loop `le` is the last column's encoder
        df[col] = le.fit_transform(df[col])
        encoders[col] = le

print("Data after encoding:")
print(df.head())

In [None]:
# Separate the features from the Churn target.
X = df.drop(columns=["Churn"])
y = df["Churn"]

# Split BEFORE oversampling. Running SMOTE on the full dataset first lets
# synthetic samples interpolated from (future) test rows end up in the
# training set and also balances the test set artificially, so the reported
# scores are optimistic. stratify=y keeps the original class ratio in both
# partitions.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Before SMOTE:", y_train_raw.value_counts())

# Oversample the minority class on the training portion only; the test set
# stays untouched, real data.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)

# Keep the previous variable names available for any later cell that still
# refers to them; they now hold the resampled *training* data.
X_resampled, y_resampled = X_train, y_train

print("After SMOTE:", y_train.value_counts())
print(f"Train shape: {X_train.shape}")
print(f"Test shape: {X_test.shape}")

In [None]:
# Decision tree with default hyperparameters and a fixed seed: fit on the
# training split, predict the test split, then report accuracy and the
# per-class precision / recall / F1 table.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(y_test, y_pred_dt)

print("--- Decision Tree Results ---")
print("Accuracy:", dt_accuracy)
print(dt_report)

In [None]:
# Random forest with default hyperparameters and a fixed seed: fit on the
# training split, predict the test split, then report accuracy and the
# per-class precision / recall / F1 table.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

print("--- Random Forest Results ---")
print("Accuracy:", rf_accuracy)
print(rf_report)

In [None]:
# Gradient-boosted trees with a fixed seed, evaluated with log loss during
# training. `use_label_encoder` is intentionally not passed: XGBoost has
# deprecated that flag and newer releases report it as an unused parameter.
xgb = XGBClassifier(random_state=42, eval_metric="logloss")
xgb.fit(X_train, y_train)

# Predict the test split, then report accuracy and the per-class
# precision / recall / F1 table.
y_pred_xgb = xgb.predict(X_test)
print("--- XGBoost Results ---")
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))