In [1]:
import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report,accuracy_score,mean_absolute_error,mean_squared_error


In [2]:
df=pd.read_csv("churn.csv")

In [3]:
X=df.drop("Churn",axis=1)
y=df["Churn"]

In [4]:
df.isnull().sum()

CustomerID           0
Age                  0
Gender               0
Tenure               0
Usage Frequency      0
Support Calls        0
Payment Delay        0
Subscription Type    0
Contract Length      0
Total Spend          0
Last Interaction     0
Churn                0
dtype: int64

In [5]:
X.shape,y.shape

((64374, 11), (64374,))

# Encoding + Scaling (Industry way)

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

num_cols = [
    "Age",
    "Tenure",
    "Usage Frequency",
    "Support Calls",
    "Payment Delay",
    "Total Spend"
]

cat_cols = [
    "Gender",
    "Subscription Type",
    "Contract Length",
    "Last Interaction"
]

preprocess = ColumnTransformer([
    ("num", StandardScaler(), num_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
])


In [9]:
# Train test split 

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# 1. Logistic Regression (Baseline Model)

In [10]:
from sklearn.linear_model import LogisticRegression

model = Pipeline([
    ("prep", preprocess),
    ("clf", LogisticRegression())
])

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


ValueError: not enough values to unpack (expected 3, got 2)

In [53]:
#NaiveBayes

from sklearn.naive_bayes import GaussianNB

model = Pipeline([
    ("prep", preprocess),
    ("clf", GaussianNB())
])


In [54]:
#KNN

from sklearn.neighbors import KNeighborsClassifier

model = Pipeline([
    ("prep", preprocess),
    ("clf", KNeighborsClassifier(n_neighbors=5))
])


In [55]:
#SVM

from sklearn.svm import SVC

model = Pipeline([
    ("prep", preprocess),
    ("clf", SVC(kernel="rbf"))
])


In [56]:
#Decision Tree

from sklearn.tree import DecisionTreeClassifier

model = Pipeline([
    ("prep", preprocess),
    ("clf", DecisionTreeClassifier(max_depth=5))
])


In [57]:
#RandomFOreest

from sklearn.ensemble import RandomForestClassifier

model = Pipeline([
    ("prep", preprocess),
    ("clf", RandomForestClassifier(n_estimators=200))
])


In [58]:
#AdaBOOST

from sklearn.ensemble import AdaBoostClassifier

model = Pipeline([
    ("prep", preprocess),
    ("clf", AdaBoostClassifier(n_estimators=100))
])


In [61]:
pip install xgboost


Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Downloading xgboost-3.1.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   - -------------------------------------- 2.1/72.0 MB 13.0 MB/s eta 0:00:06
   -- ------------------------------------- 4.7/72.0 MB 12.4 MB/s eta 0:00:06
   ----- ---------------------------------- 9.4/72.0 MB 16.3 MB/s eta 0:00:04
   -------- ------------------------------- 14.4/72.0 MB 18.1 MB/s eta 0:00:04
   ---------- ----------------------------- 18.9/72.0 MB 18.3 MB/s eta 0:00:03
   ------------- -------------------------- 24.9/72.0 MB 20.0 MB/s eta 0:00:03
   ----------------- ---------------------- 32.2/72.0 MB 22.3 MB/s eta 0:00:02
   --------------------- ------------------ 38.0/72.0 MB 22.6 MB/s eta 0:00:02
   ------------------------- -------------- 46.1/72.0 MB 24.5 MB/s eta 0:00:

In [63]:
from xgboost import XGBClassifier

model = Pipeline([
    ("prep", preprocess),
    ("clf", XGBClassifier(
        n_estimators=300,
        max_depth=5,
        learning_rate=0.05
    ))
])


ModuleNotFoundError: No module named 'xgboost'