### DIFFERENT CLASSIFICATION MODELS FOR THE DATA

In [3]:
import pandas as pd 
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

In [4]:
# Read the cleaned dataset from the project's data directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer="../data/cleaned_data.csv")

In [5]:
# Separate the label column ("Churn") from the predictor columns.
target = data["Churn"]
features = data.drop(columns=["Churn"])

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
trainX, testX, trainY, testY = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=64,
)

In [7]:
# Split the training columns by dtype so each group gets its own transformer.
numeric_features = list(trainX.select_dtypes(include=['number']).columns)
categorical_features = list(trainX.select_dtypes(include=['object','category']).columns)

# NOTE(review): columns of any other dtype (e.g. bool, datetime) match neither
# selector and are dropped by ColumnTransformer's default remainder='drop' --
# confirm the cleaned data contains none.
# NOTE(review): Pipeline does not clone its steps, so every pipeline built from
# this object refits the same instance; use sklearn.base.clone(preprocessor)
# if the pipelines ever need to be fitted on different data.
preprocessor = ColumnTransformer(
    transformers = [
        # Standardize numeric columns.
        ('numeric', StandardScaler(), numeric_features),
        # handle_unknown='ignore' encodes a category that was not seen during
        # fit as an all-zero row instead of raising an error at predict time.
        ('categ', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ]
)

#### MODEL

LOGISTIC REGRESSION

In [8]:
from sklearn.linear_model import LogisticRegression

In [11]:
# Logistic regression with the liblinear solver; in scikit-learn C is the
# inverse regularization strength, so C=0.1 regularizes more than the default.
logistic_classifier = LogisticRegression(solver='liblinear', C=0.1)

# Chain preprocessing and the classifier so both are fitted together.
logistic_pipeline = Pipeline(
    [
        ('preprocessor', preprocessor),
        ('classifier', logistic_classifier),
    ]
)

logistic_pipeline.fit(X=trainX, y=trainY)

In [12]:
# Accuracy of the fitted logistic-regression pipeline on the held-out split.
predY = logistic_pipeline.predict(X=testX)
accuracy_score(y_true=testY, y_pred=predY)

0.8503988440450506

RANDOM FOREST

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
# Small random forest (10 trees, no depth limit). random_state pins the
# bootstrap sampling and per-split feature selection so that the fitted model
# and its score are reproducible on re-run; 42 matches the seed used for the
# gradient-boosting model in this notebook.
random_forest_classifier = RandomForestClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=2,
    random_state=42,
)

# Chain preprocessing and the classifier so both are fitted together.
random_forest_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', random_forest_classifier)
])

random_forest_pipeline.fit(trainX, trainY)

In [16]:
# Accuracy of the fitted random-forest pipeline on the held-out split.
predY = random_forest_pipeline.predict(X=testX)
accuracy_score(y_true=testY, y_pred=predY)

0.9331070248015677

GRADIENT BOOSTING CLASSIFIER

In [17]:
from sklearn.ensemble import GradientBoostingClassifier

In [19]:
# Gradient boosting with trees up to depth 10; the seed makes the fit repeatable.
gradient_boost_classifier = GradientBoostingClassifier(random_state=42, max_depth=10)

# Chain preprocessing and the classifier so both are fitted together.
gradient_boost_pipeline = Pipeline(
    [
        ('preprocessor', preprocessor),
        ('classifier', gradient_boost_classifier),
    ]
)

gradient_boost_pipeline.fit(X=trainX, y=trainY)

In [20]:
# Accuracy of the fitted gradient-boosting pipeline on the held-out split.
predY = gradient_boost_pipeline.predict(X=testX)
accuracy_score(y_true=testY, y_pred=predY)

0.9356802121889907

XGBOOST CLASSIFIER

In [21]:
import xgboost as xgb

In [22]:
# XGBoost classifier: 100 boosting rounds, learning rate 0.1, trees up to depth 7.
xgb_classifier = xgb.XGBClassifier(
    max_depth=7,
    learning_rate=0.1,
    n_estimators=100,
)

# Chain preprocessing and the classifier so both are fitted together.
xgb_pipeline = Pipeline(
    [
        ('preprocessor', preprocessor),
        ('classifier', xgb_classifier),
    ]
)

xgb_pipeline.fit(X=trainX, y=trainY)

In [23]:
# Accuracy of the fitted XGBoost pipeline on the held-out split.
predY = xgb_pipeline.predict(X=testX)
accuracy_score(y_true=testY, y_pred=predY)

0.9344332059935473