## Import Libraries

In [6]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from catboost import *
from lightgbm import *
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import f1_score

## Split data for train test

In [7]:
df = pd.read_csv('Explored.csv')
df.drop('Unnamed: 0', axis=1, inplace=True)
y = df['Churn Value']
X = df.drop('Churn Value', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 1. Catboost

In [8]:
param_dist = {
    'iterations': [100, 200, 500, 1000],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'depth': [4, 6, 8, 10],
    'l2_leaf_reg': [1, 3, 5, 7, 9],
    'border_count': [32, 50, 100, 200],
    'bagging_temperature': [0, 0.2, 0.5, 1.0],
    'random_strength': [1, 2, 5, 10]
}
CatboostClass = CatBoostClassifier(logging_level='Silent')
random_search_cat = CatboostClass.randomized_search(
    param_distributions=param_dist,
    X=X_train,
    y=y_train,
    cv=5,
    n_iter=50,
    search_by_train_test_split=False,
    refit=True,
    plot=False,
    verbose=False
)
best_model = random_search_cat['params']
y_pred = CatboostClass.predict(X_test)
f_score_cat = f1_score(y_test, y_pred)
print(f"Best F1-Score: {f_score_cat}")
print(f"Best Parameters: {best_model}")

Best F1-Score: 0.8742058449809401
Best Parameters: {'border_count': 200, 'bagging_temperature': 0.5, 'random_strength': 1, 'depth': 4, 'learning_rate': 0.05, 'l2_leaf_reg': 3, 'iterations': 200}


## 2.LightGBM

In [9]:
model = LGBMClassifier(verbose=-1)
param_dist = {
    'n_estimators': [100, 200, 500, 1000],
    'learning_rate': [0.01, 0.05, 0.1, 0.3],
    'num_leaves': [20, 30, 40, 50],
    'max_depth': [4, 6, 8, 10],
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9],
    'min_child_weight': [1, 5, 10],
    'reg_alpha': [0, 0.1, 0.5, 1.0],
    'reg_lambda': [0, 0.1, 0.5, 1.0]
}

random_search_lgbm = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=50,
    scoring='f1',
    cv=5,
    random_state=42
)
random_search_lgbm.fit(X_train, y_train)
best_model = random_search_lgbm.best_estimator_
y_pred = best_model.predict(X_test)
f_score_lgbm = f1_score(y_test, y_pred)
print(f"Best F1-score: {f_score_lgbm}")
print(f"Best Parameters: {random_search_lgbm.best_params_}")

Best F1-score: 0.8691232528589581
Best Parameters: {'subsample': 0.9, 'reg_lambda': 1.0, 'reg_alpha': 1.0, 'num_leaves': 50, 'n_estimators': 1000, 'min_child_weight': 5, 'max_depth': 4, 'learning_rate': 0.01, 'colsample_bytree': 0.9}


## 3.RandomForest

In [10]:
model = RandomForestClassifier()

param_dist = {
    'n_estimators': [100, 200, 300, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [5, 7, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

random_search_rf = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=50,
    scoring='f1',
    cv=5,
    random_state=42
)
random_search_rf.fit(X_train, y_train)
best_model = random_search_rf.best_estimator_
y_pred = best_model.predict(X_test)
f_score_rf = f1_score(y_test, y_pred)
print(f"Best F1_score: {f_score_rf}")
print(f"Best Parameters: {random_search_rf.best_params_}")

Best F1_score: 0.8783610755441742
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 10, 'bootstrap': True}
