In [4]:
import pandas as pd,numpy as np, joblib, optuna
from imblearn.over_sampling import SMOTE
from sklearn.impute import KNNImputer
from sklearn.ensemble import BaggingClassifier,GradientBoostingClassifier, RandomForestClassifier,AdaBoostClassifier,ExtraTreesClassifier
from sklearn import tree
from sklearn.linear_model import SGDClassifier,RidgeClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB,GaussianNB,GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score, confusion_matrix

In [None]:
df = pd.read_csv('../2. Preprocessing/Processed data.csv')

In [8]:
labels = df['Churn']
features = df.drop(columns= ['Churn'],axis=1)

x_train, x_validation, y_train, y_validation = train_test_split(features, labels, random_state = 42, stratify = labels, test_size = 0.1)


sm = SMOTE(random_state=27)
x_train, y_train = sm.fit_resample(x_train, y_train)

In [90]:
models = {
    "LogisticRegression" : LogisticRegression(),
    "AdaBoost" : AdaBoostClassifier(DecisionTreeClassifier(),n_estimators=10),
    "GradientBoosting"      : GradientBoostingClassifier(),
    "BaggingClassifier"  : BaggingClassifier(tree.DecisionTreeClassifier(random_state=1)),
    "RandomForest"      : RandomForestClassifier(n_estimators=3),
    "eXtremeGradientBoosting"     : XGBClassifier(objective= 'binary:logistic'),
    "KNN"                : KNeighborsClassifier(n_neighbors = 30, weights = 'distance'),
    "DecisionTree"      : DecisionTreeClassifier(),
    "ExtraTreeClassifier"       : ExtraTreesClassifier(n_estimators=3)
}


for name, model in models.items():
    

    print(f'Using model: {name}')
    model.fit(x_train, y_train)
    joblib.dump(model,f'Saved Models/{name}.h5')
    print(f'Training Score: {model.score(x_train, y_train)}')
    
    y_pred = model.predict(x_validation)
    print(f'confusion_matrix:\n {confusion_matrix(y_validation, y_pred)}')
    print(f'accuracy_score: {accuracy_score(y_validation, y_pred)}')
    print(f'precision_score: {precision_score(y_validation, y_pred,average="macro")}')
    print(f'recall_score: {recall_score(y_validation, y_pred,average="macro")}')
    print(f'f1_score: {recall_score(y_validation, y_pred,average="macro")}')
    print('-------------')

Using model: LogisticRegression
Training Score: 0.574633997171362
Test Score: 0.5668462291870715
confusion_matrix:
 [[1641 1266]
 [ 503  674]]
accuracy_score: 0.5668462291870715
precision_score: 0.5564072357285736
recall_score: 0.568570897482098
f1_score: 0.568570897482098
-------------
Using model: AdaBoost
Training Score: 1.0
Test Score: 0.6158178256611165
confusion_matrix:
 [[2055  852]
 [ 717  460]]
accuracy_score: 0.6158178256611165
precision_score: 0.5459758737197762
recall_score: 0.5488692369135644
f1_score: 0.5488692369135644
-------------
Using model: GradientBoosting
Training Score: 0.8015175260884523
Test Score: 0.7083741429970617
confusion_matrix:
 [[2793  114]
 [1077  100]]
accuracy_score: 0.7083741429970617
precision_score: 0.5944975729913787
recall_score: 0.5228730404651241
f1_score: 0.5228730404651241
-------------
Using model: BaggingClassifier
Training Score: 0.9877489392607316
Test Score: 0.6865817825661117
confusion_matrix:
 [[2567  340]
 [ 940  237]]
accuracy_score

In [109]:
def objective(trial):
    # Suggest hyperparameters controlling complexity
    max_depth = trial.suggest_int('max_depth', 2, 32)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 32)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 32)

    # Create and train model
    model = DecisionTreeClassifier(
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        min_samples_split=min_samples_split,
        random_state=42
    )
    model.fit(x_train, y_train)

    # Predict on validation set
    preds = model.predict(x_validation)

    # Use accuracy or another metric on validation set
    f1 = f1_score(y_validation, preds)

    # Optuna tries to maximize accuracy
    return f1

# Create study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=16)

print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)


[I 2025-06-04 23:13:20,794] A new study created in memory with name: no-name-a251fd25-8c58-4cf2-aa95-40af99a52580
[I 2025-06-04 23:13:21,830] Trial 0 finished with value: 0.26590909090909093 and parameters: {'max_depth': 11, 'min_samples_leaf': 6, 'min_samples_split': 2}. Best is trial 0 with value: 0.26590909090909093.
[I 2025-06-04 23:13:22,991] Trial 1 finished with value: 0.34802158273381295 and parameters: {'max_depth': 20, 'min_samples_leaf': 19, 'min_samples_split': 15}. Best is trial 1 with value: 0.34802158273381295.
[I 2025-06-04 23:13:24,128] Trial 2 finished with value: 0.34679981842941443 and parameters: {'max_depth': 28, 'min_samples_leaf': 22, 'min_samples_split': 22}. Best is trial 1 with value: 0.34802158273381295.
[I 2025-06-04 23:13:24,446] Trial 3 finished with value: 0.43094672548354257 and parameters: {'max_depth': 3, 'min_samples_leaf': 3, 'min_samples_split': 11}. Best is trial 3 with value: 0.43094672548354257.
[I 2025-06-04 23:13:25,673] Trial 4 finished with 

Best hyperparameters: {'max_depth': 3, 'min_samples_leaf': 3, 'min_samples_split': 11}
Best validation accuracy: 0.43094672548354257


In [113]:

clf = LazyClassifier(verbose=0, ignore_warnings=True)

# Fit and evaluate models
models, predictions = clf.fit(x_train, x_validation, y_train, y_validation)

models

  0%|          | 0/32 [00:00<?, ?it/s]

In [2]:
models

NameError: name 'models' is not defined