# Load Dataset

In [2]:
# import libraires
import pandas as pd
import joblib
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

In [3]:
df=pd.read_csv("Selected_features.csv")

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15288 entries, 0 to 15287
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Unnamed: 0            15288 non-null  int64  
 1   Total Logins          15288 non-null  float64
 2   Tickets Raised        15288 non-null  float64
 3   Customer Tenuer Year  15288 non-null  int64  
 4   Sentiment Score       15288 non-null  float64
 5   Onboarding Year       15288 non-null  int64  
 6   Loans Accessed        15288 non-null  int64  
 7   Loans Taken           15288 non-null  int64  
 8   Monthly Avg Balance   15288 non-null  float64
 9   Churned               15288 non-null  int64  
dtypes: float64(4), int64(6)
memory usage: 1.2 MB


In [4]:
X= df.drop(["Unnamed: 0","Churned"],axis=1)
y=df["Churned"]

In [5]:
models={
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "RandomeForestClassifier":RandomForestClassifier(),
    "XGBClassifier":XGBClassifier(),
    "LGBMClassifier":LGBMClassifier(),
    "CatBoostClassifier":CatBoostClassifier(verbose=0),
}
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [6]:
results=[]
for name, model in models.items():
    print(f"Training {name}.....")
    model.fit(X_train,y_train)
    
    pred=model.predict(X_test)
    prob=model.predict_proba(X_test)[:,1]
    accuracy = accuracy_score(y_test,pred)
    f1 = f1_score(y_test,pred)
    auc = roc_auc_score(y_test,prob)
    results.append({
        'Model':name,
        'Accuracy':accuracy,
        'F1 Score': f1,
        'ROC AUC': auc
    })
    joblib.dump(model, f"selected{name}.pkl")
    print(f"{name} model saved")

print("Training Completed.")
result_df = pd.DataFrame(results)

Training LogisticRegression.....
LogisticRegression model saved
Training RandomeForestClassifier.....
RandomeForestClassifier model saved
Training XGBClassifier.....
XGBClassifier model saved
Training LGBMClassifier.....
[LightGBM] [Info] Number of positive: 6085, number of negative: 6145
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000604 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1065
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.497547 -> initscore=-0.009812
[LightGBM] [Info] Start training from score -0.009812


[WinError 2] The system cannot find the file specified
  File "C:\ProgramData\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\ProgramData\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\ProgramData\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\ProgramData\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


LGBMClassifier model saved
Training CatBoostClassifier.....
CatBoostClassifier model saved
Training Completed.


In [7]:
result_df

Unnamed: 0,Model,Accuracy,F1 Score,ROC AUC
0,LogisticRegression,0.837802,0.839482,0.917399
1,RandomeForestClassifier,0.913342,0.9141,0.971897
2,XGBClassifier,0.920209,0.919895,0.974438
3,LGBMClassifier,0.92119,0.920906,0.976397
4,CatBoostClassifier,0.923479,0.922976,0.977162
