In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix , classification_report
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import StandardScaler
# Silences every warning category for the rest of the session, which also hides
# deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')

In [3]:
# Load the raw churn table from the CSV next to the notebook.
df = pd.read_csv(filepath_or_buffer='credit_card_churn.csv')

In [None]:
# Peek at the first five rows of the raw table.
df.head(n=5)

In [None]:
# (rows, columns) of the loaded table.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values per column.
df.isna().sum()

In [None]:
# List the distinct values of every column to spot the categorical ones.
for column_position, column_name in enumerate(df.columns):
    distinct_values = df.iloc[:, column_position].unique()
    print(f'{column_name} : {distinct_values}')

In [10]:
# One-hot encode each categorical column (first level dropped as the baseline).
# A per-column prefix keeps the indicator names unique when different columns
# share a level label, and dtype=int yields numeric 0/1 indicators regardless of
# the default dummy dtype of the installed pandas version.
df_Edu_level = pd.get_dummies(data=df['Education_Level'] , prefix='Education_Level' , drop_first=True , dtype=int)
df_Marital_Status = pd.get_dummies(data=df['Marital_Status'] , prefix='Marital_Status' , drop_first=True , dtype=int)
df_Income_Category = pd.get_dummies(data=df['Income_Category'] , prefix='Income_Category' , drop_first=True , dtype=int)
df_Card_Category = pd.get_dummies(data=df['Card_Category'] , prefix='Card_Category' , drop_first=True , dtype=int)

# Assign the encoded column back instead of calling replace(..., inplace=True)
# on the selected column: the chained in-place form operates on an intermediate
# object and does not reliably update `df` in recent pandas versions.
# `map` also yields a proper integer column; any label missing from the mapping
# becomes NaN rather than silently staying a string.
df['Attrition_Flag'] = df['Attrition_Flag'].map({'Attrited Customer':0 , 'Existing Customer':1})
df['Gender'] = df['Gender'].map({'M':0 , 'F':1})


In [12]:
# Append the indicator columns, then discard the original categorical columns
# and the CLIENTNUM identifier.
frames_to_join = [df , df_Edu_level , df_Marital_Status , df_Income_Category , df_Card_Category]
columns_to_remove = ['Education_Level' , 'Marital_Status' , 'Income_Category' , 'Card_Category' , 'CLIENTNUM']
df = pd.concat(frames_to_join , axis=1).drop(columns=columns_to_remove)

In [None]:
# First five rows after encoding.
df.iloc[:5]

In [14]:
# Target is the encoded attrition flag; every remaining column is a feature.
y = df['Attrition_Flag']
X = df.drop(columns='Attrition_Flag')

In [None]:
# Class counts of the target before any resampling.
y.value_counts()

In [None]:
# Oversample the minority class with SMOTE so both labels are equally frequent.
# A fixed random_state makes the synthetic samples reproducible across runs.
smote = SMOTE(sampling_strategy='minority' , random_state=0)
# `fit_sample` was removed from imbalanced-learn; `fit_resample` is the
# supported name for the same operation.
# NOTE(review): x_sm / y_sm contain synthetic rows. Splitting them into
# train/test puts synthetic samples into the evaluation data — prefer
# resampling the training split only.
x_sm , y_sm = smote.fit_resample(X , y)
y_sm.value_counts()

In [None]:
# Split the ORIGINAL data first so the held-out set only contains real rows in
# their natural class ratio (stratified on the label).
X_train, X_test, y_train, y_test = train_test_split(X , y , test_size=.25 , random_state=0 , stratify=y)
# Oversample the training portion only. Resampling before the split lets SMOTE
# interpolate from rows that later land in the test set and makes the test set
# partly synthetic, which inflates every downstream score.
X_train, y_train = SMOTE(sampling_strategy='minority' , random_state=0).fit_resample(X_train , y_train)

In [None]:
# Shapes of the training and test feature matrices.
tuple(part.shape for part in (X_train , X_test))

In [None]:
# Standardize features. `StandardScaler` comes from sklearn.preprocessing and
# must be imported in the notebook's import cell.
scale = StandardScaler()
# Fit on the training split only, then reuse the same fitted scaler for the
# test split so no test-set statistics influence the transform.
X_train_scaled = scale.fit_transform(X_train)
X_test_scaled = scale.transform(X_test)

In [None]:
def best_algo(X , y , cv=10):
    """Grid-search three classifiers and report the best configuration of each.

    Runs a separate ``GridSearchCV`` for logistic regression, Bernoulli naive
    Bayes and a random forest, and collects each search's best cross-validated
    score together with the parameters that produced it.

    Parameters
    ----------
    X :
        Training features, passed unchanged to ``GridSearchCV.fit``.
    y :
        Training labels, passed unchanged to ``GridSearchCV.fit``.
    cv : int, default=10
        Cross-validation setting forwarded to every ``GridSearchCV``.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns ``model``, ``best_score`` and
        ``best_para``.

    Notes
    -----
    Sets the global pandas option ``display.max_colwidth`` to ``None`` so the
    parameter dictionaries are displayed in full.
    """
    # np.arange accumulates float error (e.g. 9.800000000000008); rounding to
    # one decimal keeps the same grid points with clean values.
    c_values = np.round(np.arange(1 , 3 , 0.1) , 1)
    alpha_values = np.round(np.arange(1 , 10 , 0.1) , 1)
    intercept_options = [True , False]
    all_solvers = ['newton-cg' , 'lbfgs' , 'liblinear' , 'sag' , 'saga']

    algos = {
        'model_logistic' : {
            'model' : LogisticRegression() ,
            # A list of grids restricts the search to penalty/solver pairs the
            # estimator accepts; a single cross-product grid spends most fits on
            # combinations that only raise and are scored as NaN.
            # 'elasticnet' is omitted: it requires an `l1_ratio`, which the
            # original grid never supplied, so it could not yield a valid score.
            'para'  : [
                {
                    'penalty' : ['l2'],
                    'solver' : all_solvers,
                    'C' : c_values,
                    'fit_intercept' : intercept_options,
                },
                {
                    # l1 is only implemented by liblinear and saga.
                    'penalty' : ['l1'],
                    'solver' : ['liblinear' , 'saga'],
                    'C' : c_values,
                    'fit_intercept' : intercept_options,
                },
                {
                    # Unpenalized fit: C has no effect and liblinear does not
                    # support it. `None` replaces the removed string 'none'.
                    'penalty' : [None],
                    'solver' : ['newton-cg' , 'lbfgs' , 'sag' , 'saga'],
                    'fit_intercept' : intercept_options,
                },
            ]
        },

        'model_naive' : {
            'model' : BernoulliNB() ,
            'para' : {
                'alpha' : alpha_values,
                'fit_prior' : [True , False],
            }
        },

        'model_RF' : {
            # Fixed seed so repeated searches rank the candidates identically.
            'model' : RandomForestClassifier(random_state=0),
            # `warm_start` is not searched: GridSearchCV fits a fresh clone for
            # every candidate, so it cannot change the result.
            # 'auto' is dropped from max_features: for classifiers it was an
            # alias of 'sqrt' and has been removed from scikit-learn.
            'para' : [
                {
                    'criterion' : ["gini", "entropy"],
                    'max_features' : ["sqrt", "log2"],
                    'bootstrap' : [True],
                    'oob_score' : [True , False],
                },
                {
                    # Out-of-bag scoring needs bootstrap sampling, so it is
                    # only paired with bootstrap=True above.
                    'criterion' : ["gini", "entropy"],
                    'max_features' : ["sqrt", "log2"],
                    'bootstrap' : [False],
                    'oob_score' : [False],
                },
            ]
        }
    }

    scores = []
    for algo_name , config in algos.items():
        gcv = GridSearchCV(config['model'] , config['para'] , cv=cv , return_train_score=False)
        gcv.fit(X , y)
        scores.append({
            'model' : algo_name,
            'best_score' : gcv.best_score_,
            'best_para' : gcv.best_params_
        })

    # Show the full parameter dictionaries instead of truncated cell text.
    pd.set_option('display.max_colwidth' , None)
    return pd.DataFrame(scores , columns=['model' , 'best_score' , 'best_para'])


In [None]:
# Run the model/hyper-parameter comparison on the scaled training split.
best_algo(X=X_train_scaled , y=y_train)

In [None]:
# Random forest with the configuration selected by the grid search.
# - max_features='sqrt' is what the removed alias 'auto' meant for classifiers.
# - warm_start is left at its default (False): with warm_start=True, re-running
#   the fit cell on an already fitted forest does not retrain it.
# - random_state pins the forest so the reported scores are reproducible.
model_rf = RandomForestClassifier(
    bootstrap=True,
    criterion='gini',
    max_features='sqrt',
    oob_score=False,
    random_state=0,
)

In [None]:
# Train the random forest on the scaled training split.
model_rf.fit(X=X_train_scaled , y=y_train)

In [None]:
# Mean accuracy of the random forest on the test split.
model_rf.score(X=X_test_scaled , y=y_test)

In [None]:
# Random forest: per-class report plus confusion-matrix heatmap on the test split.
pred_rf = model_rf.predict(X_test_scaled)
clf_rf = classification_report(y_true=y_test , y_pred=pred_rf)
cm_rf = confusion_matrix(y_true=y_test , y_pred=pred_rf)
print(clf_rf)
plt.figure(figsize=(10,8))
sns.heatmap(data=cm_rf , annot=True , fmt='d')

# NAIVE BAYES

In [None]:
# Bernoulli naive Bayes with hard-coded smoothing and prior settings.
model_naive = BernoulliNB(
    alpha=9.800000000000008,
    fit_prior=True,
)

In [None]:
# Train naive Bayes on the scaled training split.
model_naive.fit(X=X_train_scaled , y=y_train)

In [None]:
# Mean accuracy of naive Bayes on the test split.
model_naive.score(X=X_test_scaled , y=y_test)

In [None]:
# Naive Bayes: per-class report plus confusion-matrix heatmap on the test split.
pred_naive = model_naive.predict(X_test_scaled)
clf_naive = classification_report(y_true=y_test , y_pred=pred_naive)
cm_naive = confusion_matrix(y_true=y_test , y_pred=pred_naive)
print(clf_naive)
plt.figure(figsize=(10,8))
sns.heatmap(data=cm_naive , annot=True , fmt='d')

# LOGISTIC REGRESSION

In [None]:
# L1-penalized logistic regression solved with liblinear.
model_logistic = LogisticRegression(
    C=1.0,
    fit_intercept=True,
    penalty='l1',
    solver='liblinear',
)

In [None]:
# Train logistic regression on the scaled training split.
model_logistic.fit(X=X_train_scaled , y=y_train)

In [None]:
# Mean accuracy of logistic regression on the test split.
model_logistic.score(X=X_test_scaled , y=y_test)

In [None]:
# Logistic regression: per-class report plus confusion-matrix heatmap on the test split.
pred_log = model_logistic.predict(X_test_scaled)
clf_log = classification_report(y_true=y_test , y_pred=pred_log)
cm_log = confusion_matrix(y_true=y_test , y_pred=pred_log)
print(clf_log)
plt.figure(figsize=(10,8))
sns.heatmap(data=cm_log , annot=True , fmt='d')

# NEURAL NETWORK

In [None]:
# Fully connected binary classifier (requires `import tensorflow as tf` in the
# notebook's import cell).
# Read the input width from the data instead of hard-coding 25, so the network
# still builds when the number of encoded feature columns changes.
n_features = X_train_scaled.shape[1]

model_NN = tf.keras.Sequential([
    tf.keras.layers.Dense(25 , input_shape=(n_features,),activation='relu'),
    tf.keras.layers.Dense(20 , activation='relu'),
    tf.keras.layers.Dense(15 , activation='relu'),
    # Single sigmoid unit: the output is the predicted probability of class 1.
    tf.keras.layers.Dense(1 , activation='sigmoid'),
])

model_NN.compile(metrics=['accuracy'] , loss='binary_crossentropy' , optimizer='adam')
model_NN.fit(X_train_scaled,y_train,epochs=200,verbose=1)

In [None]:
# Predicted class-1 probabilities for the test split.
pred_NN = model_NN.predict(x=X_test_scaled)

In [None]:
# Loss and accuracy of the network on the test split.
model_NN.evaluate(x=X_test_scaled , y=y_test)

In [None]:
# Turn each predicted probability into a hard 0/1 label using a 0.5 cut-off.
predictions = [1 if probability >= 0.5 else 0 for probability in pred_NN]

In [None]:
# Confusion matrix of the thresholded network predictions.
cm_NN = confusion_matrix(y_true=y_test , y_pred=predictions)

In [None]:
# Heatmap of the network's confusion matrix. seaborn is already imported as
# `sns` in the notebook's import cell, so the stray re-import is dropped here.
plt.figure(figsize=(8,8))
sns.heatmap(cm_NN , annot=True , fmt='d')

In [None]:
# Per-class precision / recall / F1 for the neural network.
report_NN = classification_report(y_true=y_test , y_pred=predictions)
print(report_NN)