In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
df=pd.read_csv('bank-full.csv',delimiter=';')

In [None]:
df

In [None]:
df.select_dtypes(object).columns

In [None]:
df.select_dtypes(np.number).columns

In [None]:
df.y.value_counts()

In [None]:
df.columns

In [None]:
pd.crosstab(df['job'],df['y']).plot(kind='bar')

In [None]:
pd.crosstab(df['marital'],df['y']).plot(kind='bar')

# Most Successful Month

In [None]:
# Line plot of how many rows with y == 'yes' fall in each month
# (value_counts orders the months by count, not by calendar position).
is_yes = df['y'].str.strip() == 'yes'
df.loc[is_yes, 'month'].value_counts().plot()

# Unsuccessful Months

In [None]:
# Line plot of how many rows with y == 'no' fall in each month
# (value_counts orders the months by count, not by calendar position).
is_no = df['y'].str.strip() == 'no'
df.loc[is_no, 'month'].value_counts().plot()

# Most Successful Duration

In [None]:
# Ten most frequent `duration` values among rows with y == 'yes',
# drawn as a horizontal bar chart.
yes_durations = df.loc[df['y'].str.strip() == 'yes', 'duration']
yes_durations.value_counts().head(10).plot.barh(color='purple')

# Impact of Housing on Term Deposit by the Client

In [None]:
pd.crosstab(df['housing'],df['y']).plot(kind='bar')

In [None]:
pd.crosstab(df['marital'],df['poutcome']).plot(kind='bar',stacked=True)


In [None]:
df.rename({'y':'term deposit'},axis=1,inplace=True)

In [None]:
# 11 pm est

In [None]:
pd.crosstab(df['campaign'],df['Term Deposit'])

In [None]:
df

In [None]:
sns.countplot(df[df['y'].str.strip()=='yes']['education'])

In [None]:
df[df['y'].str.strip()=='yes']['day'].value_counts().plot(kind='bar')

In [None]:
df[df['y'].str.strip()=='no']['day'].value_counts().plot(kind='bar')

In [None]:
df[df['y'].str.strip()=='yes']['month'].value_counts().plot(kind='bar')

In [None]:
df[df['y'].str.strip()=='no']['month'].value_counts().plot(kind='bar')

In [None]:
df[['marital','education']].value_counts()

In [None]:
# Pairwise correlation of the numeric columns only.
# `df` also holds string columns; pandas >= 2.0 no longer drops them silently
# in `DataFrame.corr()` and raises instead, so select the numeric columns
# explicitly (this form works on older pandas versions as well).
df.select_dtypes(include=np.number).corr()

> # Data Normalization

In [None]:
# Checking Values Distribution of Categorical features

In [None]:
for cat_col in cat_cols:
    print(f'{cat_col} === {df[[cat_col]].value_counts()}')
    print('===============================================')

# Separating Dependent and Independent Features

In [None]:
X=df.drop(['y'],axis=1)

In [None]:
y=df['y']

In [None]:
X

In [None]:
y

In [None]:
num_cols=X.select_dtypes(np.number).columns    #selecting numerical features names

In [None]:
cat_cols=X.select_dtypes(object).columns       #selecting categorical feature names

In [None]:
encoded_X=pd.get_dummies(X[cat_cols],drop_first=True)

In [None]:
encoded_X

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler=MinMaxScaler()

In [None]:
scaled_X=scaler.fit_transform(X[num_cols])

In [None]:
X=np.concatenate([scaled_X,encoded_X],axis=1)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=32,
)

> # Model Development

In [None]:
# Importing Models
import time

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
#from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
# Importing evaluation metrics.
# `plot_confusion_matrix` was removed from sklearn.metrics in scikit-learn 1.2
# (importing it raises ImportError there), so the matrix is drawn with
# `ConfusionMatrixDisplay` instead.
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix,classification_report, ConfusionMatrixDisplay

# Classifiers to compare, as (display name, estimator) pairs.
models = [
    ('Support Vector Classifier', svm.SVC()),
    ('LogisticRegression', LogisticRegression()),
    ('KNeighborsClassifier', KNeighborsClassifier()),
    ('RandomForestClassifier', RandomForestClassifier()),
    #('AdaBoostClassifier', AdaBoostClassifier()),
    ('DecisionTreeClassifier', DecisionTreeClassifier()),
]


# prepare the cross-validation procedure
cv = KFold(n_splits=5, random_state=1, shuffle=True)

# Per-model results, appended in the same order as `models`.
acc = []
pre = []
f1 = []
con = []
rec = []


for i, (name, model) in enumerate(models, start=1):
    start_time = time.time()

    # Fit on the training split and predict the held-out split.
    clf = model
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Accuracy
    accuracy = accuracy_score(y_test, y_pred)
    acc.append(accuracy)
    # Precision, recall and F1: average=None returns one score per class.
    precision = precision_score(y_test, y_pred, average=None)
    pre.append(precision)
    recall = recall_score(y_test, y_pred, average=None)
    rec.append(recall)
    f1_sco = f1_score(y_test, y_pred, average=None)
    f1.append(f1_sco)
    # Confusion matrix; rows/columns follow the classifier's own class order
    # so the plot labels below line up with the matrix.
    confusion_mat = confusion_matrix(y_test, y_pred, labels=clf.classes_)
    con.append(confusion_mat)
    # Report
    report = classification_report(y_test, y_pred)

    # Cross-validated score on the full feature matrix; cross_val_score works
    # on clones of the estimator, so the fit above is not reused.
    scores = cross_val_score(clf, X, y, cv=cv, n_jobs=-1)

    print("+","="*100,"+")
    print('\033[1m' + f"\t\t\t{i}-For {name} The Performance result is: " + '\033[0m')
    print("+","="*100,"+")
    print('Accuracy : ', accuracy)
    print("-"*50)
    print('F1 : ', f1_sco)
    print("-"*50)
    print('Recall : ', recall)
    print("-"*50)
    print('Precision : ', precision)
    print("-"*50)
    print('cross validation accuracy : ', np.mean(scores))
    print("-"*50)
    print('Confusion Matrix....\n', confusion_mat)
    print("-"*50)
    print('Classification Report....\n', report)
    print("-"*50)
    print('Plotting Confusion Matrix...\n')
    # Draw the matrix computed above instead of predicting a second time.
    ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=clf.classes_).plot()
    plt.show()

    # Elapsed time covers fitting, prediction, cross-validation and plotting.
    print("\t\t\t\t\t\t\t-----------------------------------------------------------")
    print(f"\t\t\t\t\t\t\t Time for detection ({name}) : {round((time.time() - start_time), 3)} seconds...")
    print("\t\t\t\t\t\t\t-----------------------------------------------------------")
    print()

# Summary table: one row per model, in the order they were evaluated.
pd.DataFrame({"Model": [model_name for model_name, _ in models], "Accuracy": acc, "Precision": pre, "Recall": rec, "F1_Score": f1, "Confusion Matrix": con})