<p style="font-family: Arial; font-size:2.5em;color:crimson;">
    <b>1) Libraries and Dataset</b> 
 </p>

## Import Libraries

In [None]:
%matplotlib inline
## General Libraries and Visualization Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pgeocode 
import matplotlib.gridspec as gridspec
import warnings
warnings.filterwarnings('ignore')
## Machine Learning Libraries
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import ComplementNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import classification_report,confusion_matrix,precision_score,jaccard_score,recall_score,f1_score

## Import and Read Dataset

In [None]:
# Location of the raw CSV, kept in one named constant so it can be repointed without
# touching the loading code.
# NOTE(review): this is an absolute path on the original author's machine — replace it
# with a project-relative path before sharing the notebook.
DATA_PATH = 'C:/Users/Gagan Sharma/OneDrive/Desktop/Data/Bank_Loan_Granting.csv'

# pd.read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
df = pd.read_csv(DATA_PATH)
df

### Samples
    - This dataset has 5000 samples and 14 columns.
### Features :
     ID : customer ID
     Age : age of customer
     Experience : the number of years that customer has experience
     Income : the customer's annual earnings
     ZIP Code : the postal code of customer's location
     Family : the number of people in customer's family
     CCAvg : the average of monthly credit card spending
     Education : the level of the customer's education (undergraduate, graduate, professional)
     Mortgage : the value of the customer's mortgage (the asset the customer pledges to the bank as a guarantee)
     Securities Account : the customer has a security account or not(it's also known as an investment account)
     CD Account : the customer has a CD account or not(a type of savings account that lets the customer earn interest on a 
     fixed amount of money for a fixed period of time)
     Online : the customer uses the bank's online services or not
     CreditCard : the customer uses the bank's credit card or not
### Target
    - Personal Loan : the customer is granted loans or not

<p style="font-family: Arial; font-size:2.5em;color:crimson;">
    <b>2) Data cleaning</b> 
 </p>

## Data information

In [None]:
# Print the column names, non-null counts and dtypes of the freshly loaded frame.
df.info()

## Checking missing values

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# CCAvg holds text that uses "/" where a decimal point belongs; swap it for "." and
# convert the column to float64.
# Going through astype(str) first keeps the cell re-runnable: once the column is already
# float64, calling the `.str` accessor directly raises AttributeError.
df['CCAvg'] = (
    df['CCAvg']
    .astype(str)
    .str.replace('/', '.', regex=False)   # literal replacement, no regex involved
    .astype(np.float64)
)
df

In [None]:
# Re-check the dtypes after the CCAvg conversion.
df.info()

## Checking duplicated data

In [None]:
# duplicated() flags every row that exactly repeats an earlier one.
duplicated_rows = df[df.duplicated()]
print('Number of duplicated data : ' , len(duplicated_rows))

In [None]:
# Summary statistics of the numeric columns.
df.describe()

## Replace negative values of Experience with their absolute values

In [None]:
# Replace every Experience value by its absolute value (removes the negative entries),
# then show the summary statistics again.
df['Experience'] = df['Experience'].abs()
df.describe()

In [None]:
# Divide Income by 12; every later cell works with this rescaled column.
# NOTE(review): the feature list describes Income as annual earnings, so this presumably
# yields a monthly figure — confirm.
# NOTE(review): this cell is not idempotent — each re-run divides Income by 12 again, so
# execute it exactly once per fresh load of df.
df['Income'] = df['Income']/12
df

## Remove a useless column

In [None]:
# Remove the ID column from the frame used for analysis and modelling.
# errors='ignore' keeps the cell re-runnable: without it a second execution raises
# KeyError because the column has already been dropped.
df = df.drop(columns=['ID'], errors='ignore')
df

## The Target Overview

In [None]:
# Class counts of the target, computed once and reused by the text summary and both charts.
loan_counts = df['Personal Loan'].value_counts()
separator = '*'*120

print(separator)
print('number of customers : ',df.shape[0])
print('The number of Personal Loan = 0 : ',loan_counts[0])
print('The number of Personal Loan = 1 : ',loan_counts[1])
print(separator)

plt.figure(figsize=(10,5))

# Left panel: absolute number of customers per class.
plt.subplot(1,2,1)
sns.countplot(data=df, x='Personal Loan',palette = "Dark2")
plt.title("count plot for Target(Personal Loan)")

# Right panel: the same two counts as shares of the whole.
plt.subplot(1,2,2)
plt.pie(
    [loan_counts[0], loan_counts[1]],
    labels=['Personal Loan = 0','Personal Loan = 1'],
    autopct='%1.1f%%',
    colors=['teal','chocolate'],
)
plt.title("Percent of Personal Loan")

plt.tight_layout()
plt.show()

In [None]:
# Two sub-frames of df used by the overview plots below: the columns treated as
# discrete features and the columns treated as continuous features.
descrete_col = df[['Family','Education','Securities Account','CD Account','Online','CreditCard']]
continuous_col = df[['Age','Experience','Income','CCAvg','Mortgage']]

In [None]:
# Print the distinct values found in every discrete feature.
for col in descrete_col.columns:
    print('%s : ' %col , df[col].unique())

# One strip per discrete column shows which class values occur in each of them.
ax = sns.stripplot(data=descrete_col)
ax.set_xticklabels(ax.get_xticklabels(),rotation=30)
ax.set_ylabel('Class values of descrete Features')
ax.set_xlabel('Name of descrete Features')
ax.set_title('OverView of classes of descrete Features')
plt.show()

In [None]:
print('\n')
print('*'*120)

# Rows whose ZIP code is above 80000; reused by the strip plot and the printed range.
zip_above_80000 = df[df['ZIP Code']>80000]

# Box plot over every ZIP code; the arrow points at the isolated low value.
plt.figure(figsize=(12,1))
ax = sns.boxplot(x=df['ZIP Code'],color="lime", linewidth=.75)
ax.annotate('Noise',xy=(10500,0),xytext=(15000,0),arrowprops=dict(facecolor='red'))
ax.set_title('Distribution of ZIP Code')
plt.show()

# Strip plot of the remaining ZIP codes per target class, with a red note near 96900.
zip_note = dict(
    x=0,
    y=96900,
    text="ZIP Codes 96651 Must Be Checked\n for analysis",
    textangle=0,
    ax=150,
    ay=-50,
    font=dict(color="red", size=14),
    arrowcolor="red",
    arrowsize=3,
    arrowwidth=1,
    arrowhead=1,
)
fig = px.strip(zip_above_80000, x="Personal Loan", y="ZIP Code", stripmode='overlay')
fig.update_layout(annotations=[zip_note])
fig.show()

# Text summary: number of distinct codes, the three smallest codes, and the range
# of the codes above 80000.
print('The number of categories in ZIP Code is : ',len(df['ZIP Code'].unique()))
print('There is a noise in zipcode feature, it is the smallest value of it :')
display(df['ZIP Code'].nsmallest(3).to_frame())
valid_zip_codes = zip_above_80000['ZIP Code']
print('The range of ZIP Code without this noise is : (%d , %d)'%(valid_zip_codes.min(),
                                           valid_zip_codes.max()))

In [None]:
# Summary statistics of the continuous features, one feature per row.
continuous_summary = continuous_col.describe().T
display(continuous_summary)

# One strip per continuous column to compare their value ranges on a shared axis.
ax = sns.stripplot(data=continuous_col)
ax.set_xticklabels(ax.get_xticklabels(),rotation=30)
ax.set_ylabel('Range of Continuous Features')
ax.set_xlabel('Name of Continuous Features')
plt.show()

In [None]:
# A separate, very flat box plot for each continuous feature.
for col in continuous_col.columns:
    plt.figure(figsize=(12,0.5))
    ax = sns.boxplot(data=df, x=col, color="greenyellow", linewidth=.75)
    ax.set(title=f'Distribution of {col} feature')
    plt.show()

In [None]:
## df1 is just for visualizations related to zipcodes
# Codes at or below this value are treated as noise; the same 80000 threshold is used
# by the ZIP code plots earlier in the notebook.
MIN_VALID_ZIP = 80000

# The guard keeps the cell re-runnable: after the first execution df no longer has a
# 'ZIP Code' column, so repeating the copy/drop would either fail or rebuild df1
# without the column it exists for.
if 'ZIP Code' in df.columns:
    df1 = df.copy()
    df = df.drop(['ZIP Code'],axis=1)
    # Remove the noisy ZIP code by value rather than by the hard-coded row label 384,
    # so the cell still removes the right row if the rows are ever reordered or filtered.
    df1 = df1[df1['ZIP Code'] > MIN_VALID_ZIP].reset_index(drop=True)
df1

## Insert Latitude, Longitude, County and Place name to df1 according to zipcode

In [None]:
nomi = pgeocode.Nominatim('us')

# Look up all ZIP codes in a single call instead of issuing one query per row from a
# Python loop. query_postal_code accepts a list of codes and returns a DataFrame of results.
geo = nomi.query_postal_code(df1['ZIP Code'].astype(str).tolist())

# The results are copied positionally below, so there must be exactly one per df1 row.
assert len(geo) == len(df1), 'pgeocode returned a different number of rows than queried'

# Copy the looked-up fields by position (to_numpy discards geo's own index), so the
# assignment does not rely on df1 having a freshly reset 0..n-1 index.
geo_columns = {'Lat': 'latitude', 'Lon': 'longitude', 'County': 'county_name', 'Place': 'place_name'}
for target, source in geo_columns.items():
    df1[target] = geo[source].to_numpy()
df1

In [None]:
# Summary of every df1 column, including the non-numeric County and Place columns.
df1.describe(include='all')

In [None]:
# Distinct ZIP codes for which the lookup produced no latitude.
df1.loc[df1['Lat'].isna(), 'ZIP Code'].unique()

In [None]:
# Manually supplied location data for ZIP codes that are filled in by hand.
# miss_zipcodes_info[i] holds [Lat, Lon, County, Place] for miss_zipcodes[i].
miss_zipcodes = [92717,92634,92709]
miss_zipcodes_info = [
    [33.6462,-117.8397,'Orange','Irvine'],
    [33.8739,-117.9028,'Orange','Fullerton'],
    [33.67,-117.73,'Orange','Irvine'],
]
location_columns = ['Lat','Lon','County','Place']

# Write each value into its column for all rows carrying that ZIP code.
for zcode, location in zip(miss_zipcodes, miss_zipcodes_info):
    rows_with_code = df1['ZIP Code']==zcode
    for info, value in zip(location_columns, location):
        df1.loc[rows_with_code, info] = value

In [None]:
# Missing values per column that remain after the manual fill-in.
df1.isna().sum()

In [None]:
# Discard every row that still has a missing value, then show the per-column
# missing counts followed by the remaining frame.
df1 = df1.dropna()
display(df1.isna().sum(), df1)

In [None]:
# Strip plot of the cleaned ZIP codes per target class, with a red note near 96900.
removed_note = dict(
    x=0,
    y=96900,
    text="ZIP Codes 96651 have been removed as unknown zipcodes",
    textangle=0,
    ax=150,
    ay=-50,
    font=dict(color="red", size=14),
    arrowcolor="red",
    arrowsize=3,
    arrowwidth=1,
    arrowhead=1,
)

fig = px.strip(df1, x="Personal Loan", y="ZIP Code", stripmode='overlay')
fig.update_layout(annotations=[removed_note])
fig.show()

<p style="font-family: Arial; font-size:2.5em;color:crimson;">
    <b>3) Data analysis and visualization</b> 
 </p>

In [None]:
# ---- Count plot of every discrete feature on a 2 x 3 grid ----
plt.figure(figsize=(15,10))
for position, col in enumerate(descrete_col.columns, start=1):
    plt.subplot(2,3,position)
    sns.countplot(data=df, x=col)
    plt.title(f"count plot for $\\mathbf{{{col}}}$")
plt.tight_layout()
plt.show()

# ---- Categories of each discrete feature and how often each one occurs ----
# Both lists are ordered like descrete_col.columns; categories keep their order of
# first appearance in the column.
descrete_cols_categories=[]
descrete_cols_categories_count=[]

for col in descrete_col.columns:
    values = descrete_col[col]
    categories = list(values.unique())
    descrete_cols_categories.append(categories)
    descrete_cols_categories_count.append(
        [values[values==category].count() for category in categories]
    )

print('*'*120)
print('Descrete columns name : \n' , list(descrete_col.columns))
print('Descrete columns categories : \n' , descrete_cols_categories)   
print('Descrete columns categories count : \n' , descrete_cols_categories_count)
print('\n')
print('*'*120)

# ---- One pie chart per discrete feature on a 3 x 2 grid ----
rows = 3
cols = 2

# Every cell of the grid hosts a pie trace.
pie_specs = [[{"type": "pie"} for _col in range(cols)] for _row in range(rows)]
fig = make_subplots(rows=rows, cols=cols, specs=pie_specs)

for i in range(rows * cols) :
    # Fill the grid row by row: i = 0, 1 -> first row, i = 2, 3 -> second row, ...
    grid_row, grid_col = divmod(i, cols)
    pie = go.Pie(
        labels=descrete_cols_categories[i],
        values=descrete_cols_categories_count[i],
        name=descrete_col.columns[i],
    )
    fig.add_trace(pie, row=grid_row+1, col=grid_col+1)

fig.update_layout(margin=dict(t=35, b=35, l=35, r=35))
fig.update_layout(title_text='Percent Of values in descrete columns', showlegend=True)

fig = go.Figure(fig)
fig.show()

In [None]:
# Count plot of every discrete feature, split by the target class, on a 2 x 3 grid.
plt.figure(figsize=(15,10))
for position, col in enumerate(descrete_col.columns, start=1):
    ax = plt.subplot(2,3,position)
    sns.countplot(data=df, x=col, hue='Personal Loan', ax=ax)
    ax.set_title(f"count plots for $\\mathbf{{{col}}}$")
plt.tight_layout()
plt.show()

In [None]:
# ---- Histogram (with KDE curve) of every continuous feature, coloured by target class ----
plt.figure(figsize=(18,12))
for position, col in enumerate(continuous_col.columns, start=1):
    plt.subplot(3,2,position)
    sns.histplot(data=df, x=col, hue='Personal Loan', kde=True)
    plt.title(f"Personal loan Vs $\\mathbf{{{col}}}$",fontsize=18)

plt.tight_layout()
plt.show()

print('\n')
print('*'*120)

# ---- Filled density curve per target class for every continuous feature ----
# The class value (0 or 1) doubles as the index into this colour list.
class_colors = ['blue','orange']

plt.figure(figsize=(15,10))
for position, col in enumerate(continuous_col.columns, start=1):
    plt.subplot(3,2,position)
    for loan_value in df['Personal Loan'].unique():
        class_values = df[df['Personal Loan']==loan_value][col]
        sns.kdeplot(class_values,color=class_colors[loan_value],fill=True,label=loan_value)
        plt.legend()
    plt.title(f"Distribution of Personal Loan on $\\mathbf{{{col}}}$ feature")
plt.tight_layout()
plt.show()

In [None]:
fig = plt.figure(figsize = (15,10),constrained_layout = True, dpi=100)
GridSpec = gridspec.GridSpec(ncols=1, nrows=2, figure= fig)

# Both nested pies live in a sub-figure placed in the top row of the grid.
subfigure = fig.add_subfigure(GridSpec[0,:])
subplots = subfigure.subplots(1,2)

total_names=[["Customers' \nincome <= 8", "Customers' \nincome > 8"],["Mortgage = 0", "Mortgage > 0"]]
sub_names=["Personal\n Loan = 1","Personal\n Loan = 0","Personal\n Loan = 1","Personal\n Loan = 0"]

# Derive the wedge sizes from df instead of hard-coding them, so the charts stay
# correct whenever the data or an earlier cleaning step changes.
#   total_size[i] = [rows in the first group, rows in the second group]
#   sub_size[i]   = [loan=1 & first, loan=0 & first, loan=1 & second, loan=0 & second]
# The groups follow the labels above: income <= 8 / > 8 and Mortgage = 0 / > 0.
granted = df['Personal Loan'] == 1
first_group_masks = [df['Income'] <= 8, df['Mortgage'] == 0]
total_size = []
sub_size = []
for in_first in first_group_masks:
    total_size.append([int(in_first.sum()), int((~in_first).sum())])
    sub_size.append([
        int((in_first & granted).sum()),
        int((in_first & ~granted).sum()),
        int((~in_first & granted).sum()),
        int((~in_first & ~granted).sum()),
    ])

# Create colors: one colour map per outer wedge, lighter shades for its inner wedges.
c1, c2, c3, c4=[plt.cm.Purples, plt.cm.Greens, plt.cm.Blues, plt.cm.Oranges]
colors = [[c1(0.8), c2(0.8)] ,[c1(0.6), c1(0.4), c2(0.6), c2(0.4)],[c3(0.8), c4(0.8)],[c3(0.6), c3(0.4), c4(0.6), c4(0.4)]]

for i,col in enumerate(['Income','Mortgage']):
    subplots[i].axis('equal')
    # First ring (outside): the two groups of the feature.
    pie1, _ = subplots[i].pie(total_size[i], radius=0.8, labels=total_names[i],textprops={'fontsize': 15},
                              colors= colors[i*2])
    plt.setp( pie1, width=0.3, edgecolor='white')
    # Second Ring (Inside): each group split by the target class.
    pie2, _ = subplots[i].pie(sub_size[i], radius=0.85-0.3, 
    labels=sub_names, labeldistance=0.7,textprops={'fontsize': 14}, colors=colors[i*2+1])
    plt.setp( pie2, width=0.25, edgecolor='white')
    plt.margins(0,0)
    subplots[i].set_title(f'$\\mathbf{{{col}}}$ Vs Personal Loan',fontsize=18)
    
plt.show()

print('\n')
print('*'*120)

# Histogram of the positive mortgages only. Passing the filtered frame as `data` keeps
# x and hue the same length instead of relying on index alignment between a filtered
# x Series and the unfiltered hue Series.
mortgage_holders = df[df['Mortgage']>0]
sns.histplot(data=mortgage_holders, x='Mortgage', hue='Personal Loan', kde=True)
plt.title(f'Personal loan Vs $\\mathbf{{{"Mortgage>0"}}}$')

plt.show()

In [None]:
# Pairwise scatter plots of the continuous features, coloured by the target class.
# pairplot is a figure-level function that builds its own figure, so a preceding
# plt.figure(figsize=...) call has no effect on it and only leaves an empty extra
# figure in the output; use pairplot's height/aspect arguments to resize the grid.
sns.pairplot(df[['Age','Experience','Income','CCAvg','Mortgage','Personal Loan']],hue='Personal Loan')
plt.tight_layout()
plt.show()
print('*'*125)

In [None]:
# Correlation tables: first including the target, then among the three features only.
money_features = ['Income','CCAvg','Mortgage']
display(df[['Personal Loan'] + money_features].corr())
display(df[money_features].corr())

# Income against CCAvg and against Mortgage, coloured by the target class.
plt.figure(figsize=(8,6))
for position, col in enumerate(['CCAvg','Mortgage'], start=1):
    ax = plt.subplot(1,2,position)
    sns.scatterplot(data=df, x=col, y='Income', hue='Personal Loan', ax=ax)
    ax.set_title(f"$\\mathbf{{{col}}}$ Vs Income")
plt.tight_layout()
plt.show()

In [None]:
# Two interactive 3-D scatters that differ only in the x feature; Income is on y,
# CD Account on z and the colour encodes the target class.
for x_feature in ('CCAvg', 'Mortgage'):
    fig = px.scatter_3d(
        df,
        x=x_feature,
        y='Income',
        z='CD Account',
        color='Personal Loan',
    )
    fig.show()

In [None]:
plt.figure(figsize=(10,3))

# Left: ZIP code density for all customers versus customers with Personal Loan == 1.
plt.subplot(1,2,1)
zip_with_loan = df1[df1['Personal Loan']==1]['ZIP Code']
sns.kdeplot(df1['ZIP Code'],fill=True,color='blue',label='all Customers')
sns.kdeplot(zip_with_loan,fill=True,color='orange',label='customers with\n personal loan=1')
plt.legend(loc='upper left')

# Right: box plot with the individual points drawn on top, one box per target class;
# the y axis ticks and label are moved to the right-hand side.
plt.subplot(1,2,2)
ax = sns.boxplot(data=df1, x='Personal Loan', y='ZIP Code', color="green", linewidth=0.75)
ax = sns.stripplot(data=df1, x='Personal Loan', y='ZIP Code', color="orange", linewidth=0.75)
ax.yaxis.tick_right()
ax.yaxis.set_label_position('right')

In [None]:
# Customers plotted on an OpenStreetMap tile map at the coordinates looked up for their
# ZIP code; the colour encodes the target and the hover box shows a few extra columns.
fig = px.scatter_mapbox(
    df1,
    lat="Lat",
    lon="Lon",
    hover_name="County",
    hover_data=['Place','Income' , 'Mortgage' , "CreditCard"],
    color="Personal Loan",
    color_continuous_scale=["crimson",'darkblue'],
    zoom=10,
    height=400,
)

# Map style, zero margins and the bounds that limit panning, set in a single call.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    mapbox_bounds={"west": -200, "east": 80, "south": -20, "north": 90},
)
fig.show()

<p style="font-family: Arial; font-size:2.5em;color:crimson;">
    <b>4) Modeling(Machine Learning)</b> 
 </p> 

In [None]:
def Normalization(df,lst,rng) :
    '''Min-max scale the selected columns of a data frame.

        inputs
    df  : the data frame to rescale; its columns are overwritten in place
    lst : list of the column names of df that should be rescaled
    rng : (min, max) tuple handed to MinMaxScaler as feature_range

        output
    df  : the same data frame object, with the columns in lst rescaled
    '''
    # Fit the scaler on the selected columns and write the scaled values straight back.
    df[lst] = preprocessing.MinMaxScaler(feature_range = rng).fit_transform(df[lst])
    return df

### Four functions for different classification algorithms and their evaluation

## Logistic Regression Model

In [None]:
def LogReg(x,y,test_size,stratify) :
    """Fit a logistic regression on a train/test split and print its evaluation.

    x, y      : features and target; y must offer .ravel() (e.g. a numpy array)
    test_size : share of the samples held out for testing
    stratify  : forwarded to train_test_split (None disables stratification)

    Prints the test accuracy and the Evaluation report; returns nothing.
    """
    ## First split x and y into train and test parts (fixed random_state), then fit the model
    x_train,x_test,y_train,y_test = train_test_split(
        x, y, test_size=test_size, random_state=0, stratify=stratify
    )
    Model = LogisticRegression(solver='liblinear').fit(x_train, y_train.ravel())

    ## Report of the model evaluation
    test_accuracy = metrics.accuracy_score(y_test, Model.predict(x_test))
    print('Accuracy of Logistic Regression Model: ' , np.round(test_accuracy,3))
    Evaluation(Model,x,y,x_train,y_train,x_test,y_test)
    print('*'*120)

## Naive Bayes Model

In [None]:
def NaiveBayes(x,y,test_size,model,stratify) :
    """Fit one Naive Bayes variant on a train/test split and print its evaluation.

    x, y      : features and target; y must offer .ravel() (e.g. a numpy array)
    test_size : share of the samples held out for testing
    model     : the estimator class to instantiate with default arguments; this notebook
                passes GaussianNB, ComplementNB, MultinomialNB and BernoulliNB
    stratify  : forwarded to train_test_split (None disables stratification)

    Prints the test accuracy and the Evaluation report; returns nothing.
    """
    x_train,x_test,y_train,y_test = train_test_split(
        x, y, test_size=test_size, random_state=0, stratify=stratify
    )
    Model = model().fit(x_train, y_train.ravel())

    test_accuracy = metrics.accuracy_score(y_test, Model.predict(x_test))
    print('Accuracy of Model: ' , np.round(test_accuracy,3))
    Evaluation(Model,x,y,x_train,y_train,x_test,y_test)
    print('*'*120)

## KNN Model

In [None]:
def KNN(x,y,test_size,k,stratify) :
    """Try k-nearest-neighbours for 1..k neighbours, keep the best one and report on it.

    x, y      : features and target; y must offer .ravel() (e.g. a numpy array)
    test_size : share of the samples held out for testing
    k         : largest number of neighbours to try
    stratify  : forwarded to train_test_split (None disables stratification)

    Prints the test accuracy of every candidate, the winning neighbour count and the
    Evaluation report of the refitted winner, then plots train and test accuracy
    against the neighbour count. Returns nothing.

    NOTE(review): the winner is picked by its accuracy on the test split, so the
    reported test scores are optimistic — consider selecting it on validation data.
    """
    x_train,x_test,y_train,y_test = train_test_split(
        x, y, test_size=test_size, random_state=0, stratify=stratify
    )
    labels_train = y_train.ravel()
    candidate_ks = range(1,k+1)

    ## Accuracy on both splits for every candidate number of neighbours
    train_score = []
    test_score = []
    for n_neighbors in candidate_ks :
        candidate = KNeighborsClassifier(n_neighbors).fit(x_train, labels_train)
        train_score.append(np.round(candidate.score(x_train,y_train),3))
        test_score.append(np.round(candidate.score(x_test,y_test),3))

    ## The first candidate reaching the highest test accuracy wins and is refitted
    best_score = max(test_score)
    best_k = test_score.index(best_score)+1
    Model = KNeighborsClassifier(best_k).fit(x_train, labels_train)

    print('Model accuracy for k = %d : '%k , np.round(test_score,3))
    print('The best result is for k = %d with accuracy = %s .'%(best_k,best_score))
    Evaluation(Model,x,y,x_train,y_train,x_test,y_test)

    ## Accuracy curves over the candidates
    plt.plot(candidate_ks,train_score,color='darkblue',label='train_accuracy')
    plt.plot(candidate_ks,test_score,color='darkorange',label='test_accuracy')
    plt.xlabel('k')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    print('*'*120)

## Evaluation Function

In [None]:
def Evaluation(Model,x,y,x_train,y_train,x_test,y_test) :
    """Print a standard set of scores for an already fitted classifier.

    Model            : fitted estimator offering predict() and score()
    x, y             : accepted for call compatibility; not used in the body
    x_train, y_train : training split; used for the train score and for the 10-fold
                       stratified cross-validation (y_train must offer .ravel())
    x_test, y_test   : test split; used for the jaccard, precision, recall and f1
                       scores and for the confusion matrix

    Everything is printed; nothing is returned.
    """
    cv_score = (cross_val_score(Model, x_train, y_train.ravel(), cv = StratifiedKFold(10), scoring='accuracy')).mean()
    # Predict the test split once and reuse the result for every metric below instead
    # of calling Model.predict(x_test) separately for each of them.
    y_pred = Model.predict(x_test)

    print('Train score : ',np.round(Model.score(x_train,y_train),3))
    print('Average Cross Validation Score : ',np.round(cv_score,3))
    print('jaccard_score : ',np.round(jaccard_score(y_test,y_pred),3))
    print('precision_score : ',np.round(precision_score(y_test,y_pred),3))
    print('recall_score : ',np.round(recall_score(y_test,y_pred),3))
    print('f1_score : ',np.round(f1_score(y_test,y_pred),3))
    print('Result of confusion matrix : ')
    print(confusion_matrix(y_test,y_pred))

## Test 1) Create and Evaluate Models on normalized data ( just 5 features ) 

In [None]:
#################### df normalization ######################
# Only these five columns are rescaled to the range (1, 5); df itself stays untouched
# because a copy is handed to Normalization.
features1 = ['Age','Experience','Income','CCAvg','Mortgage']
df_Normal1 = Normalization(df.copy(),features1,(1,5))
#################### x,y ##############################
x = df_Normal1.drop('Personal Loan',axis=1)
y = df_Normal1['Personal Loan'].values.reshape(-1,1)
###########################################################
# Every model uses a 10 % test split without stratification.

## Logistic Regression
LogReg(x,y,0.1,None)

## NaiveBayes: the four variants share one call pattern
naive_bayes_runs = [
    ('Result of Gaussian Model : ', GaussianNB),
    ('Result of Complement Model : ', ComplementNB),
    ('Result of Multinomial Model : ', MultinomialNB),
    ('Result of Bernoulli Model : ', BernoulliNB),
]
for headline, nb_model in naive_bayes_runs:
    print(headline)
    NaiveBayes(x,y,0.1,nb_model,None)

## KNN
print('Result of KNN Model : ')
KNN(x,y,0.1,20,stratify=None)

In [None]:
#################### x,y ##############################
x = df_Normal1.drop('Personal Loan',axis=1)
y = df_Normal1['Personal Loan'].values.reshape(-1,1)
###########################################################
# Same data and models as the previous cell, but every split is stratified on y.

## Logistic Regression
LogReg(x,y,0.1,y)

## NaiveBayes: the four variants share one call pattern
naive_bayes_runs = [
    ('Result of Gaussian Model : ', GaussianNB),
    ('Result of Complement Model : ', ComplementNB),
    ('Result of Multinomial Model : ', MultinomialNB),
    ('Result of Bernoulli Model : ', BernoulliNB),
]
for headline, nb_model in naive_bayes_runs:
    print(headline)
    NaiveBayes(x,y,0.1,nb_model,y)

## KNN
print('Result of KNN Model : ')
KNN(x,y,0.1,20,stratify=y)

## Test 3) create and evaluate 6 Models on normalized data ( all features ) with stratify 

In [None]:
#################### df normalization ######################
# This time every feature column is rescaled to the range (1, 5); df itself stays
# untouched because a copy is handed to Normalization.
features2 = [
    'Age','Experience','Income','Family',
    'CCAvg','Education','Mortgage','Securities Account','CD Account','Online',
    'CreditCard',
]
df_Normal2 = Normalization(df.copy(),features2,(1,5))
#################### x,y ##############################
x = df_Normal2.drop('Personal Loan',axis=1)
y = df_Normal2['Personal Loan'].values.reshape(-1,1)
###########################################################
# Every model uses a 10 % test split stratified on y.

## Logistic Regression
LogReg(x,y,0.1,y)

## NaiveBayes: the four variants share one call pattern
naive_bayes_runs = [
    ('Result of Gaussian Model : ', GaussianNB),
    ('Result of Complement Model : ', ComplementNB),
    ('Result of Multinomial Model : ', MultinomialNB),
    ('Result of Bernoulli Model : ', BernoulliNB),
]
for headline, nb_model in naive_bayes_runs:
    print(headline)
    NaiveBayes(x,y,0.1,nb_model,y)

## KNN
print('Result of KNN Model : ')
KNN(x,y,0.1,20,stratify=y)

# Final Model
## The best Model is KNN for this dataset with Accuracy = 0.974 and f1-score = 0.851

In [None]:
# Features as a plain array and the target as a column vector, from the fully scaled frame.
x = df_Normal2.drop(columns='Personal Loan').values
y = df_Normal2['Personal Loan'].values.reshape(-1,1)

# Stratified 10 % test split with a fixed random_state.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.1,random_state=0,stratify=y)

# Final classifier: k-nearest-neighbours with a single neighbour.
k = 1
Model = KNeighborsClassifier(k)
Model.fit(x_train,y_train.ravel())
y_pred = Model.predict(x_test)

# Confusion matrix of the test split, with the predicted-label axis title moved to the top.
fig, ax = plt.subplots(figsize=(4, 4))
cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix(y_test,y_pred),
    display_labels = ['Not be granted loans', 'be granted loans'],
)
cm_display.plot(ax=ax,colorbar=False, cmap='Purples')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
ax.xaxis.set_label_position('top')
plt.show()

### Convert train data, test data and predicted data to data frame just for visualization

In [None]:
# Column names of the feature matrix, taken from the same frame (and in the same order)
# that x was built from. Deriving them here replaces three hand-written copies of the
# list, which would silently mislabel columns if the frame's column order ever changed.
feature_names = list(df_Normal2.drop('Personal Loan',axis=1).columns)

# Each frame pairs a feature matrix with a label vector; np.ravel flattens the
# column-vector shaped labels, and assign() appends 'Personal Loan' as the last column.
#   df_train : training features with their true labels
#   df_test  : test features with their true labels
#   df_pred  : test features with the labels predicted by the model
df_train = pd.DataFrame(x_train,columns=feature_names).assign(**{'Personal Loan': np.ravel(y_train)})
df_test = pd.DataFrame(x_test,columns=feature_names).assign(**{'Personal Loan': np.ravel(y_test)})
df_pred = pd.DataFrame(x_test,columns=feature_names).assign(**{'Personal Loan': np.ravel(y_pred)})

In [None]:
# Training points and predicted test points drawn into the same axes: Income against
# CCAvg (top) and against Mortgage (bottom). The predicted points use the green/red palette.
fig = plt.figure(figsize=(10,10))
fig.suptitle(f'$\\mathbf{{{"train"}}}$ Data Vs $\\mathbf{{{"predicted"}}}$ Data',y=0.905)
for position, feature in enumerate(['CCAvg','Mortgage'], start=1) :
    ax = plt.subplot(2,1,position)
    sns.scatterplot(data=df_train, x=feature, y='Income', hue='Personal Loan', ax=ax)
    sns.scatterplot(data=df_pred, x=feature, y='Income', hue='Personal Loan', palette = ['Green','Red'], ax=ax)

plt.show()

In [None]:
rows = 3
cols = 2
colors = [['darkblue','orange'],['Green','red'],['purple','crimson']]

# Actual (left) versus predicted (right) test labels, one grid row per feature.
# The figure is created once, before the loop: creating and showing a new figure in
# every iteration produced three large figures that each filled only one of the
# three grid rows and left the rest blank.
plt.figure(figsize=(14,12))
for j,feature in enumerate(['CCAvg','Mortgage','Education']) :
    panels = [
        (df_test, f'$\\mathbf{{{"Actual"}}}$ Data'),
        (df_pred, f'$\\mathbf{{{"predicted"}}}$ Data'),
    ]
    for offset, (frame, panel_title) in enumerate(panels, start=1) :
        plt.subplot(rows,cols,j*cols+offset)
        sns.scatterplot(data=frame, x=feature,y='Income',hue='Personal Loan',palette = colors[j])
        plt.title(panel_title)
        # The last row is drawn without a legend.
        if j == 2 :
            plt.legend([],[], frameon=False)
plt.tight_layout()
plt.show()

In [None]:
# The same 3-D scatter twice: first with the actual test labels, then with the predicted
# ones. Only the data frame, the note text and the arrow's horizontal offset differ.
scatter_variants = [
    (df_test, "Actual Value", -90),
    (df_pred, "Incorrect Predicted Value", -130),
]

for frame, note_text, arrow_offset_x in scatter_variants:
    fig = px.scatter_3d(frame, x='CCAvg', y='Income', z='CD Account',
                  color='Personal Loan')
    red_note = dict(
        x=0.47,
        y=0.78,
        text=note_text,
        textangle=0,
        ax=arrow_offset_x,
        ay=0,
        font=dict(color="red", size=14),
        arrowcolor="red",
        arrowsize=3,
        arrowwidth=1,
        arrowhead=1,
    )
    fig.update_layout(annotations=[red_note])
    fig.show()