In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC,SVR
from sklearn.metrics import f1_score,confusion_matrix,precision_score,recall_score
from sklearn.preprocessing import StandardScaler


In [3]:
df = pd.read_csv("UniversalBank.csv")

df.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB


In [5]:
df.isnull().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [6]:
df.nunique()

ID                    5000
Age                     45
Experience              47
Income                 162
ZIP Code               467
Family                   4
CCAvg                  108
Education                3
Mortgage               347
Personal Loan            2
Securities Account       2
CD Account               2
Online                   2
CreditCard               2
dtype: int64

In [7]:
x =df.drop(columns=["Personal Loan","ID","ZIP Code"])

y = df["Personal Loan"]

In [8]:
stand = StandardScaler()

stand_x = stand.fit_transform(x)

In [9]:
xtrain,xtest,ytrain,ytest = train_test_split(stand_x,y,train_size=0.8,random_state=42)

In [10]:
df["Personal Loan"].value_counts()

Personal Loan
0    4520
1     480
Name: count, dtype: int64

# it is a imbalanced data , so we need to use f1_score


In [11]:
model = SVC()

In [12]:
model.fit(xtrain,ytrain)

In [13]:
pred = model.predict(xtest)

In [14]:
confusion_matrix(ytest,pred)

array([[893,   2],
       [ 22,  83]])

In [25]:
precision_score(ytest,pred)

0.9764705882352941

In [27]:
recall_score(ytest,pred)

0.7904761904761904

In [15]:
f1_score(ytest,pred,pos_label=1)

0.8736842105263158

In [16]:
param = {
    # "kernel":['linear', 'poly', 'rbf', 'sigmoid'],
    "gamma":[0.001,0.01,0.1,1,"scale"],
    "C":[0.01,0.1,1,10,100]
}

In [17]:
grid  =  GridSearchCV(model,param,cv=5,n_jobs=-1)

grid.fit(xtrain,ytrain)

In [18]:
grid.best_params_

{'C': 10, 'gamma': 'scale'}

In [19]:
grid.score(xtest,ytest)

0.982