### Import Required Packages

In [1]:
%matplotlib inline 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.stats import zscore
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [2]:
# Read the bank marketing data into a DataFrame; the file separates fields with ";".
df = pd.read_csv(
    "D:/Imarticus/Datasets/PredictingTermDepositSubscriptionbyaclient/Predicting Term Deposit Subscription by a client/Dataset/bank-additional-full.csv",
    sep=";",
)
# Display the frame (Jupyter shows a truncated head/tail preview).
df

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,73,retired,married,professional.course,no,yes,no,cellular,nov,fri,...,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,yes
41184,46,blue-collar,married,professional.course,no,no,no,cellular,nov,fri,...,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,no
41185,56,retired,married,university.degree,no,yes,no,cellular,nov,fri,...,2,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,no
41186,44,technician,married,professional.course,no,no,no,cellular,nov,fri,...,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,yes


### EDA

In [3]:
# Inspect the dtype of every column to tell numeric features apart from object (string) ones.
df.dtypes

age                 int64
job                object
marital            object
education          object
default            object
housing            object
loan               object
contact            object
month              object
day_of_week        object
duration            int64
campaign            int64
pdays               int64
previous            int64
poutcome           object
emp.var.rate      float64
cons.price.idx    float64
cons.conf.idx     float64
euribor3m         float64
nr.employed       float64
y                  object
dtype: object

In [4]:
# Count the NaN/None entries in each column (all zeros means nothing is missing).
df.isna().sum()

age               0
job               0
marital           0
education         0
default           0
housing           0
loan              0
contact           0
month             0
day_of_week       0
duration          0
campaign          0
pdays             0
previous          0
poutcome          0
emp.var.rate      0
cons.price.idx    0
cons.conf.idx     0
euribor3m         0
nr.employed       0
y                 0
dtype: int64

In [5]:
# Pairwise Pearson correlation between the numeric columns.
# The object-typed columns are excluded explicitly: pandas >= 2.0 no longer drops
# them silently, so a bare df.corr() raises on the string-valued features.
df.select_dtypes(include="number").corr()

Unnamed: 0,age,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
age,1.0,-0.000866,0.004594,-0.034369,0.024365,-0.000371,0.000857,0.129372,0.010767,-0.017725
duration,-0.000866,1.0,-0.071699,-0.047577,0.02064,-0.027968,0.005312,-0.008173,-0.032897,-0.044703
campaign,0.004594,-0.071699,1.0,0.052584,-0.079141,0.150754,0.127836,-0.013733,0.135133,0.144095
pdays,-0.034369,-0.047577,0.052584,1.0,-0.587514,0.271004,0.078889,-0.091342,0.296899,0.372605
previous,0.024365,0.02064,-0.079141,-0.587514,1.0,-0.420489,-0.20313,-0.050936,-0.454494,-0.501333
emp.var.rate,-0.000371,-0.027968,0.150754,0.271004,-0.420489,1.0,0.775334,0.196041,0.972245,0.90697
cons.price.idx,0.000857,0.005312,0.127836,0.078889,-0.20313,0.775334,1.0,0.058986,0.68823,0.522034
cons.conf.idx,0.129372,-0.008173,-0.013733,-0.091342,-0.050936,0.196041,0.058986,1.0,0.277686,0.100513
euribor3m,0.010767,-0.032897,0.135133,0.296899,-0.454494,0.972245,0.68823,0.277686,1.0,0.945154
nr.employed,-0.017725,-0.044703,0.144095,0.372605,-0.501333,0.90697,0.522034,0.100513,0.945154,1.0


In [6]:
# One-hot encode the categorical features.
# drop_first=True removes one level per feature so the indicator columns of a
# feature are not perfectly collinear.
# dtype is pinned to uint8 so the indicators are 0/1 integers on every pandas
# version (pandas >= 2.0 would otherwise produce bool columns).
# NOTE: this cell rebinds `df`; re-running it without reloading the data fails
# because the listed source columns no longer exist.
df = pd.get_dummies(
    df,
    columns=["job", "marital", "education", "default", "housing", "loan",
             "contact", "month", "day_of_week", "poutcome"],
    drop_first=True,
    dtype="uint8",
)
# Fixed seed so the preview shows the same rows on every run.
df.sample(10, random_state=0)

Unnamed: 0,age,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,...,month_may,month_nov,month_oct,month_sep,day_of_week_mon,day_of_week_thu,day_of_week_tue,day_of_week_wed,poutcome_nonexistent,poutcome_success
14671,34,323,3,999,0,1.4,93.918,-42.7,4.961,5228.1,...,0,0,0,0,0,0,1,0,1,0
9233,31,118,4,999,0,1.4,94.465,-41.8,4.967,5228.1,...,0,0,0,0,0,0,0,0,1,0
24517,44,184,3,999,0,-0.1,93.2,-42.0,4.191,5195.8,...,0,1,0,0,1,0,0,0,1,0
14703,44,265,5,999,0,1.4,93.918,-42.7,4.961,5228.1,...,0,0,0,0,0,0,1,0,1,0
203,43,277,1,999,0,1.1,93.994,-36.4,4.857,5191.0,...,1,0,0,0,1,0,0,0,1,0
24844,31,346,1,999,0,-0.1,93.2,-42.0,4.153,5195.8,...,0,1,0,0,0,0,1,0,1,0
711,57,190,3,999,0,1.1,93.994,-36.4,4.857,5191.0,...,1,0,0,0,0,0,1,0,1,0
24383,52,98,1,999,0,-0.1,93.2,-42.0,4.191,5195.8,...,0,1,0,0,1,0,0,0,1,0
22218,30,244,3,999,0,1.4,93.444,-36.1,4.963,5228.1,...,0,0,0,0,0,1,0,0,1,0
19075,46,92,1,999,0,1.4,93.444,-36.1,4.968,5228.1,...,0,0,0,0,0,0,1,0,1,0


### SVM

In [7]:
# Features: every column except the target.
X = df.drop(columns=["y"])
# Target, kept as a one-column DataFrame (double brackets) rather than a Series.
y = df[["y"]]

In [8]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [22]:
# Support-vector classifier with a linear kernel; all other hyperparameters are left at their defaults.
clf = SVC(kernel="linear")
# NOTE(review): the settings below look like an alternative configuration left over
# from experimentation -- confirm whether they are still wanted.
# C = 0.2, kernel='linear', random_state = 0, gamma = 1

In [23]:
# Fit on the training split. y_train is a one-column DataFrame, so it is flattened
# to a 1-D label array first.
# NOTE(review): no feature-scaling step is visible before this fit; SVMs are
# scale-sensitive, so consider standardizing the features.
clf.fit(X_train, y_train.to_numpy().ravel())

SVC(kernel='linear')

In [24]:
# Predict class labels for the held-out test features.
y_pred = clf.predict(X_test)

In [25]:
# Mean accuracy on the training split (for comparison with the test score).
clf.score(X_train, y_train)

0.8938642433491728

In [26]:
# Mean accuracy on the held-out test split.
clf.score(X_test, y_test)

0.8985999838148417

### Grid Search

In [1]:
# NOTE(review): the grid search below is disabled -- every line is commented out, so
# this cell does nothing when run. If re-enabled, it would fit svm.SVC over
# 5 gamma values x 4 kernels and refit the best combination on the training split.
# param_grid = {
#               'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
#               'kernel': ['rbf','linear','poly','sigmoid']}
 
# grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 3)
# grid.fit(X_train, y_train.values.ravel())