In [1]:
#!/usr/bin/env python3
#### Import all the required libraries
import pandas as pd #### Library for working with large datasets
import numpy as np #### Library for performing numerical calculations
import matplotlib.pyplot as plt #### Basic library for plotting graphs
#### Configure Matplotlib to render plots inline in the notebook
%matplotlib inline 
plt.rcParams['figure.figsize'] = (12, 12) #### Default (width, height) used for every figure
#### NOTE(review): `dt` and the math helpers imported below are not referenced in
#### any cell visible in this review - confirm they are still needed.
import datetime as dt
from math import radians, cos, sin, asin, sqrt

In [30]:
#### Read the raw dataset; the file uses ';' as its field separator
data = pd.read_csv(filepath_or_buffer='bank-additional-full.csv', sep=';')

In [31]:
#### Summarize the loaded frame: column names, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             41188 non-null  int64  
 1   job             41188 non-null  object 
 2   marital         41188 non-null  object 
 3   education       41188 non-null  object 
 4   default         41188 non-null  object 
 5   housing         41188 non-null  object 
 6   loan            41188 non-null  object 
 7   contact         41188 non-null  object 
 8   month           41188 non-null  object 
 9   day_of_week     41188 non-null  object 
 10  duration        41188 non-null  int64  
 11  campaign        41188 non-null  int64  
 12  pdays           41188 non-null  int64  
 13  previous        41188 non-null  int64  
 14  poutcome        41188 non-null  object 
 15  emp.var.rate    41188 non-null  float64
 16  cons.price.idx  41188 non-null  float64
 17  cons.conf.idx   41188 non-null 

In [20]:
#### List the distinct values present in the 'cons.price.idx' column
data['cons.price.idx'].unique()

array([93.994, 94.465, 93.918, 93.444, 93.798, 93.2  , 92.756, 92.843,
       93.075, 92.893, 92.963, 92.469, 92.201, 92.379, 92.431, 92.649,
       92.713, 93.369, 93.749, 93.876, 94.055, 94.215, 94.027, 94.199,
       94.601, 94.767])

In [32]:
#### Integer codes for every categorical column, including the target 'y'.
#### The numbers are arbitrary labels; only 'month' and 'day_of_week' follow
#### calendar order.
clean_up_categoricals = {
    'job': {'housemaid': 9, 'services': 1, 'admin.': 2, 'blue-collar': 3, 'technician': 4,
            'retired': 5, 'management': 6, 'unemployed': 7, 'self-employed': 8, 'unknown': 0,
            'entrepreneur': 10, 'student': 11},
    'marital': {'married': 3, 'single': 1, 'divorced': 2, 'unknown': 0},
    'education': {'basic.4y': 5, 'high.school': 1, 'basic.6y': 2, 'basic.9y': 3,
                  'professional.course': 4, 'unknown': 0, 'university.degree': 6, 'illiterate': 7},
    'default': {'no': 1, 'unknown': 0, 'yes': 2},
    'housing': {'no': 1, 'unknown': 0, 'yes': 2},
    'loan': {'no': 1, 'unknown': 0, 'yes': 2},
    'contact': {'telephone': 1, 'cellular': 2},
    'month': {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
              'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12},
    'day_of_week': {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6, 'sun': 7},
    'poutcome': {'nonexistent': 0, 'failure': 1, 'success': 2},
    'y': {'no': 0, 'yes': 1},
}

#### Encode one column at a time instead of calling `data.replace(..., inplace=True)`:
####  * the frame is rebound rather than mutated in place,
####  * a label that has no code raises immediately instead of slipping through
####    as a string and failing later in scaling/training,
####  * the integer dtype is set explicitly rather than relying on `replace`
####    downcasting object columns, which newer pandas releases deprecate.
encoded_columns = {}
for column, codes in clean_up_categoricals.items():
    # Values without a code pass through unchanged, so re-running this cell on
    # an already-encoded frame is a no-op rather than an error.
    encoded = data[column].map(lambda label, codes=codes: codes.get(label, label))
    unexpected = set(encoded.unique()) - set(codes.values())
    if unexpected:
        raise ValueError(
            "Column {!r} contains values with no integer code: {}".format(
                column, sorted(unexpected, key=str)
            )
        )
    encoded_columns[column] = encoded.astype('int64')

data = data.assign(**encoded_columns)
data.head(10)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,9,3,5,1,1,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
1,57,1,3,1,0,1,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
2,37,1,3,1,1,2,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
3,40,2,3,2,1,1,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
4,56,1,3,1,1,1,2,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
5,45,1,3,3,0,1,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
6,59,2,3,4,1,1,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
7,41,3,3,0,0,1,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
8,24,4,1,4,1,2,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0
9,25,1,1,1,1,2,1,1,5,1,...,1,999,0,0,1.1,93.994,-36.4,4.857,5191.0,0


In [33]:
#### Extract Y & X from Data
#### Y is the encoded target column as a NumPy array; X is every other column.
Y = data['y'].values
# `columns=` replaces the positional axis argument (`drop(['y'], 1)`), which
# pandas 2.0 and later reject with a TypeError.
X = data.drop(columns=['y'])

#### Split X & Y into train (70%), validation (15%) and test (15%)
from sklearn.model_selection import train_test_split
# First carve off a 30% hold-out, then halve it into validation and test.
# Both splits use a fixed random_state so the partition is reproducible.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, Y, test_size = 0.3, random_state = 0)
X_valid, X_test, y_valid, y_test = train_test_split(X_holdout, y_holdout, test_size = 0.5, random_state = 0)


In [34]:
#### Feature Scaling - standardize the features so they share a comparable range.
#### The scaler is fitted on the training split only and then applied, unchanged,
#### to the validation and test splits.

from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_valid = sc_X.transform(X_valid)
X_test = sc_X.transform(X_test)

In [46]:
from sklearn.svm import SVC

#### Baseline SVM with a polynomial kernel, trained on the scaled training split.
#### The fit/predict calls must stay active: the confusion-matrix and
#### classification-report cells read `y_pred`, so with these lines commented out
#### the notebook raises NameError on a fresh kernel.
classifier = SVC(kernel='poly')
classifier.fit(X_train, y_train)

### Predict the results on the held-out test split
y_pred = classifier.predict(X_test)

In [None]:
from sklearn.model_selection import GridSearchCV

#### Cross-validated search over the penalty C and the kernel type
#### (5 values of C x 3 kernels = 15 candidate settings).
parameters = {'C':[1,2,3,4,5], 'kernel':('poly', 'linear','sigmoid')}
# n_jobs=-1 fits the candidates in parallel on all available cores; it only
# shortens the wall-clock time and does not change which parameters are selected.
clf = GridSearchCV(classifier, parameters, n_jobs=-1)
clf.fit(X_train,y_train)
clf.best_params_

In [36]:
### Lets create a Confusion Matrix to See how valid our accuracy score is
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
labels =['Pr 0', 'Pr 1','Pr 2', 'Pr 3']
print(*labels)
for line in cm:
    print(*line)


Pr 0 Pr 1 Pr 2 Pr 3
5383 121
436 239


In [45]:
#### Lets take a look at the classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.98      0.95      5504
           1       0.66      0.34      0.45       675

    accuracy                           0.91      6179
   macro avg       0.79      0.66      0.70      6179
weighted avg       0.89      0.91      0.90      6179

