In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import MinMaxScaler
import random
random.seed(0)
tf.random.set_seed(0)
np.random.seed(0)

## **Load the Data**

In [2]:
data = pd.read_csv('/content/bank-additional-full.csv')

In [3]:
data = data.drop_duplicates()
data.shape

(41176, 21)

In [4]:
data = data[data['job'] != 'unknown']
data = data[data['marital'] != 'unknown']
data = data[data['education'] != 'unknown']
data = data[data['default'] != 'unknown']
data = data[data['housing'] != 'unknown']
data = data[data['loan'] != 'unknown']
data = data[data['contact'] != 'unknown']
data = data[data['month'] != 'unknown']
data = data[data['day_of_week'] != 'unknown']
data = data[data['poutcome'] != 'unknown']

In [5]:
data['job'] = [0 if elem=='admin.' else 1 if elem=='blue-collar' else 2 if elem=='technician' else 3 if elem=='services' else 4 if elem=='management' else 5 if elem=='retired' else 6 if elem=='entrepreneur' else 7 if elem=='self-employed' else 8 if elem=='housemaid' else 9 if elem=='student' else 10 if elem=='unemployed' else 11 for elem in data['job']]
data['marital'] = [0 if elem=='married.' else 1 if elem=='single' else 2 if elem=='divorce' else 3 for elem in data['marital']]
data['education'] = [1 if elem=='university.degree' else 2 if elem=='high.school' else 3 if elem=='basic.9y' else 4 if elem=='professional.course' else 5 if elem=='basic.4y' else 6 if elem=='basic.6y' else 7 for elem in data['education']]
data['default'] = [1 if elem=='no' else 2 if elem=='yes' else 3 for elem in data['default']]
data['housing'] = [1 if elem=='no' else 2 if elem=='yes' else 3 for elem in data['housing']]
data['loan'] = [1 if elem=='no' else 2 if elem=='yes' else 3 for elem in data['loan']]
data['contact'] = [1 if elem=='cellular' else 2 for elem in data['contact']]
data['month'] = [0.5 if elem=='may' else 0.7 if elem=='jul' else 0.8 if elem=='aug' else 0.6 if elem=='jun' else 0.11 if elem=='nov' else 0.4 if elem=='apr' else 0.10 if elem=='oct' else 0.9 if elem=='sep' else 0.3 if elem=='mar' else 0.12 for elem in data['month']]
data['day_of_week'] = [0.10 if elem=='mon' else 0.40 if elem=='thu' else 0.30 if elem=='wed' else 0.20 if elem=='tue' else 0.50 for elem in data['day_of_week']]
data['poutcome'] = [1 if elem=='nonexistent' else 2 if elem=='failure' else 3 for elem in data['poutcome']]

In [6]:
data['y'] = [1 if elem=='no' else 0 for elem in data['y']]

In [7]:
data_no = data.loc[data['y']==1]
data_yes = data.loc[data['y']==0]

In [8]:
data_no_train = data_no[10000:25000]
data_no_test = data_no[:4000]
data_yes_train = data_yes[:3000]
data_yes_test = data_yes[3000:]

In [9]:
train = pd.DataFrame(pd.concat([data_no_train,data_yes_train]))
test = pd.DataFrame(pd.concat([data_no_test,data_yes_test]))

In [10]:
train = train.reset_index(drop = True)   #reset index for the train data
test = test.reset_index(drop = True)     #reset index for the test data

In [11]:
y_train = train['y']
y_test = test['y']

In [12]:
train = train.drop(['y'],axis=1)
test = test.drop(['y'],axis=1)

In [13]:
scaler = MinMaxScaler()
train = scaler.fit_transform(train)
train = np.array(train)
train = train / train.sum(axis=1, keepdims=True)
train = np.clip(train,0.0000005,None)

In [14]:
scaler = MinMaxScaler()
test = scaler.fit_transform(test)
test = np.array(test)
test = test / test.sum(axis=1, keepdims=True)
test = np.clip(test,0.0000005,None)

In [15]:
from sklearn.svm import SVC
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, accuracy_score

##**SVM**

In [16]:
# Train SVM classifier
print("Training SVM classifier...")
svm = SVC(kernel='linear', C=5.0, random_state=42)
svm.fit(train, y_train)

Training SVM classifier...


In [17]:
# Test SVM classifier
print("Testing SVM classifier...")
y_pred_svm = svm.predict(test)

Testing SVM classifier...


In [18]:
# Evaluate the performance of the classifier
print("F1 Score:", f1_score(y_test, y_pred_svm))
print("Precision:", precision_score(y_test, y_pred_svm))
print("Recall:", recall_score(y_test, y_pred_svm))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("Accuracy:", accuracy_score(y_test, y_pred_svm))

F1 Score: 0.9486134840320672
Precision: 1.0
Recall: 0.90225
Confusion Matrix:
 [[ 858    0]
 [ 391 3609]]
Accuracy: 0.9195142033758749
