In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Location of the bank-marketing CSV, relative to the notebook's working directory.
URL = 'bank-full.csv'

# Read the file and keep only its first 1000 rows for this experiment.
data = pd.read_csv(URL).head(1000)

In [4]:
# Display the column labels of the loaded frame to see which fields are available.
data.columns

Index(['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'y'],
      dtype='object')

In [5]:
# Bin edges used to turn the raw age into an ordinal bucket.
age_ranges = [18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95]

# One label per bin ("1" .. "15"), derived from the edges so the two lists
# cannot drift out of sync when an edge is added or removed.
range_labels = [str(bin_number) for bin_number in range(1, len(age_ranges))]

# pd.cut builds right-closed intervals by default, so without
# include_lowest=True an age of exactly 18 (the first edge) would fall outside
# every bin, become NaN and be discarded by the later dropna step.
# Ages above the last edge (95) still map to NaN.
data["age"] = pd.cut(
    data["age"],
    bins=age_ranges,
    labels=range_labels,
    include_lowest=True,
)


In [6]:
# Text-valued columns that are removed from the frame before modelling.
columns_to_discard = ['marital', 'contact', 'job', 'education', 'month']
data.drop(columns=columns_to_discard, inplace=True)

In [7]:
# Discard every row that has a missing value in at least one column
# (this includes rows whose age fell outside the bins defined earlier).
data.dropna(how='any', axis='index', inplace=True)

In [8]:
def _encode(series, codes):
    """Return a new Series in which every value found in ``codes`` is replaced by its code.

    Values that are not keys of ``codes`` pass through unchanged, which mirrors
    the semantics of ``Series.replace`` and keeps this cell safe to re-run on
    columns that are already encoded.
    """
    return series.map(lambda value: codes.get(value, value))


# Integer codes for the outcome of the previous campaign and for yes/no flags.
POUTCOME_CODES = {"unknown": 0, "other": 1, "failure": 2, "success": 3}
YES_NO_CODES = {"yes": 1, "no": 0}

# Assign the encoded column back to the frame instead of calling
# `data.<col>.replace(..., inplace=True)`: that form mutates a temporary column
# selection, which pandas warns about and which leaves `data` untouched once
# Copy-on-Write is active.
data["poutcome"] = _encode(data["poutcome"], POUTCOME_CODES)
for column in ("y", "default", "housing", "loan"):
    data[column] = _encode(data[column], YES_NO_CODES)


In [9]:
# Row counts for an 80 / 20 partition of the cleaned frame.
num_of_rows = len(data)
train_size = int(0.8 * num_of_rows)
test_size = num_of_rows - train_size

# Positional split without shuffling: the leading rows are used for model
# development, the trailing rows are held back as an outer evaluation set.
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]

In [10]:
# Feature matrix: every column of the development partition except the target.
x = np.array(train_data.drop(columns=['y']))
# Target vector: the encoded "y" column.
y = np.array(train_data['y'])

# Inner 80 / 20 split of the development partition. The fixed random_state
# makes the shuffle (and therefore every score computed from it) reproducible
# across kernel restarts; without it each run yields different numbers.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [11]:
# Features and target of the held-back partition, converted to NumPy arrays
# so they can be passed to the fitted models' score() calls.
x_test_out = np.array(test_data.drop(columns=['y']))
y_test_out = np.array(test_data['y'])

In [12]:
# Logistic regression with the lbfgs solver, allowed up to 1000 iterations.
# fit() returns the estimator itself, so construction and training are chained.
log_reg = LogisticRegression(max_iter=1000, solver='lbfgs').fit(x_train, y_train)

# Score on the inner test split first, then on the held-back partition.
log_reg_accuracy = log_reg.score(x_test, y_test)
log_reg_accuracy_validation = log_reg.score(x_test_out, y_test_out)

In [13]:
# Support-vector classifier with gamma='auto', trained on the inner training split.
svc = SVC(gamma='auto').fit(x_train, y_train)

# Scores are evaluated in the same order as before: inner test split,
# then the held-back partition.
svc_accuracy, svc_accuracy_validation = (
    svc.score(x_test, y_test),
    svc.score(x_test_out, y_test_out),
)

In [14]:
# Decision tree trained on the inner training split. The fixed random_state
# pins the estimator's internal randomness so the fitted tree, and the two
# scores below, are the same on every run.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(x_train, y_train)

# Score on the inner test split, then on the held-back partition.
tree_accuracy = tree.score(x_test, y_test)
tree_validation = tree.score(x_test_out, y_test_out)

In [15]:
# k-nearest-neighbours classifier with the library's default settings;
# fit() returns the estimator, so it is bound directly to the name.
kn_classifier = KNeighborsClassifier().fit(x_train, y_train)

# Inner test split score.
kn_classifier_accuracy = kn_classifier.score(x_test, y_test)
# Held-back partition score.
kn_classifier_validation = kn_classifier.score(x_test_out, y_test_out)

In [16]:
# Random forest trained on the inner training split. The fixed random_state
# pins the estimator's internal randomness so the ensemble, and the two scores
# below, are reproducible from run to run.
random_forest = RandomForestClassifier(random_state=42)
random_forest.fit(x_train, y_train)

# Score on the inner test split, then on the held-back partition.
random_forest_accuracy = random_forest.score(x_test, y_test)
random_forest_validation = random_forest.score(x_test_out, y_test_out)