# Imports

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import resample
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
import pickle

# Data

In [2]:
# Read the three ';'-delimited variants of the cleaned bank dataset:
#   data   <- bank-full-clean.csv
#   data16 <- bank-full-clean16.csv
#   data15 <- bank-full-clean15.csv
data, data16, data15 = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        '/content/bank-full-clean.csv',
        '/content/bank-full-clean16.csv',
        '/content/bank-full-clean15.csv',
    )
)

<hr>

## Bank Dataset

In [3]:
# Dimensions of the full dataset as (rows, columns)
data.shape

(43010, 17)

In [4]:
# Preview the first rows of the full dataset
data.head()

Unnamed: 0,job,marital,education,default,balance,housing,loan,contact,day,month,campaign,pdays,previous,poutcome,y,log_age,log_duration
0,0,0,0,0,2143,0,0,0,5,0,1,-1,0,0,0,4.060443,5.56452
1,1,1,1,0,29,0,0,0,5,0,1,-1,0,0,0,3.78419,5.01728
2,2,0,1,0,2,0,1,0,5,0,1,-1,0,0,0,3.496508,4.330733
3,3,0,2,0,1506,0,0,0,5,0,1,-1,0,0,0,3.850148,4.521789
4,4,1,2,0,1,1,0,0,5,0,1,-1,0,0,0,3.496508,5.288267


In [5]:
# Balance the classes by undersampling: every y == 1 row is kept and an
# equally sized random subset (without replacement) of the y == 0 rows is drawn.
minority_class = data.loc[data['y'] == 1]
majority_class = data.loc[data['y'] == 0]

# The minority class is used in full
sampled_minority = minority_class

# Seeded draw from the majority class, matching the minority class size
sampled_majority = resample(
    majority_class,
    replace=False,
    n_samples=len(minority_class),
    random_state=42,
)

# Stack both halves, then shuffle all rows with a fixed seed
balanced_data = pd.concat([sampled_majority, sampled_minority]).sample(frac=1, random_state=42)

# Optional: persist the balanced dataset to a new file
#balanced_data.to_csv('balanced_dataset.csv', index=False)

In [6]:
# Dimensions of the undersampled dataset as (rows, columns)
balanced_data.shape

(9958, 17)

In [7]:
# Train and evaluate an RBF-kernel SVM on the balanced dataset.

# Hold out 30% of the rows for testing; 'y' is the target, every other column
# is a feature. The split is seeded (like the resampling and shuffling steps)
# so the metrics printed below are reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    balanced_data.drop('y', axis=1),
    balanced_data['y'],
    test_size=0.3,
    random_state=42,
)

# Min-max scale the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the SVM model
svm_model = SVC(kernel='rbf', C=1.0, gamma=0.1)
svm_model.fit(X_train, y_train)

# Evaluate the model performance on the held-out test set
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Accuracy: 0.83
Precision: 0.80
Recall: 0.87
F1-score: 0.83


<hr>


## Bank Dataset Without "default"

In [8]:
# Dimensions of the data16 variant as (rows, columns)
data16.shape

(43010, 16)

In [9]:
# Preview the first rows of the data16 variant
data16.head()

Unnamed: 0,job,marital,education,balance,housing,loan,contact,day,month,campaign,pdays,previous,poutcome,y,log_age,log_duration
0,0,0,0,2143,0,0,0,5,0,1,-1,0,0,0,4.060443,5.56452
1,1,1,1,29,0,0,0,5,0,1,-1,0,0,0,3.78419,5.01728
2,2,0,1,2,0,1,0,5,0,1,-1,0,0,0,3.496508,4.330733
3,3,0,2,1506,0,0,0,5,0,1,-1,0,0,0,3.850148,4.521789
4,4,1,2,1,1,0,0,5,0,1,-1,0,0,0,3.496508,5.288267


In [10]:
# Balance the classes of data16 by undersampling: every y == 1 row is kept and
# an equally sized random subset (without replacement) of the y == 0 rows is drawn.
minority_class = data16.loc[data16['y'] == 1]
majority_class = data16.loc[data16['y'] == 0]

# The minority class is used in full
sampled_minority = minority_class

# Seeded draw from the majority class, matching the minority class size
sampled_majority = resample(
    majority_class,
    replace=False,
    n_samples=len(minority_class),
    random_state=42,
)

# Stack both halves, then shuffle all rows with a fixed seed
balanced_data = pd.concat([sampled_majority, sampled_minority]).sample(frac=1, random_state=42)

# Optional: persist the balanced dataset to a new file
#balanced_data.to_csv('balanced_dataset.csv', index=False)

In [11]:
# Dimensions of the undersampled data16 variant as (rows, columns)
balanced_data.shape

(9958, 16)

In [12]:
# Train and evaluate an RBF-kernel SVM on the balanced data16 variant.

# Hold out 30% of the rows for testing; 'y' is the target, every other column
# is a feature. The split is seeded (like the resampling and shuffling steps)
# so the metrics printed below are reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    balanced_data.drop('y', axis=1),
    balanced_data['y'],
    test_size=0.3,
    random_state=42,
)

# Min-max scale the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the SVM model.
# `degree` is only used by the 'poly' kernel and is ignored by 'rbf', so it is
# not passed here; the fitted model is the same as with degree=2.
svm_model = SVC(kernel='rbf', C=10, gamma='scale')
svm_model.fit(X_train, y_train)

# Evaluate the model performance on the held-out test set
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Accuracy: 0.83
Precision: 0.81
Recall: 0.87
F1-score: 0.84


In [13]:
# Wrap the SVM in a bagging ensemble of 10 estimators (seeded) and fit it on
# the training split
bagging_svm = BaggingClassifier(svm_model, n_estimators=10, random_state=42).fit(X_train, y_train)

# Ensemble predictions for the test split
y_pred = bagging_svm.predict(X_test)

# Score the predictions with each metric, in the order they are reported
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report every metric rounded to two decimals
for template, value in (
    ("Accuracy: {:.2f}", accuracy),
    ("Precision: {:.2f}", precision),
    ("Recall: {:.2f}", recall),
    ("F1-score: {:.2f}", f1),
):
    print(template.format(value))

Accuracy: 0.83
Precision: 0.82
Recall: 0.86
F1-score: 0.84


## Save the model

In [14]:
# Save the SVM model to a file using pickle
with open('svm_model.pkl', 'wb') as f:
    pickle.dump(svm_model, f)

# The SVM was trained on min-max scaled features, so the fitted scaler is
# saved alongside it; without the scaler the pickled model cannot be applied
# to raw, unscaled rows at inference time.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

In [None]:
# Disabled hyperparameter search: the whole cell body is wrapped in a string
# literal, so none of it executes.
# NOTE(review): if this is re-enabled, `%%time` is a cell magic and must be the
# first line of the cell; also `degree` only affects kernel='poly'.
'''
%%time
# Define the range of hyperparameters to test
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'],
}

# Define the grid search object
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=6)

# Fit the grid search object to the data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and the corresponding score
print("Best hyperparameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)
'''

<hr>

## Bank Dataset Without "log_duration" and "default"

In [16]:
# Dimensions of the data15 variant as (rows, columns)
data15.shape

(43010, 15)

In [17]:
# Preview the first rows of the data15 variant
data15.head()

Unnamed: 0,job,marital,education,balance,housing,loan,contact,day,month,campaign,pdays,previous,poutcome,y,log_age
0,0,0,0,2143,0,0,0,5,0,1,-1,0,0,0,4.060443
1,1,1,1,29,0,0,0,5,0,1,-1,0,0,0,3.78419
2,2,0,1,2,0,1,0,5,0,1,-1,0,0,0,3.496508
3,3,0,2,1506,0,0,0,5,0,1,-1,0,0,0,3.850148
4,4,1,2,1,1,0,0,5,0,1,-1,0,0,0,3.496508


In [18]:
# Balance the classes of data15 by undersampling: every y == 1 row is kept and
# an equally sized random subset (without replacement) of the y == 0 rows is drawn.
minority_class = data15.loc[data15['y'] == 1]
majority_class = data15.loc[data15['y'] == 0]

# The minority class is used in full
sampled_minority = minority_class

# Seeded draw from the majority class, matching the minority class size
sampled_majority = resample(
    majority_class,
    replace=False,
    n_samples=len(minority_class),
    random_state=42,
)

# Stack both halves, then shuffle all rows with a fixed seed
balanced_data = pd.concat([sampled_majority, sampled_minority]).sample(frac=1, random_state=42)

# Optional: persist the balanced dataset to a new file
#balanced_data.to_csv('balanced_dataset.csv', index=False)

In [19]:
# Train and evaluate an RBF-kernel SVM on the balanced data15 variant.

# Hold out 30% of the rows for testing; 'y' is the target, every other column
# is a feature. The split is seeded (like the resampling and shuffling steps)
# so the metrics printed below are reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    balanced_data.drop('y', axis=1),
    balanced_data['y'],
    test_size=0.3,
    random_state=42,
)

# Min-max scale the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the SVM model
svm_model = SVC(kernel='rbf', C=1.0, gamma=0.1)
svm_model.fit(X_train, y_train)

# Evaluate the model performance on the held-out test set
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Accuracy: 0.68
Precision: 0.71
Recall: 0.60
F1-score: 0.65
