In [1]:
from ucimlrepo import fetch_ucirepo

bank_marketing = fetch_ucirepo(id=222)

X = bank_marketing.data.features
y = bank_marketing.data.targets

print(bank_marketing.metadata)

print(bank_marketing.variables)

{'uci_id': 222, 'name': 'Bank Marketing', 'repository_url': 'https://archive.ics.uci.edu/dataset/222/bank+marketing', 'data_url': 'https://archive.ics.uci.edu/static/public/222/data.csv', 'abstract': 'The data is related with direct marketing campaigns (phone calls) of a Portuguese banking institution. The classification goal is to predict if the client will subscribe a term deposit (variable y).', 'area': 'Business', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 45211, 'num_features': 16, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Age', 'Occupation', 'Marital Status', 'Education Level'], 'target_col': ['y'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 2014, 'last_updated': 'Fri Aug 18 2023', 'dataset_doi': '10.24432/C5K306', 'creators': ['S. Moro', 'P. Rita', 'P. Cortez'], 'intro_paper': {'ID': 277, 'type': 'NATIVE', 'title': 'A data-driven approach to predict the s

In [2]:
import pandas as pd

df = pd.concat([X, y], axis=1)

df = df.drop(columns='duration')

df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day_of_week,month,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,,5,may,1,-1,0,,no
1,44,technician,single,secondary,no,29,yes,no,,5,may,1,-1,0,,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,,5,may,1,-1,0,,no
3,47,blue-collar,married,,no,1506,yes,no,,5,may,1,-1,0,,no
4,33,,single,,no,1,no,no,,5,may,1,-1,0,,no


In [3]:
df[df.duplicated()]

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day_of_week,month,campaign,pdays,previous,poutcome,y
2871,44,services,single,secondary,no,0,yes,no,,14,may,1,-1,0,,no
4910,34,management,single,tertiary,no,0,yes,no,,21,may,1,-1,0,,no
12939,25,blue-collar,married,primary,no,0,no,no,cellular,7,jul,1,-1,0,,no
13662,47,services,married,secondary,no,0,yes,no,cellular,9,jul,1,-1,0,,no
15836,35,management,married,tertiary,no,0,yes,no,cellular,21,jul,3,-1,0,,no
21522,47,technician,married,secondary,no,0,no,no,cellular,19,aug,2,-1,0,,no
22157,31,management,single,tertiary,no,0,no,no,cellular,21,aug,2,-1,0,,no
22210,32,technician,single,tertiary,no,0,no,no,cellular,21,aug,2,-1,0,,no
22233,34,management,single,tertiary,no,0,no,no,cellular,21,aug,2,-1,0,,no
22418,30,technician,single,tertiary,no,0,no,no,cellular,22,aug,2,-1,0,,no


In [4]:
df.isna().sum()

age                0
job              288
marital            0
education       1857
default            0
balance            0
housing            0
loan               0
contact        13020
day_of_week        0
month              0
campaign           0
pdays              0
previous           0
poutcome       36959
y                  0
dtype: int64

In [5]:
# Handle null values
# For categorical columns, nulls often indicate 'unknown' - preserve this information
df['job'] = df['job'].fillna('unknown')
df['education'] = df['education'].fillna('unknown')
df['contact'] = df['contact'].fillna('unknown')
df['poutcome'] = df['poutcome'].fillna('none')  # No previous campaign

# Verify no nulls remain
df.isna().sum()

age            0
job            0
marital        0
education      0
default        0
balance        0
housing        0
loan           0
contact        0
day_of_week    0
month          0
campaign       0
pdays          0
previous       0
poutcome       0
y              0
dtype: int64

In [6]:
# categorical
    # Education mapping
edu_map = {'unknown': 0, 'primary': 1, 'secondary': 2, 'tertiary': 3}
df['education'] = df['education'].map(edu_map)


# One Hot encoding
df = pd.get_dummies(df, columns=['job', 'marital', 'contact', 'poutcome', 'month'], drop_first=True)

# binary
binary_cols = ['default', 'housing', 'loan', 'y']
for col in binary_cols:
    df[col] = df[col].map({'yes': 1, 'no': 0})

df.dtypes # print result

age                  int64
education            int64
default              int64
balance              int64
housing              int64
loan                 int64
day_of_week          int64
campaign             int64
pdays                int64
previous             int64
y                    int64
job_blue-collar       bool
job_entrepreneur      bool
job_housemaid         bool
job_management        bool
job_retired           bool
job_self-employed     bool
job_services          bool
job_student           bool
job_technician        bool
job_unemployed        bool
job_unknown           bool
marital_married       bool
marital_single        bool
contact_telephone     bool
contact_unknown       bool
poutcome_none         bool
poutcome_other        bool
poutcome_success      bool
month_aug             bool
month_dec             bool
month_feb             bool
month_jan             bool
month_jul             bool
month_jun             bool
month_mar             bool
month_may             bool
m

In [7]:
from perceptron import Perceptron
from trainer import Trainer

perceptron = Perceptron(2, 1)
trainer = Trainer(perceptron=perceptron, n_epochs=1000, learning_rate=0.01)

In [8]:
# Prepare data: extract age and balance features
X_features = df[['age', 'balance']].values  # shape: (samples, features)
y_target = df['y'].values  # shape: (samples,)

# Split into train/test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_features, y_target, test_size=0.2, random_state=42
)

# Transpose to match perceptron format: (features, samples)
X_train_T = X_train.T
X_test_T = X_test.T

print(f"Training set: {X_train_T.shape[1]} samples, {X_train_T.shape[0]} features")
print(f"Test set: {X_test_T.shape[1]} samples")
print(f"Class distribution in training: {y_train.sum()} positive, {len(y_train) - y_train.sum()} negative")

Training set: 36168 samples, 2 features
Test set: 9043 samples
Class distribution in training: 4198 positive, 31970 negative


In [10]:
# Train the perceptron
trainer.train(x_train=X_train_T, x_test=X_test_T, y_train=y_train, y_test=y_test)

# Make predictions on test set
y_pred = trainer.predict(X=X_test_T)
test_accuracy = trainer.accuracy(y_pred=y_pred, y_true=y_test)

print(f"\nFinal Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Predictions shape: {y_pred.shape}")
print(f"Sample predictions: {y_pred.flatten()[:10]}")

  return 1 / (1 + np.power(np.e, -Z))


Epoch 0: Loss 24.2604, Accuracy: 81.2%
Epoch 100: Loss 6.2560, Accuracy: 83.4%
Epoch 200: Loss 6.3294, Accuracy: 82.1%
Epoch 300: Loss 20.5437, Accuracy: 81.8%
Epoch 400: Loss 5.7943, Accuracy: 87.2%
Epoch 500: Loss 6.1846, Accuracy: 82.6%
Epoch 600: Loss 19.0959, Accuracy: 82.1%
Epoch 700: Loss 5.5066, Accuracy: 87.9%
Epoch 800: Loss 6.0831, Accuracy: 83.0%
Epoch 900: Loss 18.5876, Accuracy: 82.3%

Final Test Accuracy: 84.67%
Predictions shape: (1, 9043)
Sample predictions: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
