### **Import the dataset**

In [1]:
!pip3 install scikit-learn



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the bank-marketing CSV into a DataFrame and preview its first five rows.
df = pd.read_csv("datasets/Bank_Market_Dataset.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [4]:
# Drop the "Unnamed: 0" column (it appears to be a row index that was written
# into the CSV). errors="ignore" keeps this cell re-runnable: without it a
# second run raises KeyError because the column is already gone.
df = df.drop(columns="Unnamed: 0", errors="ignore")
df.head(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [5]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


In [6]:
# Partition the columns by dtype: int64 columns are treated as numeric
# features, object (string) columns as categorical ones.
object_cols = df.select_dtypes(include="object").columns
num_cols = df.select_dtypes(include="int64").columns

In [7]:
# Display the names of the numeric feature columns.
num_cols

Index(['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous'], dtype='object')

In [8]:
# Remove the target "y" from the categorical feature list.
# Dropping by label (rather than slicing off the last element) does not rely
# on "y" being the final column, and errors="ignore" makes a re-run of this
# cell a no-op instead of silently discarding another feature.
object_cols = object_cols.drop("y", errors="ignore")

In [9]:
# Display the names of the categorical feature columns (target removed).
object_cols

Index(['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
       'month', 'poutcome'],
      dtype='object')

In [10]:
# Convert the target variable to binary: "yes" -> 1, anything else -> 0.
# The dtype guard keeps the cell idempotent: without it, a second run would
# compare the already-encoded 0/1 integers to "yes" and overwrite every label
# with 0.
if not pd.api.types.is_numeric_dtype(df["y"]):
    df["y"] = (df["y"] == "yes").astype("int64")

In [11]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,y
count,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0
mean,40.93621,1362.272058,15.806419,258.16308,2.763841,40.197828,0.580323,0.116985
std,10.618762,3044.765829,8.322476,257.527812,3.098021,100.128746,2.303441,0.321406
min,18.0,-8019.0,1.0,0.0,1.0,-1.0,0.0,0.0
25%,33.0,72.0,8.0,103.0,1.0,-1.0,0.0,0.0
50%,39.0,448.0,16.0,180.0,2.0,-1.0,0.0,0.0
75%,48.0,1428.0,21.0,319.0,3.0,-1.0,0.0,0.0
max,95.0,102127.0,31.0,4918.0,63.0,871.0,275.0,1.0


### **Preprocess the dataset**

In [12]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Numeric features: fill gaps with the column mean, then standardise.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("std_scaler", StandardScaler()),
    ]
)

# Categorical features: fill gaps with the most frequent value, then one-hot encode.
object_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder()),
    ]
)

In [13]:
# Route numeric and categorical columns through their respective pipelines.
# sparse_threshold=0 forces a dense ndarray result. With the default (0.3),
# the one-hot block can make ColumnTransformer return a scipy sparse matrix
# whenever the overall density drops below the threshold, and the later
# torch.tensor(X, ...) conversion cannot consume a sparse matrix.
preprocessor = ColumnTransformer(
    [
        ('num', num_pipeline, num_cols),
        ('obj', object_pipeline, object_cols),
    ],
    sparse_threshold=0,
)

In [14]:
# Separate the target column from the feature columns.
y = df["y"]
X = df.drop(columns=["y"])

In [15]:
# Fit the preprocessor and transform the features into a numeric matrix.
# The input is re-derived from df so this cell can be re-run: feeding the
# already-transformed X back into fit_transform fails because the array no
# longer has named columns.
# NOTE(review): the scaler/encoder are fitted on all rows before the
# train/test split, so test-set statistics leak into preprocessing. Consider
# splitting first and fitting on the training rows only.
X = preprocessor.fit_transform(df.drop(columns="y"))

In [16]:
# (rows, encoded feature count) after preprocessing.
X.shape

(45211, 51)

### **Ready the model inputs**

In [17]:
# Uncomment the line below to install PyTorch on Apple Silicon.

# !pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu

In [18]:
#convert the data to tensors
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

# Labels become int64 class indices (the target format used with
# nn.CrossEntropyLoss); features become float32 to match the default dtype of
# the model's parameters.
y_tensor = torch.tensor(y.values, dtype=torch.long)
X_tensor = torch.tensor(X, dtype=torch.float)

In [19]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_tensor,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Sanity check: shape and dtype of the training features.
print(X_train.shape, X_train.dtype, sep="\n")

torch.Size([36168, 51])
torch.float32


In [21]:
# Persist the train/test tensors so another file can load them.
tensors_by_filename = {
    "X_train.pt": X_train,
    "X_test.pt": X_test,
    "y_train.pt": y_train,
    "y_test.pt": y_test,
}
for filename, tensor in tensors_by_filename.items():
    torch.save(tensor, filename)

In [22]:
# Wrap the tensors in datasets and batch them with DataLoaders.
# NOTE(review): a batch size of 2 means roughly 18k optimizer steps per epoch
# on the 36,168 training rows; a larger batch would train much faster. The
# value is kept as-is so existing results are unchanged.
BATCH_SIZE = 2

trainDataset = TensorDataset(X_train, y_train)
testDataset = TensorDataset(X_test, y_test)

# Only the training data needs shuffling. Accuracy on the test set does not
# depend on sample order, so the test loader iterates deterministically.
trainLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)
testLoader = DataLoader(testDataset, batch_size=BATCH_SIZE, shuffle=False)

### **Define the Feed-Forward Neural Network**

In [23]:
class FeedForwardNetwork(nn.Module):
    """Fully connected network with a single ReLU-activated hidden layer.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The layers live in one nn.Sequential called "network", so parameter
        # names keep the "network.<index>." prefix in the state dict.
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return its output."""
        return self.network(x)

In [24]:
# Configure the model.
# Seed PyTorch's global RNG so that weight initialisation (and the DataLoader
# shuffling that draws from the same default generator) is reproducible
# across kernel restarts.
torch.manual_seed(42)

input_size = X_train.shape[1]  # one input unit per preprocessed feature
hidden_size = 32
output_size = 2  # two logits, one per class of the binary target

model = FeedForwardNetwork(input_size, hidden_size, output_size)

In [25]:
# Optimizer (Adam over all model parameters) and loss (cross-entropy on the
# model's raw outputs).
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [26]:
# ---- Training: 25 passes over the training loader ----
epochs = 25
for epoch in range(epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for X_batch, y_batch in trainLoader:
        # Forward pass and loss for this mini-batch.
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the loss and the number of correct arg-max predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += len(y_batch)
        correct += int((predicted == y_batch).sum())

    # Loss is averaged per batch; accuracy is computed per sample.
    epoch_loss = running_loss / len(trainLoader)
    epoch_accuracy = correct / total
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

# ---- Evaluation on the held-out test loader (no gradient tracking) ----
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for X_batch, y_batch in testLoader:
        predicted = model(X_batch).argmax(dim=1)
        total += len(y_batch)
        correct += int((predicted == y_batch).sum())

test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy:.4f}")

# ---- Persist the trained weights ----
torch.save(model.state_dict(), "feedforward.pth")



Epoch 1, Loss: 0.2261, Accuracy: 0.9017
Epoch 2, Loss: 0.2116, Accuracy: 0.9058
Epoch 3, Loss: 0.2065, Accuracy: 0.9088
Epoch 4, Loss: 0.2027, Accuracy: 0.9089
Epoch 5, Loss: 0.1985, Accuracy: 0.9113
Epoch 6, Loss: 0.1975, Accuracy: 0.9114
Epoch 7, Loss: 0.1951, Accuracy: 0.9129
Epoch 8, Loss: 0.1941, Accuracy: 0.9140
Epoch 9, Loss: 0.1929, Accuracy: 0.9136
Epoch 10, Loss: 0.1914, Accuracy: 0.9140
Epoch 11, Loss: 0.1900, Accuracy: 0.9150
Epoch 12, Loss: 0.1885, Accuracy: 0.9143
Epoch 13, Loss: 0.1878, Accuracy: 0.9162
Epoch 14, Loss: 0.1875, Accuracy: 0.9164
Epoch 15, Loss: 0.1867, Accuracy: 0.9161
Epoch 16, Loss: 0.1862, Accuracy: 0.9168
Epoch 17, Loss: 0.1860, Accuracy: 0.9177
Epoch 18, Loss: 0.1850, Accuracy: 0.9178
Epoch 19, Loss: 0.1842, Accuracy: 0.9182
Epoch 20, Loss: 0.1842, Accuracy: 0.9183
Epoch 21, Loss: 0.1837, Accuracy: 0.9185
Epoch 22, Loss: 0.1832, Accuracy: 0.9185
Epoch 23, Loss: 0.1825, Accuracy: 0.9187
Epoch 24, Loss: 0.1820, Accuracy: 0.9192
Epoch 25, Loss: 0.1820, A