In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [2]:
df = pd.read_csv("Bank_Personal_Loan_Modelling.csv")

In [3]:
print("Shape : ", df.shape)
df.head()

Shape :  (5000, 14)


Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [4]:
# Deleting Columns
df.drop(["ID", "ZIP Code"],axis=1,inplace=True)
df["Experience"] = abs(df["Experience"])

In [5]:
df.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

In [6]:
df = df[['Age', 'Experience', 'Income', 'Family', 'CCAvg','Education', 'Mortgage', 'Securities Account','CD Account', 'Online', 'CreditCard', 'Personal Loan']]

In [7]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1].values

### Train Test Split

In [8]:
x,y = X,Y

In [9]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=69)

In [10]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [11]:
x_train.shape, y_train.shape

((3750, 11), (3750,))

# PyTorch 

In [12]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [13]:
BATCH_SIZE = 32
LEARNING_RATE = 0.003
EPOCH = 50

In [14]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [15]:
train_x = torch.from_numpy(x_train).to(torch.float32)
train_y = torch.from_numpy(y_train).to(torch.float32)

In [16]:
train_x.shape, train_y.shape

(torch.Size([3750, 11]), torch.Size([3750]))

In [17]:
data = TensorDataset(train_x,train_y)
data = DataLoader(data,batch_size=BATCH_SIZE,shuffle=True)

## Building Model

In [18]:
class Model(torch.nn.Module):
    
    def __init__(self):
        super(Model,self).__init__()
        
        self.layer1 = torch.nn.Linear(11,16)
        self.layer2 = torch.nn.Linear(16,1)
        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

In [19]:
model = Model()
# model.to(device)
print(model)

Model(
  (layer1): Linear(in_features=11, out_features=16, bias=True)
  (layer2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)


## Defining Loss function and Optimizer

In [20]:
loss_function = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=LEARNING_RATE)

## Training

In [21]:
training_loss = [0]*EPOCH
training_accuracy = [0]*EPOCH

for i in range(1,EPOCH+1):
    epoch_loss = 0
    accuracy = 0
    for x_batch,y_batch in data:
        # x_batch,y_batch = x_batch.to(device),y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = model(x_batch)
        
        loss = loss_function(y_pred,y_batch.unsqueeze(1))
        loss.backward()
        
        optimizer.step()
        epoch_loss += loss.item() * len(x_batch)
        cnt = (torch.where(y_pred>=0.5, 1, 0) == y_batch.unsqueeze(1)).sum().float()
        accuracy += cnt.item()
    if(i%10 == 0):
        print(i,epoch_loss/len(data.dataset),(accuracy)/len(data.dataset))

10 0.06616834358274937 0.9762666666666666
20 0.05173256015777588 0.9808
30 0.04539471660504738 0.9848
40 0.04151624689574043 0.9864
50 0.03941767498505845 0.9888


## Testing 

In [22]:
test_x = torch.from_numpy(x_test).to(torch.float32)
test_y = torch.from_numpy(y_test).to(torch.float32)

In [23]:
test = TensorDataset(test_x,test_y)
test = DataLoader(test,batch_size=1)

In [24]:
y_pred = model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
accuracy = (y_pred == test_y).sum().float().item() / len(test.dataset)
print(accuracy)

0.9824


In [25]:
from sklearn.metrics import classification_report
py_report=classification_report(y_pred,test_y)
print(py_report)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1135
           1       0.90      0.90      0.90       115

    accuracy                           0.98      1250
   macro avg       0.95      0.95      0.95      1250
weighted avg       0.98      0.98      0.98      1250



## Genetic Algorithm

In [26]:
BATCH_SIZE = 1

In [27]:
data = TensorDataset(train_x,train_y)
data = DataLoader(data,batch_size=BATCH_SIZE,shuffle=True)

In [28]:
# Accuracy 
def fitness_function(model):
    
    y_pred = model(train_x)
    y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
    accuracy = (y_pred == train_y).sum().float().item() / len(data.dataset)
    
    return accuracy

In [29]:
# Crossover and Mutation
def crossover_mutation(model1,model2):
    # Shape of the weights and biases
    shp = [i.numpy().shape for i in model1.parameters()]
    sz = [i[0]*i[1] if len(i) == 2 else i[0] for i in shp]
    
    # Flattening the parameters for cross over
    matrix_1 = np.concatenate([i.numpy().flatten() for i in model1.parameters()])
    matrix_2 = np.concatenate([i.numpy().flatten() for i in model2.parameters()])
    
    # Crossover
    start = len(matrix_1)//2 - 10
    end = len(matrix_1)//2 + 10
    mid_value = random.randrange(start,end)
    
    main_child_1 = np.concatenate([matrix_1[:mid_value], matrix_2[mid_value:]])
    main_child_2 = np.concatenate([matrix_2[:mid_value], matrix_1[mid_value:]])
    
    # Child 1 Mutation
    random_start = random.randrange(0,len(matrix_1)//2)
    random_end = random.randrange(random_start,len(matrix_1))
    
    child_1_mutate = main_child_1.copy()
    child_1_mutate[random_start:random_end] = child_1_mutate[random_start:random_end][::-1]
    
    # Child 2 Mutation
    random_start = random.randrange(0,len(matrix_1)//2)
    random_end = random.randrange(random_start,len(matrix_1))
    
    child_2_mutate = main_child_2.copy()
    child_2_mutate[random_start:random_end] = child_2_mutate[random_start:random_end][::-1]
    
    # Converting the array to parameters
    children = [main_child_1, main_child_2, child_1_mutate, child_2_mutate]
    output = list()
    
    for child in children:
        param = list()
        cum_sum = 0
        for i in range(len(sz)):
            array = child[cum_sum : cum_sum + sz[i]]
            array = array.reshape(shp[i])
            cum_sum += sz[i]
            param.append(array)
        param = np.array(param, dtype="object")
        output.append(param)
    
    output = np.array(output, dtype="object")
    return output

In [30]:
# Training 

torch.manual_seed(69)
torch.set_grad_enabled(False)
population_size = 10

def train(no):
    # Initial Population
    population = np.array([Model() for i in range(population_size)])
    
    best_model = None
    
    for loop in range(no):
        # Sort population based up on fitness function
        population = population[np.argsort([fitness_function(model) for model in population])]
        
        # Printing Max Accuracy
        best_model = population[-1]
        if(loop%10 == 0):
            print("Gen", loop, " :", fitness_function(population[-1]))
        
        # Parent Selection
        parent_max_1 = population[-1]
        parent_max_2 = population[-2]
        parent_max_3 = population[-3]
        parent_max_4 = population[-4] 
        parent_1 = population[0]
        parent_2 = population[1]
        
        # Crossover
        output_1 = crossover_mutation(parent_max_1, parent_max_3)
        output_2 = crossover_mutation(parent_max_2, parent_max_4)
        output = np.concatenate([output_1, output_2])
        
        # New population
        new_population = np.array([Model() for i in range(len(output))])
        for count, model in enumerate(new_population, 0):
            for index, param in enumerate(model.parameters(), 0):
                param.data = (torch.tensor(output[count][index]))
        
        # Add Poor performing parent to the new population
        new_population = np.concatenate([new_population, [parent_1, parent_2]])
        
        # New population becomes your next population
        population = new_population.copy()
        
    return best_model

In [31]:
best = train(100)

Gen 0  : 0.824
Gen 10  : 0.9168
Gen 20  : 0.9226666666666666
Gen 30  : 0.9272
Gen 40  : 0.9285333333333333
Gen 50  : 0.9293333333333333
Gen 60  : 0.9309333333333333
Gen 70  : 0.9314666666666667
Gen 80  : 0.9274666666666667
Gen 90  : 0.9338666666666666


## Testing

In [32]:
y_pred = best(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
ga_report=classification_report(y_pred,test_y)
print(ga_report)

              precision    recall  f1-score   support

           0       0.99      0.95      0.97      1177
           1       0.50      0.79      0.62        73

    accuracy                           0.94      1250
   macro avg       0.75      0.87      0.79      1250
weighted avg       0.96      0.94      0.95      1250



## Particle Swarm

In [33]:
from pyswarms.single import GlobalBestPSO
torch.manual_seed(420)
torch.set_grad_enabled(False)

model = Model()
param = np.concatenate([i.numpy().flatten() for i in model.parameters()])
shape = [i.numpy().shape for i in model.parameters()]
size = [i[0]*i[1] if len(i) == 2 else i[0] for i in shape]

print("Dim : ", len(param))
print("Layers Shape : ", shape)
print("Layers Size : ", size)

Dim :  209
Layers Shape :  [(16, 11), (16,), (1, 16), (1,)]
Layers Size :  [176, 16, 16, 1]


In [34]:
def objective_function(particle,shape=shape,size=size):
    accuracy = []
    output = []
    
    # Reshape the vector to weights and biases dimention
    for par in particle:
        param = list()
        cum_sum = 0
        for i in range(len(size)):
            array = par[cum_sum : cum_sum + size[i]]
            array = array.reshape(shape[i])
            cum_sum += size[i]
            param.append(array)
        param = np.array(param, dtype="object")
        output.append(param)
    
    for i in range(len(output)):
        # Copy Weights and Biases
        model = Model()
        for idx,wei in enumerate(model.parameters()):
            wei.data = (torch.tensor(output[i][idx])).to(torch.float32)
        
        # Calculate Accuracy
        y_pred = model(train_x)
        y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
        acc = (y_pred == train_y).sum().float().item() / len(data.dataset)
        accuracy.append(1 - acc) # Optimization function aims to reduce the cost so (1 - accuracy)
        
    return accuracy

In [35]:
# Tunable Parameters

options = {'c1': 0.6, 'c2': 0.3, 'w': 0.1}
dim = len(param)
x_max = 1.0 * np.ones(dim)
x_min = -1.0 * x_max
bounds = (x_min, x_max)

pso = GlobalBestPSO(n_particles=500, dimensions=209, options=options, bounds=bounds)

In [36]:
best_cost, best_parameters = pso.optimize(objective_function, iters=50)
print("Accuracy : ", 1 - best_cost)

2023-04-05 20:08:57,433 - pyswarms.single.global_best - INFO - Optimize for 50 iters with {'c1': 0.6, 'c2': 0.3, 'w': 0.1}
pyswarms.single.global_best: 100%|█████████████████████████████████████████████████████████████|50/50, best_cost=0.0501
2023-04-05 20:09:11,935 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.05013333333333336, best pos: [-2.28631325e-01  1.61860515e-01 -6.81710081e-02  4.13859248e-02
 -1.90679893e-02 -6.66993068e-02  1.92831548e-02  4.93720764e-02
 -1.35518054e-01 -5.28986331e-01 -3.46253581e-01  1.77460687e-01
 -1.19452809e-01 -3.74320607e-02 -1.87057597e-01  3.51452674e-01
 -3.27804575e-01 -2.80027165e-02  9.15182676e-02  2.46471857e-01
 -1.76845173e-01 -1.99526857e-01 -1.21187558e-01  1.31216979e-02
 -1.47093446e-01  3.01246754e-02 -3.26141067e-01  1.25026550e-01
 -8.22692436e-02  3.23838770e-02  3.18773794e-02  4.62390839e-01
  1.01897497e-01  2.46071010e-01  4.28089126e-02 -6.01159719e-01
  1.89108464e-01  5.06860102e-02  7.5171730

Accuracy :  0.9498666666666666


In [37]:
# Converting the best vector to model

result = []
for par in [best_parameters]:
    param = list()
    cum_sum = 0
    for i in range(len(size)):
        array = par[cum_sum : cum_sum + size[i]]
        array = array.reshape(shape[i])
        cum_sum += size[i]
        param.append(array)
    param = np.array(param, dtype="object")
    result.append(param)

best_model = Model()
for idx,wei in enumerate(best_model.parameters()):
    wei.data = (torch.tensor(result[0][idx])).to(torch.float32)

# Calculate Accuracy
y_pred = best_model(train_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
acc = (y_pred == train_y).sum().float().item() / len(data.dataset)
print("Accuracy : ", acc)

Accuracy :  0.9498666666666666


## Testing

In [38]:
y_pred = best_model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
ps_report=classification_report(y_pred,test_y)
print(ps_report)

              precision    recall  f1-score   support

           0       0.99      0.95      0.97      1175
           1       0.51      0.79      0.62        75

    accuracy                           0.94      1250
   macro avg       0.75      0.87      0.79      1250
weighted avg       0.96      0.94      0.95      1250



## Ant Colony

In [39]:
torch.manual_seed(6699)
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x197f4308040>

In [40]:
dummy = Model()
dummy_param = np.concatenate([i.numpy().flatten() for i in dummy.parameters()])
shape = [i.numpy().shape for i in model.parameters()]
size = [i[0]*i[1] if len(i) == 2 else i[0] for i in shape]
dim = len(dummy_param)

print("Dim : ", dim)
print("Layers Shape : ", shape)
print("Layers Size : ", size)

dummy = None
dummy_param = None

Dim :  209
Layers Shape :  [(16, 11), (16,), (1, 16), (1,)]
Layers Size :  [176, 16, 16, 1]


In [41]:
# Helper Functions

def model_to_vector(model):
    vector = np.concatenate([i.numpy().flatten() for i in model.parameters()])
    return vector
    
def vector_to_model(vector):
    param = list()
    cum_sum = 0
    for i in range(len(size)):
        array = vector[cum_sum : cum_sum + size[i]]
        array = array.reshape(shape[i])
        cum_sum += size[i]
        param.append(array)
    param = np.array(param, dtype="object")
    
    dummy_model = Model()
    for idx,wei in enumerate(dummy_model.parameters()):
        wei.data = (torch.tensor(param[idx])).to(torch.float32)
        
    return dummy_model    

def calc_accuracy(model):
    y_pred = model(train_x)
    y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
    acc = (y_pred == train_y).sum().float().item() / len(data.dataset)
    
    return acc

In [42]:
ants = 10
loops = 100
evaporation_rate = 0.2
influence_factor = 0.4

In [43]:
pheromones = np.ones(dim)
max_accuracy = 0
fittest_vector = None

for loop in range(loops):
    # Generate Solution
    paths = np.array([Model() for i in range(ants)])
    accuracy = []
    
    for ant in range(ants):
        # Flatten the weights and biases
        vector = model_to_vector(paths[ant])
        
        # Multiply with pheromones 
        vector = vector * pheromones
        
        # Calculate Accuracy and Append to the list
        model = vector_to_model(vector)
        acc = calc_accuracy(model)
        accuracy.append(acc)
        
        # Update the updated path
        paths[ant] = model
        
        # Reset
        model = None
        acc = None
        
    # Select fittest path and accuracy
    paths = paths[np.argsort(accuracy)]
    
    if accuracy[np.argmax(accuracy)] > max_accuracy:
        max_accuracy = accuracy[np.argmax(accuracy)]
        fittest_vector = model_to_vector(paths[-1])
    
    # Update pheromones
    delta = 0
    for ant in range(ants):
        # Flatten the weights and biases
        vector = model_to_vector(paths[ant])
        
        # Calculate delta
        delta += (vector - fittest_vector)*influence_factor
        
    pheromones = (1-pheromones)*evaporation_rate + delta
    
    if loop%10 == 0:
        print("Iters {} :".format(loop), calc_accuracy(paths[-1]))

Iters 0 : 0.9016
Iters 10 : 0.9026666666666666
Iters 20 : 0.9026666666666666
Iters 30 : 0.9026666666666666
Iters 40 : 0.9029333333333334
Iters 50 : 0.9026666666666666
Iters 60 : 0.9026666666666666
Iters 70 : 0.9130666666666667
Iters 80 : 0.9026666666666666
Iters 90 : 0.9021333333333333


## Testing

In [44]:
y_pred = best_model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
aco_report=classification_report(y_pred,test_y)
print(classification_report(y_pred,test_y))

              precision    recall  f1-score   support

           0       0.99      0.95      0.97      1175
           1       0.51      0.79      0.62        75

    accuracy                           0.94      1250
   macro avg       0.75      0.87      0.79      1250
weighted avg       0.96      0.94      0.95      1250



In [45]:
print(ga_report)

              precision    recall  f1-score   support

           0       0.99      0.95      0.97      1177
           1       0.50      0.79      0.62        73

    accuracy                           0.94      1250
   macro avg       0.75      0.87      0.79      1250
weighted avg       0.96      0.94      0.95      1250



In [46]:
print(py_report)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1135
           1       0.90      0.90      0.90       115

    accuracy                           0.98      1250
   macro avg       0.95      0.95      0.95      1250
weighted avg       0.98      0.98      0.98      1250



In [47]:
print(ps_report)

              precision    recall  f1-score   support

           0       0.99      0.95      0.97      1175
           1       0.51      0.79      0.62        75

    accuracy                           0.94      1250
   macro avg       0.75      0.87      0.79      1250
weighted avg       0.96      0.94      0.95      1250



## Comparison

### The pytorch optimization seems to be better than the evolutionary optimizations. But with respect to the evalutionary algorithm optimization all seems to perform nearly equal