In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

#### data set

In [2]:
df = pd.read_csv("/content/Bank_Personal_Loan_Modelling.csv")

#### Data preprocessing

In [52]:
df.drop(["ID", "ZIP Code"],axis=1,inplace=True)
print(df["Experience"].unique())
df["Experience"] = abs(df["Experience"])

KeyError: ignored

In [5]:
df.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

In [6]:
df = df[['Age', 'Experience', 'Income', 'Family', 'CCAvg','Education', 'Mortgage', 'Securities Account','CD Account', 'Online', 'CreditCard', 'Personal Loan']]

In [7]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1].values

### Spliting the data

In [8]:
x,y = X,Y

In [9]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=0)

In [10]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [54]:
print('X_train shape:', x_train.shape)
print('X_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

X_train shape: (3750, 11)
X_test shape: (1250, 11)
y_train shape: (3750,)
y_test shape: (1250,)


In [56]:
import torch
from torch.utils.data import DataLoader, TensorDataset

BATCH_SIZE = 32
LEARNING_RATE = 0.003
EPOCH = 50

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_x = torch.tensor(x_train, dtype=torch.float32).to(device)
train_y = torch.tensor(y_train, dtype=torch.float32).to(device)
print(train_x.shape, train_y.shape)

dataset = TensorDataset(train_x, train_y)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

torch.Size([3750, 11]) torch.Size([3750])


In [57]:
data = TensorDataset(train_x,train_y)
data = DataLoader(data,batch_size=BATCH_SIZE,shuffle=True)

In [18]:
class Model(torch.nn.Module):
    
    def __init__(self):
        super(Model,self).__init__()
        
        self.layer1 = torch.nn.Linear(11,16)
        self.layer2 = torch.nn.Linear(16,1)
        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

In [58]:
model = Model()
print(model)

Model(
  (layer1): Linear(in_features=11, out_features=16, bias=True)
  (layer2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)


In [20]:
loss_function = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=LEARNING_RATE)

In [21]:
training_loss = [0]*EPOCH
training_accuracy = [0]*EPOCH

for i in range(1,EPOCH+1):
    epoch_loss = 0
    accuracy = 0
    for x_batch,y_batch in data:
        # x_batch,y_batch = x_batch.to(device),y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = model(x_batch)
        
        loss = loss_function(y_pred,y_batch.unsqueeze(1))
        loss.backward()
        
        optimizer.step()
        epoch_loss += loss.item() * len(x_batch)
        cnt = (torch.where(y_pred>=0.5, 1, 0) == y_batch.unsqueeze(1)).sum().float()
        accuracy += cnt.item()
    if(i%10 == 0):
        print(i,epoch_loss/len(data.dataset),(accuracy)/len(data.dataset))

10 0.06562760436584553 0.9784
20 0.05356907806197802 0.9808
30 0.0465450307349364 0.984
40 0.042843871534530384 0.9858666666666667
50 0.038280525678147874 0.9882666666666666


In [22]:
test_x = torch.from_numpy(x_test).to(torch.float32)
test_y = torch.from_numpy(y_test).to(torch.float32)

In [23]:
test = TensorDataset(test_x,test_y)
test = DataLoader(test,batch_size=1)

In [24]:
y_pred = model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
accuracy = (y_pred == test_y).sum().float().item() / len(test.dataset)
print(accuracy)

0.9824


Cultural algorithm

In [61]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score

df = pd.read_csv('/content/Bank_Personal_Loan_Modelling.csv')

X = df.iloc[:, 3:13].values
y = df.iloc[:, 13].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

model = Sequential()
model.add(Dense(6, input_shape=(10,), activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

pop_size = 20
max_gen = 50
p_c = 0.8
p_m = 0.1

def fitness(individual):
    
    model.set_weights(individual)
    
    model.compile(optimizer=Adam(lr=0.01), loss='binary_crossentropy')
    model.fit(X_train, y_train, batch_size=10, epochs=100)
    
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5)
    return accuracy_score(y_test, y_pred)

def init():
    population = []
    for i in range(pop_size):
        weights = []
        for layer in model.layers:
            if len(layer.get_weights()) > 0:
                w = layer.get_weights()[0]
                b = layer.get_weights()[1]
                weights.append(w.flatten())
                weights.append(b.flatten())
        population.append(np.concatenate(weights))
    return population

def selection(population, fitness_values):
    sorted_indices = np.argsort(fitness_values)[::-1]
    selected_indices = sorted_indices[:int(pop_size/2)]
    return [population[i] for i in selected_indices]

def crossover(parent1, parent2):
    child1 = np.zeros_like(parent1)
    child2 = np.zeros_like(parent2)
    for i in range(len(parent1)):
        if np.random.rand() < p_c:
            child1[i] = parent2[i]
            child2[i] = parent1[i]
        else:
            child1[i] = parent1[i]
            child2[i] = parent2[i]
    return child1, child2

def mutation(child):
    for i in range(len(child)):
        if np.random.rand() < p_m:
            child[i] += np.random.uniform(-1, 1)
    return child

def cultural_algorithm():
    
    population = init()
    for gen in range(max_gen):
        
        fitness_values = [fitness(individual) for individual in population]
        
        parents = selection(population, fitness_values)
        
        offspring = []
        while len(offspring) < pop_size:
            parent1 = np.random.choice(parents)
            parent2 = np.random.choice(parents)
            child1, child2 = crossover(parent1, parent2)
            child1 = mutation(child1)
            child2 = mutation(child2)
            offspring.append(child1)
            offspring.append(child2)
        
        population = parents + offspring
        
        population = np.unique(population, axis=0)
        population = population[:pop_size]
   
    fitness_values = [fitness(individual) for individual in population]
    
    best_index = np.argmax(fitness_values)
    best_individual = population[best_index]
    model.set_weights(best_individual)
    
    model.compile(optimizer=Adam(lr=0.01), loss='binary_crossentropy')
    model.fit(X, y, batch_size=10, epochs=100)
    accuracy = model.evaluate(X_test, y_test)[1]
print("Accuracy:", accuracy)

Accuracy: [0.9008, 0.0992, 0.1032, 0.9008, 0.1136, 0.9008, 0.0992, 0.8877333333333334, 0.8688, 0.0992]


## Particle Swarm optimization

In [69]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score

df = pd.read_csv('/content/Bank_Personal_Loan_Modelling.csv')

X = df.iloc[:, 3:13].values
y = df.iloc[:, 13].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

model = Sequential()
model.add(Dense(6, input_shape=(10,), activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

w_size = sum([w.size for w in model.get_weights()])
pop_size = 20
max_iter = 50
c1 = 1.0
c2 = 1.0
w = 0.8
v_max = 0.5
v_min = -0.5

def fitness(individual):
    
    model.set_weights(np.split(individual, w_size))
    
    model.compile(optimizer=Adam(lr=0.01), loss='binary_crossentropy')
    model.fit(X_train, y_train, batch_size=10, epochs=100, verbose=0)
    
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5)
    return accuracy_score(y_test, y_pred)

def init_population():
    population = []
    for i in range(pop_size):
        individual = np.random.uniform(-1, 1, w_size)
        population.append(individual)
    return population

def update_particle(particle, p_best, g_best, v):
    r1 = np.random.uniform(0, 1, w_size)
    r2 = np.random.uniform(0, 1, w_size)
    v = (w * v) + (c1 * r1 * (p_best - particle)) + (c2 * r2 * (g_best - particle))
    v = np.clip(v, v_min, v_max)
    particle += v
    particle = np.clip(particle, -1, 1)
    return particle, v

def pso():
    
    population = init_population()
    p_best = np.copy(population)
    fitness_values = np.array([fitness(individual) for individual in population])
    g_best_index = np.argmax(fitness_values)
    g_best = np.copy(population[g_best_index])
    
    for i in range(max_iter):
        for j in range(pop_size):
            population[j], v = update_particle(population[j], p_best[j], g_best, v=np.zeros(w_size))
            p_best_fitness = fitness(p_best[j])
            if fitness_values[j] < p_best_fitness:
                p_best[j] = population[j]
            if p_best_fitness > fitness(g_best):
                g_best = p_best[j]
        
        fitness_values = np.array([fitness(individual) for individual in population])
        g_best_index = np.argmax(fitness_values)
        g_best = np.copy(population[g_best_index])
        print("Iteration {}: Best Fitness = {:.4f}".format(i+1, fitness(g_best)))
    
    model.set_weights(np.split(g_best, w_size))
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    x = score[1]
print(x)

[[25.  1. 49. ...  0.  0.  0.]
 [45. 19. 34. ...  0.  0.  0.]
 [39. 15. 11. ...  0.  0.  0.]
 ...
 [63. 39. 24. ...  0.  0.  0.]
 [65. 40. 49. ...  0.  1.  0.]
 [28.  4. 83. ...  0.  1.  1.]]


## Ant Colony optimization

In [42]:
torch.manual_seed(6699)
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7ff14a9e4d00>

In [43]:
dummy = Model()
dummy_param = np.concatenate([i.numpy().flatten() for i in dummy.parameters()])
shape = [i.numpy().shape for i in model.parameters()]
size = [i[0]*i[1] if len(i) == 2 else i[0] for i in shape]
dim = len(dummy_param)

print("Dim : ", dim)
print("Layers Shape : ", shape)
print("Layers Size : ", size)

dummy = None
dummy_param = None

Dim :  209
Layers Shape :  [(16, 11), (16,), (1, 16), (1,)]
Layers Size :  [176, 16, 16, 1]


In [44]:
# Helper Functions

def model_to_vector(model):
    vector = np.concatenate([i.numpy().flatten() for i in model.parameters()])
    return vector
    
def vector_to_model(vector):
    param = list()
    cum_sum = 0
    for i in range(len(size)):
        array = vector[cum_sum : cum_sum + size[i]]
        array = array.reshape(shape[i])
        cum_sum += size[i]
        param.append(array)
    param = np.array(param, dtype="object")
    
    dummy_model = Model()
    for idx,wei in enumerate(dummy_model.parameters()):
        wei.data = (torch.tensor(param[idx])).to(torch.float32)
        
    return dummy_model    

def calc_accuracy(model):
    y_pred = model(train_x)
    y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
    acc = (y_pred == train_y).sum().float().item() / len(data.dataset)
    
    return acc

In [45]:
ants = 10
loops = 100
evaporation_rate = 0.2
influence_factor = 0.4

In [46]:
pheromones = np.ones(dim)
max_accuracy = 0
fittest_vector = None

for loop in range(loops):
    # Generate Solution
    paths = np.array([Model() for i in range(ants)])
    accuracy = []
    
    for ant in range(ants):
        # Flatten the weights and biases
        vector = model_to_vector(paths[ant])
        
        # Multiply with pheromones 
        vector = vector * pheromones
        
        # Calculate Accuracy and Append to the list
        model = vector_to_model(vector)
        acc = calc_accuracy(model)
        accuracy.append(acc)
        
        # Update the updated path
        paths[ant] = model
        
        # Reset
        model = None
        acc = None
        
    # Select fittest path and accuracy
    paths = paths[np.argsort(accuracy)]
    
    if accuracy[np.argmax(accuracy)] > max_accuracy:
        max_accuracy = accuracy[np.argmax(accuracy)]
        fittest_vector = model_to_vector(paths[-1])
    
    # Update pheromones
    delta = 0
    for ant in range(ants):
        # Flatten the weights and biases
        vector = model_to_vector(paths[ant])
        
        # Calculate delta
        delta += (vector - fittest_vector)*influence_factor
        
    pheromones = (1-pheromones)*evaporation_rate + delta
    
    if loop%10 == 0:
        print("Iters {} :".format(loop), calc_accuracy(paths[-1]))

Iters 0 : 0.8978666666666667
Iters 10 : 0.9008
Iters 20 : 0.9008
Iters 30 : 0.9008
Iters 40 : 0.9008
Iters 50 : 0.9008
Iters 60 : 0.9008
Iters 70 : 0.9008
Iters 80 : 0.9008
Iters 90 : 0.9008


## Testing

In [49]:
y_pred = best_model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()

print(classification_report(y_pred,test_y))

              precision    recall  f1-score   support

           0       0.99      0.96      0.98      1180
           1       0.56      0.86      0.67        70

    accuracy                           0.95      1250
   macro avg       0.77      0.91      0.82      1250
weighted avg       0.97      0.95      0.96      1250

