In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [3]:
df = pd.read_csv("C:/Users/sanja/Downloads/Bank_Personal_Loan_Modelling.csv")

In [5]:
print(df["Experience"].unique())
df["Experience"] = abs(df["Experience"])

[ 1 19 15  9  8 13 27 24 10 39  5 23 32 41 30 14 18 21 28 31 11 16 20 35
  6 25  7 12 26 37 17  2 36 29  3 22 -1 34  0 38 40 33  4 -2 42 -3 43]


In [6]:
df = df[['Age', 'Experience', 'Income', 'Family', 'CCAvg','Education', 'Mortgage', 'Securities Account','CD Account', 'Online', 'CreditCard', 'Personal Loan']]

In [7]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1].values

In [8]:
x,y = X,Y

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [13]:
X_train.shape, y_train.shape

((4000, 11), (4000,))

In [14]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [15]:
BATCH_SIZE = 32
LEARNING_RATE = 0.003
EPOCHS = 50

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cpu


In [19]:
train_x = torch.from_numpy(X_train).to(torch.float32)
train_y = torch.from_numpy(y_train).to(torch.float32)

In [63]:
test_x = torch.from_numpy(X_test).to(torch.float32)
test_y = torch.from_numpy(y_test).to(torch.float32)

test_dataset = TensorDataset(test_x,test_y)
test_dataloader = DataLoader(test_dataset, batch_size=1)

test_loss = 0
test_accuracy = 0
for test_inputs, test_targets in test_dataloader:
    test_outputs = model(test_inputs)
    test_loss += loss_function(test_outputs, test_targets.unsqueeze(1)).item() * len(test_inputs)
    test_accuracy += (test_outputs.round() == test_targets.unsqueeze(1)).sum().item()

test_loss /= len(test_dataset)
test_accuracy /= len(test_dataset)
print(test_loss, test_accuracy)


0.04494380954546584 0.989


In [39]:
train_x.shape, train_y.shape

train_dataset = TensorDataset(train_x, train_y)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
data=train_dataset
data=train_dataloader

In [40]:
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
    
        self.layer1 = torch.nn.Linear(11, 16)
        self.layer2 = torch.nn.Linear(16, 1)
        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

In [41]:
model = Model()
model.to(device)
print(model)

Model(
  (layer1): Linear(in_features=11, out_features=16, bias=True)
  (layer2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)


In [42]:
loss_function = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [45]:
training_loss = [0]*EPOCHS
training_accuracy = [0]*EPOCHS
validation_loss = []
validation_accuracy = []
best_val_loss = np.inf

for epoch in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_accuracy = 0
    for batch_idx, (inputs, targets) in enumerate(data):
        model.train()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * len(inputs)
        epoch_accuracy += (outputs.round() == targets.unsqueeze(1)).sum().item()
        
    epoch_loss /= len(data.dataset)
    epoch_accuracy /= len(data.dataset)
    
    with torch.no_grad():
        val_loss = 0
        val_accuracy = 0
        model.eval()
        for val_inputs, val_targets in train_dataloader:
            val_outputs = model(val_inputs)
            val_loss += loss_function(val_outputs, val_targets.unsqueeze(1)).item() * len(val_inputs)
            val_accuracy += (val_outputs.round() == val_targets.unsqueeze(1)).sum().item()
        val_loss /= len(train_dataset)
        val_accuracy /= len(train_dataset)
    
    training_loss.append(epoch_loss)
    training_accuracy.append(epoch_accuracy)
    validation_loss.append(val_loss)
    validation_accuracy.append(val_accuracy)
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pt')
    
    print(f"Epoch {epoch}: Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")


Epoch 1: Training Loss: 0.1093, Training Accuracy: 0.9620, Validation Loss: 0.0985, Validation Accuracy: 0.9647
Epoch 2: Training Loss: 0.0937, Training Accuracy: 0.9673, Validation Loss: 0.0872, Validation Accuracy: 0.9698
Epoch 3: Training Loss: 0.0853, Training Accuracy: 0.9712, Validation Loss: 0.0805, Validation Accuracy: 0.9732
Epoch 4: Training Loss: 0.0796, Training Accuracy: 0.9722, Validation Loss: 0.0756, Validation Accuracy: 0.9738
Epoch 5: Training Loss: 0.0761, Training Accuracy: 0.9735, Validation Loss: 0.0725, Validation Accuracy: 0.9755
Epoch 6: Training Loss: 0.0733, Training Accuracy: 0.9750, Validation Loss: 0.0699, Validation Accuracy: 0.9758
Epoch 7: Training Loss: 0.0705, Training Accuracy: 0.9755, Validation Loss: 0.0688, Validation Accuracy: 0.9772
Epoch 8: Training Loss: 0.0692, Training Accuracy: 0.9770, Validation Loss: 0.0667, Validation Accuracy: 0.9755
Epoch 9: Training Loss: 0.0674, Training Accuracy: 0.9765, Validation Loss: 0.0649, Validation Accuracy:

In [54]:
training_loss = []
training_accuracy = []
validation_loss = []
validation_accuracy = []

for i in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_accuracy = 0
    
    # training loop
    model.train()
    for x_batch, y_batch in train_dataloader:
        optimizer.zero_grad()
        y_pred = model(x_batch)
        loss = loss_function(y_pred, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * len(x_batch)
        epoch_accuracy += (torch.round(y_pred) == y_batch.unsqueeze(1)).sum().item()
    epoch_loss /= len(train_dataset)
    epoch_accuracy /= len(train_dataset)
    training_loss.append(epoch_loss)
    training_accuracy.append(epoch_accuracy)
    
    # validation loop
    with torch.no_grad():
        model.eval()
        val_loss = 0
        val_accuracy = 0
        for val_inputs, val_targets in train_dataloader:
            val_outputs = model(val_inputs)
            val_loss += loss_function(val_outputs, val_targets.unsqueeze(1)).item() * len(val_inputs)
            val_accuracy += (torch.round(val_outputs) == val_targets.unsqueeze(1)).sum().item()
        val_loss /= len(train_dataset)
        val_accuracy /= len(train_dataset)
        validation_loss.append(val_loss)
        validation_accuracy.append(val_accuracy)
    
    if i % 10 == 0:
        print(f"Epoch {i}: Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")


Epoch 10: Train Loss: 0.0416, Train Accuracy: 0.9868, Val Loss: 0.0389, Val Accuracy: 0.9890
Epoch 20: Train Loss: 0.0409, Train Accuracy: 0.9875, Val Loss: 0.0384, Val Accuracy: 0.9872
Epoch 30: Train Loss: 0.0400, Train Accuracy: 0.9880, Val Loss: 0.0375, Val Accuracy: 0.9892
Epoch 40: Train Loss: 0.0387, Train Accuracy: 0.9880, Val Loss: 0.0367, Val Accuracy: 0.9875
Epoch 50: Train Loss: 0.0383, Train Accuracy: 0.9875, Val Loss: 0.0358, Val Accuracy: 0.9882


In [65]:
# testing loop
with torch.no_grad():
    model.eval()
    test_loss = 0
    test_accuracy = 0
    for test_inputs, test_targets in test_dataloader:
        test_outputs = model(test_inputs)
        test_loss += loss_function(test_outputs, test_targets.unsqueeze(1)).item() * len(test_inputs)
        test_accuracy += (torch.round(test_outputs) == test_targets.unsqueeze(1)).sum().item()
    test_loss /= len(test_dataset)
    test_accuracy /= len(test_dataset)

print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.9890


In [67]:
# classification report
with torch.no_grad():
    model.eval()
    y_pred = torch.tensor([], dtype=torch.long)
    y_true = torch.tensor([], dtype=torch.long)
    for test_inputs, test_targets in test_dataloader:
        test_outputs = model(test_inputs)
        y_pred = torch.cat((y_pred, torch.round(test_outputs).flatten().long()))
        y_true = torch.cat((y_true, test_targets.long()))
    print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       895
           1       0.97      0.92      0.95       105

    accuracy                           0.99      1000
   macro avg       0.98      0.96      0.97      1000
weighted avg       0.99      0.99      0.99      1000



In [68]:
# Accuracy 
def fitness_function(model, train_x, train_y):
    with torch.no_grad():
        y_pred = model(train_x)
        y_pred = torch.where(y_pred >= 0.5, 1, 0).flatten()
        accuracy = (y_pred == train_y).sum().float().item() / len(train_y)
    return accuracy


In [69]:
import random
import numpy as np

# Accuracy 
def fitness_function(model, train_x, train_y):
    
    y_pred = model(train_x)
    y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
    accuracy = (y_pred == train_y).sum().float().item() / len(train_y)
    
    return accuracy

# Crossover and Mutation
def crossover_mutation(model1,model2):
    # Shape of the weights and biases
    shp = [i.numpy().shape for i in model1.parameters()]
    sz = [i[0]*i[1] if len(i) == 2 else i[0] for i in shp]
    
    # Flattening the parameters for cross over
    matrix_1 = np.concatenate([i.numpy().flatten() for i in model1.parameters()])
    matrix_2 = np.concatenate([i.numpy().flatten() for i in model2.parameters()])
    
    # Crossover
    start = len(matrix_1)//2 - 10
    end = len(matrix_1)//2 + 10
    mid_value = random.randrange(start,end)
    
    main_child_1 = np.concatenate([matrix_1[:mid_value], matrix_2[mid_value:]])
    main_child_2 = np.concatenate([matrix_2[:mid_value], matrix_1[mid_value:]])
    
    # Child 1 Mutation
    random_start = random.randrange(0,len(matrix_1)//2)
    random_end = random.randrange(random_start,len(matrix_1))
    
    child_1_mutate = main_child_1.copy()
    child_1_mutate[random_start:random_end] = child_1_mutate[random_start:random_end][::-1]
    
    # Child 2 Mutation
    random_start = random.randrange(0,len(matrix_1)//2)
    random_end = random.randrange(random_start,len(matrix_1))
    
    child_2_mutate = main_child_2.copy()
    child_2_mutate[random_start:random_end] = child_2_mutate[random_start:random_end][::-1]
    
    # Converting the array to parameters
    children = [main_child_1, main_child_2, child_1_mutate, child_2_mutate]
    output = list()
    
    for child in children:
        param = list()
        cum_sum = 0
        for i in range(len(sz)):
            array = child[cum_sum : cum_sum + sz[i]]
            array = array.reshape(shp[i])
            cum_sum += sz[i]
            param.append(array)
        param = np.array(param, dtype="object")
        output.append(param)
    
    output = np.array(output, dtype="object")
    return output


In [93]:
torch.manual_seed(420)
torch.set_grad_enabled(False)
population_size = 10

class Model:
    def __init__(self):
        self.layers = [torch.randn(10, 10), torch.randn(10)]
        
    def forward(self, x):
        out = self.linear1(x)
        out = self.sigmoid(out)
        out = self.linear2(out)
        out = self.sigmoid(out)
        return out

    def parameters(self):
        return self.layers

def fitness_function(model):
    x = torch.randn(100)
    y = torch.randn(1)
    output = torch.matmul(x.reshape(1, -1), torch.randn(100, 1)) + y
    return torch.mean(output)


def crossover_mutation(model1, model2):
    output1 = []
    output2 = []

    for layer1, layer2 in zip(model1.parameters(), model2.parameters()):
        shape = layer1.shape
        if len(shape) == 0:
            output1.append(layer1)
            output2.append(layer2)
            continue
        crossover_point = np.random.randint(0, shape[0])
        output1.append(torch.cat([layer1[:crossover_point], layer2[crossover_point:]]))
        output2.append(torch.cat([layer2[:crossover_point], layer1[crossover_point:]]))

    # Mutation
    for index, layer in enumerate(output1):
        mutation_probability = np.random.uniform(0, 1)
        if mutation_probability > 0.5:
            shape = layer.shape
            if len(shape) == 0:
                continue
            mutation_point = np.random.randint(0, shape[0])
            output1[index][mutation_point] += np.random.normal(0, 0.1, size=(1,))

    for index, layer in enumerate(output2):
        mutation_probability = np.random.uniform(0, 1)
        if mutation_probability > 0.5:
            shape = layer.shape
            if len(shape) == 0:
                continue
            mutation_point = np.random.randint(0, shape[0])
            output2[index][mutation_point] += np.random.normal(0, 0.1, size=(1,))

    return output1, output2


def train(no):
    # Initial Population
    population = [Model() for _ in range(population_size)]

    best_model = None

    for loop in range(no):
        # Sort population based on fitness function
        population = sorted(population, key=fitness_function)

        # Printing Max Accuracy
        best_model = population[-1]
        print(f"Gen {loop+1}: {fitness_function(population[-1])}")

        # Parent Selection
        parent_max_1 = population[-1]
        parent_max_2 = population[-2]
        parent_max_3 = population[-3]
        parent_max_4 = population[-4] 
        parent_1 = population[0]
        parent_2 = population[1]

        # Crossover
        output_1 = crossover_mutation(parent_max_1, parent_max_3)
        output_2 = crossover_mutation(parent_max_2, parent_max_4)
        output = np.concatenate([output_1, output_2])

        # New population
        new_population = [Model() for _ in range(len(output))]
        for count, model in enumerate(new_population):
            for index, param in enumerate(model.parameters()):
                param.data = torch.tensor(output[count][index])

        # Add Poor performing parent to the new population
        new_population += [parent_1, parent_2]

        # New population becomes your next population
        population = new_population

    return best_model



In [94]:
best_model = train(10)

Gen 1: -9.367940902709961
Gen 2: 15.299262046813965
Gen 3: 1.8722236156463623
Gen 4: 0.25427961349487305
Gen 5: -3.3113584518432617
Gen 6: -13.185609817504883
Gen 7: -9.864635467529297
Gen 8: 2.79742693901062
Gen 9: 18.158798217773438
Gen 10: 0.38349276781082153


  param.data = torch.tensor(output[count][index])
