In [38]:
import pandas as pd
import numpy as np
import random
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [78]:
df = pd.read_csv(r"C:\Users\Deepak Thirukkumaran\OneDrive\Documents\Education\SNU b.tech A.I & D.S\2nd year_2022-23\4th SEMESTER\DAA\CIA_2\Bank_Personal_Loan_Modelling.csv")
print("Shape : ", df.shape)
df.head()

Shape :  (5000, 14)


Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [79]:
df.drop(["ID", "ZIP Code"],axis=1,inplace=True)
duplicate_str = "Duplicate: {}".format(df.duplicated().sum())
null_str = "Null: {}".format(df.isnull().sum().sum())

print(duplicate_str)
print(null_str)

Duplicate: 13
Null: 0


In [80]:
print(df["Experience"].unique())
df["Experience"] = abs(df["Experience"])

[ 1 19 15  9  8 13 27 24 10 39  5 23 32 41 30 14 18 21 28 31 11 16 20 35
  6 25  7 12 26 37 17  2 36 29  3 22 -1 34  0 38 40 33  4 -2 42 -3 43]


In [81]:
df.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

In [82]:
df = df[['Age', 'Experience', 'Income', 'Family', 'CCAvg','Education', 'Mortgage', 'Securities Account','CD Account', 'Online', 'CreditCard', 'Personal Loan']]
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1].values

In [132]:
x,y = X,Y
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.30,random_state=53)
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
x_train.shape, y_train.shape

((3500, 11), (3500,))

In [133]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [134]:
BATCH_SIZE = 3

In [135]:
train_x = torch.from_numpy(x_train).to(torch.float32)
train_y = torch.from_numpy(y_train).to(torch.float32)
train_x.shape, train_y.shape

(torch.Size([3500, 11]), torch.Size([3500]))

In [136]:
data = TensorDataset(train_x,train_y)
data = DataLoader(data,batch_size=BATCH_SIZE,shuffle=True)

In [137]:
class Model(torch.nn.Module):
    
    def __init__(self):
        super(Model,self).__init__()
        
        self.layer1 = torch.nn.Linear(11,16)
        self.layer2 = torch.nn.Linear(16,1)
        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

In [138]:
model = Model()
print(model)

Model(
  (layer1): Linear(in_features=11, out_features=16, bias=True)
  (layer2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)


In [139]:
def fitness_function(model):
    
    y_pred = model(train_x)
    y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
    accuracy = (y_pred == train_y).sum().float().item() / len(data.dataset)
    
    return accuracy

In [151]:
def crossover_mutation(model1,model2):
    # Shape of the weights and biases
    shp = [i.numpy().shape for i in model1.parameters()]
    sz = [i[0]*i[1] if len(i) == 2 else i[0] for i in shp]

    matrix_1 = np.concatenate([i.numpy().flatten() for i in model1.parameters()])
    matrix_2 = np.concatenate([i.numpy().flatten() for i in model2.parameters()])
    

    start = len(matrix_1)//2 - 10
    end = len(matrix_1)//2 + 10
    mid_value = random.randrange(start,end)
    
    main_child_1 = np.concatenate([matrix_1[:mid_value], matrix_2[mid_value:]])
    main_child_2 = np.concatenate([matrix_2[:mid_value], matrix_1[mid_value:]])
    

    random_start = random.randrange(0,len(matrix_1)//2)
    random_end = random.randrange(random_start,len(matrix_1))
    
    child_1_mutate = main_child_1.copy()
    child_1_mutate[random_start:random_end] = child_1_mutate[random_start:random_end][::-1]
    

    random_start = random.randrange(0,len(matrix_1)//2)
    random_end = random.randrange(random_start,len(matrix_1))
    
    child_2_mutate = main_child_2.copy()
    child_2_mutate[random_start:random_end] = child_2_mutate[random_start:random_end][::-1]
    

    children = [main_child_1, main_child_2, child_1_mutate, child_2_mutate]
    output = list()
    
    for child in children:
        param = list()
        cum_sum = 0
        for i in range(len(sz)):
            array = child[cum_sum : cum_sum + sz[i]]
            array = array.reshape(shp[i])
            cum_sum += sz[i]
            param.append(array)
        param = np.array(param, dtype="object")
        output.append(param)
    
    output = np.array(output, dtype="object")
    return output

In [146]:
torch.manual_seed(69)
torch.set_grad_enabled(False)
population_size = 10

def train(no):

    population = np.array([Model() for i in range(population_size)])
    
    best_model = None
    
    for loop in range(no):

        population = population[np.argsort([fitness_function(model) for model in population])]
        

        best_model = population[-1]
        if(loop%10 == 0):
            print("Gen", loop, " :", fitness_function(population[-1]))
        

        parent_max_1 = population[-1]
        parent_max_2 = population[-2]
        parent_max_3 = population[-3]
        parent_max_4 = population[-4] 
        parent_1 = population[0]
        parent_2 = population[1]
        

        output_1 = crossover_mutation(parent_max_1, parent_max_3)
        output_2 = crossover_mutation(parent_max_2, parent_max_4)
        output = np.concatenate([output_1, output_2])
        

        new_population = np.array([Model() for i in range(len(output))])
        for count, model in enumerate(new_population, 0):
            for index, param in enumerate(model.parameters(), 0):
                param.data = (torch.tensor(output[count][index]))
        

        new_population = np.concatenate([new_population, [parent_1, parent_2]])
        

        population = new_population.copy()
        
    return best_model

In [147]:
best_model = train(100)

Gen 0  : 0.8251428571428572
Gen 10  : 0.9148571428571428
Gen 20  : 0.9145714285714286
Gen 30  : 0.9131428571428571
Gen 40  : 0.9154285714285715
Gen 50  : 0.9237142857142857
Gen 60  : 0.9225714285714286
Gen 70  : 0.9237142857142857
Gen 80  : 0.9245714285714286
Gen 90  : 0.924


In [148]:
test_x = torch.from_numpy(x_test).to(torch.float32)
test_y = torch.from_numpy(y_test).to(torch.float32)

In [149]:
test = TensorDataset(test_x,test_y)
test = DataLoader(test,batch_size=1)

In [150]:
y_pred = best_model(test_x)
y_pred = torch.where(y_pred>=0.5, 1, 0).flatten()
print(classification_report(y_pred,test_y))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96      1462
           1       0.22      0.87      0.35        38

    accuracy                           0.92      1500
   macro avg       0.61      0.89      0.66      1500
weighted avg       0.98      0.92      0.94      1500

