In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the loan dataset from a CSV file given as a relative path (resolved against
# the notebook's working directory) and preview the first rows.
df = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
df.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [3]:
# Count fully duplicated rows (rows identical to an earlier row in every column).
df.duplicated().sum()

0

In [4]:
# Count missing values per column.
df.isnull().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

### Streamlining Data for Precision: Dropping the ID and ZIP Code Columns

In [5]:
# Drop the ID and ZIP Code columns, which are not used in the rest of this notebook.
# `columns=` already selects the column axis, so the redundant `axis=1` is removed,
# and the result is reassigned rather than mutated in place so the frame shown by
# earlier cells is not silently altered behind the reader's back.
df = df.drop(columns=['ID', 'ZIP Code'])

### Removing Rows with Negative Experience

In [6]:
# Keep only rows whose Experience is non-negative.
# The explicit .copy() makes the filtered result an independent frame, so the
# column assignments in later cells (EDU, Account_holder_category) do not trigger
# pandas' SettingWithCopyWarning for writing to a slice of another frame.
df = df[df['Experience'] >= 0].copy()

In [7]:
# Preview the frame after the ID / ZIP Code drop and the Experience filter.
df.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,25,1,49,4,1.6,1,0,0,1,0,0,0
1,45,19,34,3,1.5,1,0,0,1,0,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,0,1


### Transforming Education Levels

In [8]:
def edu(education):
    """Translate a numeric education code into a text label.

    Parameters
    ----------
    education : value compared against the codes 1 and 2 with ``==``.

    Returns
    -------
    str
        'Undergrad' for code 1, 'Grad' for code 2, and 'Other' for anything else.

    NOTE(review): every code other than 1 or 2 collapses into 'Other'; if the
    dataset defines further education levels, confirm that this is intended.
    """
    # Codes are tested in order; the first one equal to the input wins.
    for code, label in ((1, 'Undergrad'), (2, 'Grad')):
        if education == code:
            return label
    return 'Other'

# Derive the EDU label column by running edu() over every Education code.
df['EDU'] = df['Education'].map(edu)

In [9]:
# Preview the frame with the new EDU label column.
df.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard,EDU
0,25,1,49,4,1.6,1,0,0,1,0,0,0,Undergrad
1,45,19,34,3,1.5,1,0,0,1,0,0,0,Undergrad
2,39,15,11,1,1.0,1,0,0,0,0,0,0,Undergrad
3,35,9,100,1,2.7,2,0,0,0,0,0,0,Grad
4,35,8,45,4,1.0,2,0,0,0,0,0,1,Grad


### Categorizing Account Holders

In [10]:
def security(row):
    """Label a record from its 'Age' and 'Income' entries.

    Parameters
    ----------
    row : mapping-like object indexable by 'Age' and 'Income'.

    Returns
    -------
    str
        'High Security'   when Age > 30 and Income > 50,
        'Medium Security' when Age <= 30 and Income > 30,
        'Low Security'    in every other case.
    """
    age = row['Age']
    # Income is only looked up once the age test for a branch has passed.
    if age > 30:
        return 'High Security' if row['Income'] > 50 else 'Low Security'
    if age <= 30:
        return 'Medium Security' if row['Income'] > 30 else 'Low Security'
    # Reached only when both age comparisons are false (e.g. a NaN age).
    return 'Low Security'

# Apply security() to each row (axis=1 passes one row at a time) and store the
# returned label in a new Account_holder_category column.
df['Account_holder_category'] = df.apply(security, axis=1)

In [11]:
# Preview the frame with the new Account_holder_category column.
df.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard,EDU,Account_holder_category
0,25,1,49,4,1.6,1,0,0,1,0,0,0,Undergrad,Medium Security
1,45,19,34,3,1.5,1,0,0,1,0,0,0,Undergrad,Low Security
2,39,15,11,1,1.0,1,0,0,0,0,0,0,Undergrad,Low Security
3,35,9,100,1,2.7,2,0,0,0,0,0,0,Grad,High Security
4,35,8,45,4,1.0,2,0,0,0,0,0,1,Grad,Low Security


In [12]:
# Write the processed frame to CSV; index=False leaves the row index out of the file.
df.to_csv('Personal_data.csv', index=False)