# 1. Import the libraries

In [None]:
# Warnings
import warnings

# Data manipulation
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Generate new dummy data
from faker import Faker

# Silence every warning emitted for the rest of the session
warnings.filterwarnings(action='ignore')



# 2. Load the Dataset

In [None]:
# Read the 'Juridical Credit Analysis' worksheet of the workbook into a DataFrame
df = pd.read_excel(
    io='juridical_credit_analyze.xlsx',
    sheet_name='Juridical Credit Analysis',
)

In [None]:
# Preview the first 5 rows of the data
df.head(n=5)

Unnamed: 0,first_name,last_name,credit_score,income,employment_status,debt_to_income,loan_amount,collateral,payment_history,credit_utilization,length_of_credit_history,type_of_credit,outstanding_debt,bankruptcy_or_forclosure_history,legal_history,risk,address,age
0,Jono,Manullang,850,4760000,Unemployed,0.7,32010000,Equipment,Poor,0.939394,More than 5 years,"Bussiness,Credit",6560000,more than a year,yes,low,"Gg. Astana Anyar No. 4\n Balikpapan, KI 97728",25
1,Wirda,Firgantoro,440,17160000,1-2 years,0.8,42930000,Artworks,Good,0.808081,More than 5 years,"Home,Bussiness,Personal",11740000,less than a year,no,medium,"Jl. Moch. Ramdan No. 984\n Kotamobagu, MU 84283",54
2,Cahyanto,Melani,530,12620000,More than 5 years,0.3,22750000,Artworks,Excellent,0.686869,Less than a year,"Car,Credit",10830000,less than a year,no,low,"Jalan Pasir Koja No. 5\n Padang, Sulawesi Utar...",46
3,Hafshah,Salahudin,740,15600000,More than 5 years,0.8,43270000,Life Insurance Policies,Fair,0.494949,More than 5 years,"Car,Investment,Credit",27090000,more than a year,yes,medium,"Gg. Sentot Alibasa No. 6\n Tasikmalaya, Sumate...",23
4,Ciaobella,Pratiwi,630,22700000,1-2 years,0.6,13240000,Intellectual Property,Good,0.242424,More than 5 years,"Car,Credit",28190000,no,yes,medium,"Jalan Sentot Alibasa No. 9\n Cimahi, MU 77250",41


# 3. Drop the Address Column

In [None]:
# Remove the original 'address' column from df in place, right away
df.drop(columns=['address'], inplace=True)

In [None]:
# Preview the first 5 rows to confirm the 'address' column is gone
df.head(n=5)

Unnamed: 0,first_name,last_name,credit_score,income,employment_status,debt_to_income,loan_amount,collateral,payment_history,credit_utilization,length_of_credit_history,type_of_credit,outstanding_debt,bankruptcy_or_forclosure_history,legal_history,risk,age
0,Jono,Manullang,850,4760000,Unemployed,0.7,32010000,Equipment,Poor,0.939394,More than 5 years,"Bussiness,Credit",6560000,more than a year,yes,low,25
1,Wirda,Firgantoro,440,17160000,1-2 years,0.8,42930000,Artworks,Good,0.808081,More than 5 years,"Home,Bussiness,Personal",11740000,less than a year,no,medium,54
2,Cahyanto,Melani,530,12620000,More than 5 years,0.3,22750000,Artworks,Excellent,0.686869,Less than a year,"Car,Credit",10830000,less than a year,no,low,46
3,Hafshah,Salahudin,740,15600000,More than 5 years,0.8,43270000,Life Insurance Policies,Fair,0.494949,More than 5 years,"Car,Investment,Credit",27090000,more than a year,yes,medium,23
4,Ciaobella,Pratiwi,630,22700000,1-2 years,0.6,13240000,Intellectual Property,Good,0.242424,More than 5 years,"Car,Credit",28190000,no,yes,medium,41


# 4. Generate Fake Addresses with Faker

In [None]:
# Create a Faker generator that uses the Indonesian ('id_ID') locale
fake = Faker(locale='id_ID')

In [None]:
# Generate random addresses with RT, RW, and postal code in Indonesia

# Define a function to generate address
def generate_address(row):
    """Build one synthetic address string of the form
    "<street address>, RT <rt>/RW <rw>, <postcode>".

    The street address and postcode come from the module-level ``fake``
    generator; the RT and RW numbers are drawn with
    ``fake.random_int(min=1, max=20, step=1)``.

    Parameters
    ----------
    row
        Ignored. The parameter exists only so the function can be passed
        to ``DataFrame.apply(..., axis=1)``, which calls it once per row.

    Returns
    -------
    str
        The formatted address.
    """
    # Keep this call order: each call advances the Faker random stream,
    # so reordering would change the values produced under a fixed seed.
    street_address = fake.street_address()
    rt = fake.random_int(min=1, max=20, step=1)
    rw = fake.random_int(min=1, max=20, step=1)
    postcode = fake.postcode()
    return f"{street_address}, RT {rt}/RW {rw}, {postcode}"

# Build the new 'address' column by calling generate_address once per row
df['address'] = df.apply(func=generate_address, axis='columns')

In [None]:
# Make RT, RW, and postal code columns from the address column.
# The patterns follow the layout produced by generate_address:
#   "<street address>, RT <rt>/RW <rw>, <postcode>"
# expand=False makes every extract return a Series, so each assignment
# sets a plain column; the extracted values are strings, not integers.

# Extract the RT number (the digits between "RT" and the slash)
df['rt'] = df['address'].str.extract(r'RT\s+(\d+)\s*/', expand=False)

# Extract the RW number (the digits that follow "RW")
df['rw'] = df['address'].str.extract(r'RW\s+(\d+)', expand=False)

# Extract the postal code (the five digits that end the address).
# Matching the pattern, rather than slicing the last five characters,
# yields a missing value instead of arbitrary text when an address does
# not end in a five-digit code.
df['postal_code'] = df['address'].str.extract(r'(\d{5})\s*$', expand=False)

In [None]:
# Preview the first 5 rows, now including address, rt, rw and postal_code
df.head(n=5)

Unnamed: 0,first_name,last_name,credit_score,income,employment_status,debt_to_income,loan_amount,collateral,payment_history,credit_utilization,...,type_of_credit,outstanding_debt,bankruptcy_or_forclosure_history,legal_history,risk,age,address,rt,rw,postal_code
0,Jono,Manullang,850,4760000,Unemployed,0.7,32010000,Equipment,Poor,0.939394,...,"Bussiness,Credit",6560000,more than a year,yes,low,25,"Jl. Pasteur No. 554, RT 1/RW 18, 29083",1,18,29083
1,Wirda,Firgantoro,440,17160000,1-2 years,0.8,42930000,Artworks,Good,0.808081,...,"Home,Bussiness,Personal",11740000,less than a year,no,medium,54,"Jl. Peta No. 440, RT 5/RW 15, 44765",5,15,44765
2,Cahyanto,Melani,530,12620000,More than 5 years,0.3,22750000,Artworks,Excellent,0.686869,...,"Car,Credit",10830000,less than a year,no,low,46,"Jl. Cihampelas No. 71, RT 13/RW 14, 77023",13,14,77023
3,Hafshah,Salahudin,740,15600000,More than 5 years,0.8,43270000,Life Insurance Policies,Fair,0.494949,...,"Car,Investment,Credit",27090000,more than a year,yes,medium,23,"Jl. Moch. Ramdan No. 76, RT 14/RW 12, 39131",14,12,39131
4,Ciaobella,Pratiwi,630,22700000,1-2 years,0.6,13240000,Intellectual Property,Good,0.242424,...,"Car,Credit",28190000,no,yes,medium,41,"Jl. Monginsidi No. 810, RT 11/RW 12, 16916",11,12,16916


# 5. Preprocessing the Data

In [None]:
# Move the "risk" column to the last position.
# DataFrame.insert takes an integer position as its first argument, not a
# column label. After the pop, len(df.columns) is one past the final
# remaining column, so inserting there appends "risk" at the end.
risk_col = df.pop('risk')
df.insert(len(df.columns), 'risk', risk_col)

In [None]:
# Preview the first 5 rows to confirm "risk" is now the last column
df.head(n=5)

Unnamed: 0,first_name,last_name,credit_score,income,employment_status,debt_to_income,loan_amount,collateral,payment_history,credit_utilization,...,type_of_credit,outstanding_debt,bankruptcy_or_forclosure_history,legal_history,age,address,rt,rw,postal_code,risk
0,Jono,Manullang,850,4760000,Unemployed,0.7,32010000,Equipment,Poor,0.939394,...,"Bussiness,Credit",6560000,more than a year,yes,25,"Jl. Pasteur No. 554, RT 1/RW 18, 29083",1,18,29083,low
1,Wirda,Firgantoro,440,17160000,1-2 years,0.8,42930000,Artworks,Good,0.808081,...,"Home,Bussiness,Personal",11740000,less than a year,no,54,"Jl. Peta No. 440, RT 5/RW 15, 44765",5,15,44765,medium
2,Cahyanto,Melani,530,12620000,More than 5 years,0.3,22750000,Artworks,Excellent,0.686869,...,"Car,Credit",10830000,less than a year,no,46,"Jl. Cihampelas No. 71, RT 13/RW 14, 77023",13,14,77023,low
3,Hafshah,Salahudin,740,15600000,More than 5 years,0.8,43270000,Life Insurance Policies,Fair,0.494949,...,"Car,Investment,Credit",27090000,more than a year,yes,23,"Jl. Moch. Ramdan No. 76, RT 14/RW 12, 39131",14,12,39131,medium
4,Ciaobella,Pratiwi,630,22700000,1-2 years,0.6,13240000,Intellectual Property,Good,0.242424,...,"Car,Credit",28190000,no,yes,41,"Jl. Monginsidi No. 810, RT 11/RW 12, 16916",11,12,16916,medium
