In [1]:
import pandas as pd
import numpy as np

# Synthetic loan-approval dataset generator.
#
# Draws random applicant attributes, labels each row with a rule-based
# approval flag, and writes the table to 'loan_approval_dataset.csv'.
#
# NOTE: the global seed plus the exact order of the np.random calls below
# fully determine the output; reordering any draw changes the dataset.
np.random.seed(12)

n_samples = 14000

# ---------------------------------------------------------------------------
# Applicant attributes (np.random.randint excludes the upper bound)
# ---------------------------------------------------------------------------
loan_id = np.arange(1, n_samples + 1)                        # 1 .. n_samples
no_of_dependents = np.random.randint(0, 6, size=n_samples)   # 0 .. 5
education = np.random.randint(0, 2, size=n_samples)          # 0/1 flag
self_employed = np.random.randint(0, 2, size=n_samples)      # 0/1 flag, 1 = self-employed
age = np.random.randint(21, 81, size=n_samples)              # 21 .. 80

# Both candidate incomes are drawn for every row (self-employed draw first),
# then the employment flag selects which one is kept.
self_employed_income = np.random.randint(200, 1500, size=n_samples) * 1000  # 200k .. 1,499k Rs / year
salaried_income = np.random.randint(300, 1500, size=n_samples) * 1000       # 300k .. 1,499k Rs / year
income_annum = np.where(self_employed == 1, self_employed_income, salaried_income)

# ---------------------------------------------------------------------------
# Loan request, credit score and asset values
# ---------------------------------------------------------------------------
loan_amount = np.random.randint(50, 5000, size=n_samples) * 10               # 500 .. 49,990
loan_term = np.random.randint(5, 30, size=n_samples)                         # 5 .. 29
cibil_score = np.random.randint(300, 900, size=n_samples)                    # 300 .. 899
residential_assets_value = np.random.randint(0, 1000, size=n_samples) * 100  # 0 .. 99,900
commercial_assets_value = np.random.randint(0, 1000, size=n_samples) * 100   # 0 .. 99,900
luxury_assets_value = np.random.randint(10, 500, size=n_samples) * 100       # 1,000 .. 49,900
bank_asset_value = np.random.randint(100, 10000, size=n_samples) * 100       # 10,000 .. 999,900

total_assets = residential_assets_value + commercial_assets_value
total_assets = total_assets + luxury_assets_value + bank_asset_value

# ---------------------------------------------------------------------------
# Per-applicant limits used by the approval rule
# ---------------------------------------------------------------------------
# Self-employed applicants get the higher loan ceiling.
max_loan_amount = np.where(self_employed == 1, 1000000, 500000)

# Applicants aged 50 or below may take terms up to 30; older ones up to 20.
max_loan_term = np.where(age <= 50, 30, 20)

# Despite the "min_" prefix this is applied as an UPPER bound on
# loan_amount / income_annum; the name is kept as-is.
min_debt_to_income_ratio = 0.3

# ---------------------------------------------------------------------------
# Approval rule: every condition below must hold for loan_status == 1
# ---------------------------------------------------------------------------
age_ok = (age >= 21) & (age <= 70)   # the lower bound always holds (age is drawn from 21..80)
score_ok = cibil_score >= 500        # the enforced CIBIL threshold is 500
income_ok = income_annum >= 250000
# NOTE(review): with the ranges drawn above, loan_amount never exceeds 49,990
# and income_annum is never below 200,000, so the amount cap and the ratio
# check are always satisfied and do not influence loan_status.
amount_ok = loan_amount <= max_loan_amount
term_ok = loan_term <= max_loan_term
ratio_ok = (loan_amount / income_annum) <= min_debt_to_income_ratio
assets_ok = total_assets > 200000

approved = age_ok & score_ok & income_ok & amount_ok & term_ok & ratio_ok & assets_ok
loan_status = np.where(approved, 1, 0)

# ---------------------------------------------------------------------------
# Assemble the table (insertion order defines the CSV column order) and save
# ---------------------------------------------------------------------------
columns = {
    'loan_id': loan_id,
    'no_of_dependents': no_of_dependents,
    'education': education,
    'self_employed': self_employed,
    'age': age,
    'income_annum': income_annum,
    'loan_amount': loan_amount,
    'loan_term': loan_term,
    'cibil_score': cibil_score,
    'residential_assets_value': residential_assets_value,
    'commercial_assets_value': commercial_assets_value,
    'luxury_assets_value': luxury_assets_value,
    'bank_asset_value': bank_asset_value,
    'loan_status': loan_status,
}
data = pd.DataFrame(columns)

output_path = 'loan_approval_dataset.csv'
data.to_csv(output_path, index=False)  # index=False: no row-index column in the file

print(f"Dataset has been saved to '{output_path}'")

Dataset has been saved to 'loan_approval_dataset.csv'
