✅ Dataset updated with realistic LoanAmount, Tenure, Income, Expenses, RepaymentCapacity, and InterestRate


In [2]:
import pandas as pd
# Load the merged loan-recommendation dataset and list every distinct
# LoanType label, so inconsistent spellings are visible before any cleanup.
df = pd.read_csv("loan_reco_merged_dataset_updated.csv")
distinct_loan_types = df["LoanType"].unique()
print(distinct_loan_types)



['MedicalLoan' 'DebtConsolidationLoan' 'TravelLoan' 'AutoLoan' 'HomeLoan'
 'PersonalLoan' 'BusinessLoan' 'EducationLoan' 'WeddingLoan'
 'Business Loan' 'Medical Loan' 'Home Loan' 'Home Improvement Loan'
 'Agri Loan' 'Auto Loan' 'Education Loan' 'Consumer Durable Loan'
 'Personal Loan']


In [2]:
import pandas as pd
import numpy as np

# Regenerate synthetic LoanAmount / tenure / income / expenses /
# repayment-capacity / interest-rate values for each configured loan type.
#
# NOTE: this cell reads and overwrites the same CSV, so the original values
# are not recoverable after it runs. The fixed seed below makes a re-run on
# the same input produce the same output.
DATASET_PATH = "loan_reco_merged_dataset_updated_final.csv"

# Seeded generator so "Restart Kernel -> Run All" is reproducible.
SEED = 42
rng = np.random.default_rng(SEED)

# Load dataset
df = pd.read_csv(DATASET_PATH)

# Realistic ranges per loan type:
#   amount        -> (min, max) loan amount, both bounds inclusive
#   tenure        -> (min, max) tenure in years, both bounds inclusive
#   interest_rate -> flat rate assigned to every row of that type
# Loan types that are not listed here are left untouched.
loan_config = {
    "Auto Loan": {
        "amount": (500000, 2000000),  # ✅ Updated range: 5L – 20L
        "tenure": (1, 7),
        "interest_rate": 9.0
    },
    "Education Loan": {
        "amount": (12476, 2497686),
        "tenure": (3, 10),
        "interest_rate": 8.5
    },
    "Medical Loan": {
        "amount": (3709, 987769),
        "tenure": (1, 5),
        "interest_rate": 10.5
    },
    "Travel Loan": {
        "amount": (5428, 600000),
        "tenure": (1, 3),
        "interest_rate": 11.0
    },
    "Business Loan": {
        "amount": (983, 4991364),
        "tenure": (2, 15),
        "interest_rate": 12.0
    },
    "Agri Loan": {
        "amount": (50195, 1994150),
        "tenure": (1, 10),
        "interest_rate": 7.5
    },
    "Home Loan": {
        "amount": (19388, 3069535),
        "tenure": (5, 30),
        "interest_rate": 8.0
    },
    "Personal Loan": {
        "amount": (50000, 2000000),
        "tenure": (1, 7),
        "interest_rate": 13.0
    }
}

# The LoanType column mixes spaced and unspaced spellings of the same type
# ("Medical Loan" vs "MedicalLoan"). Match on a whitespace-free, lower-cased
# key so both spellings are updated by the same config entry.
loan_type_keys = df["LoanType"].str.replace(r"\s+", "", regex=True).str.lower()

# Config entries that matched no rows; reported after the loop instead of
# being skipped silently.
unmatched_types = []

# Update values per loan type
for loan_type, config in loan_config.items():
    config_key = "".join(loan_type.split()).lower()
    # regex=False: the key is a literal substring, not a pattern.
    mask = loan_type_keys.str.contains(config_key, regex=False, na=False)
    n = int(mask.sum())
    if n == 0:
        unmatched_types.append(loan_type)
        continue

    amount_min, amount_max = config["amount"]
    tenure_min, tenure_max = config["tenure"]

    # Loan amount and tenure (endpoint=True keeps the upper bound inclusive)
    loan_amounts = rng.integers(amount_min, amount_max, size=n, endpoint=True)
    tenures = rng.integers(tenure_min, tenure_max, size=n, endpoint=True)

    # Monthly income: random base in [20000, 200000) plus 2% of the loan
    # amount, so larger loans skew towards higher incomes.
    incomes = rng.integers(20000, 200000, size=n) + (loan_amounts // 50)

    # Expenses (30–60% of income)
    expenses = (incomes * rng.uniform(0.3, 0.6, size=n)).astype(int)

    # Repayment capacity (income - expenses - buffer of 2000..9999, min 5k)
    buffers = rng.integers(2000, 10000, size=n)
    repayment_capacity = (incomes - expenses - buffers).clip(min=5000)

    # Ensure repayment capacity is never more than 70% of income
    repayment_capacity = np.minimum(repayment_capacity, (incomes * 0.7).astype(int))

    # Update dataset
    df.loc[mask, "LoanAmount"] = loan_amounts
    # NOTE(review): "Yeras" looks like a typo for "Years". It is kept as-is
    # because it must match the CSV's actual column name — confirm against
    # the file; if the real column is "PreferredTenure_Years", this line
    # silently creates a new column instead of updating the existing one.
    df.loc[mask, "PreferredTenure_Yeras"] = tenures
    df.loc[mask, "MonthlyIncome"] = incomes
    df.loc[mask, "Expenses"] = expenses
    df.loc[mask, "RepaymentCapacity"] = repayment_capacity
    df.loc[mask, "InterestRate"] = config["interest_rate"]

if unmatched_types:
    print("⚠️ No rows matched these configured loan types: " + ", ".join(unmatched_types))

# Save updated dataset (overwrites the input file)
df.to_csv(DATASET_PATH, index=False)

print("✅ Dataset updated! Auto Loan range is now ₹5L – ₹20L.")


✅ Dataset updated! Auto Loan range is now ₹5L – ₹20L.
