In [None]:
!pip install transformers



In [None]:
# Import libraries
import os
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import torch
from transformers import pipeline

from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive for this Colab session.
drive.mount('/content/drive')

# Folder on the mounted Drive; the generated CSV is written here later on.
drive_folder = "/content/drive/MyDrive/Aireen Y4S1/WIE3007 DATA MINING AND WAREHOUSING/Group Assignment 15%/Group Assignment Group6"

Mounted at /content/drive


In [None]:
# Setup huggingface token
from google.colab import userdata
# Read the token stored under the name "HF_TOKEN" via Colab's userdata API,
# so the secret itself never appears in the notebook.
HUGGINGFACE_TOKEN = userdata.get("HF_TOKEN")

from huggingface_hub import login
# Log this session in to the Hugging Face Hub with that token.
login(token=HUGGINGFACE_TOKEN)

# Data Simulation using GenAI

In [None]:
# Configuration
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
# The text-generation pipeline samples with torch's RNG, which the two seeds
# above do not touch; seed it as well so the generated descriptions are as
# repeatable as the backend allows.
torch.manual_seed(SEED)
num_customers = 1000

# Category vocabularies shared by the simulation functions below.
employment_types = ["Full-Time", "Part-Time", "Self-Employed", "Contract", "Unemployed"]
employment_sectors = ["Technology", "Healthcare", "Finance", "Education", "Retail",
                      "Manufacturing", "Government", "Services", "Construction", "Other"]
loan_purposes = ["Home Purchase", "Car Loan", "Business", "Education", "Debt Consolidation",
                 "Home Improvement", "Medical", "Wedding", "Vacation", "Personal"]
education_levels = ["High School", "Associate", "Bachelor", "Master", "PhD", "Below High School"]
marital_status = ["Single", "Married", "Divorced", "Widowed"]
property_types = ["Rent", "Own with Mortgage", "Own Outright", "Living with Parents"]

# Initialize text generator
print("Loading text generation model...")
# Use the first GPU when one is available; otherwise fall back to CPU (-1)
# instead of failing on a runtime without CUDA.
generation_device = 0 if torch.cuda.is_available() else -1
text_generator = pipeline("text-generation", model="distilgpt2", device=generation_device)

def generate_financial_data(customer_id):
    """Simulate one loan applicant as a flat record of correlated attributes.

    Values are drawn from the global ``numpy.random`` and ``random`` generators,
    so results are reproducible only if both were seeded by the caller. The
    category lists ``employment_types``, ``employment_sectors``,
    ``loan_purposes``, ``marital_status`` and ``property_types`` are read from
    module scope.

    Args:
        customer_id: Identifier stored unchanged in the ``CustomerID`` field.

    Returns:
        dict: One record with demographic, employment, credit, loan and bank
        relationship fields plus the binary target ``LoanDefault``. Three
        underscore-prefixed keys (``_loan_purpose``, ``_employment_type``,
        ``_credit_score``) duplicate public fields for use by
        ``generate_loan_description``.
    """
    # === AGE ===
    age = np.random.randint(18, 70)  # upper bound is exclusive: ages 18-69

    # === EDUCATION ===
    if age < 25:
        education = random.choices(
            ["Below High School", "High School", "Associate", "Bachelor", "Master"],
            weights=[0.08, 0.30, 0.30, 0.30, 0.02]
        )[0]
    elif age < 35:
        education = random.choices(
            ["High School", "Associate", "Bachelor", "Master"],
            weights=[0.15, 0.25, 0.40, 0.20]
        )[0]
    else:
        education = random.choices(
            ["Below High School", "High School", "Associate", "Bachelor", "Master", "PhD"],
            weights=[0.08, 0.30, 0.22, 0.25, 0.12, 0.03]
        )[0]

    # === EMPLOYMENT & INCOME (correlated with age and education) ===
    # Weights follow the order of the module-level employment_types list.
    if age < 25:
        employment_type = random.choices(
            employment_types,
            weights=[0.40, 0.35, 0.05, 0.10, 0.10]
        )[0]
        base_income = np.random.uniform(1500, 4000)
    elif age < 35:
        employment_type = random.choices(
            employment_types,
            weights=[0.65, 0.15, 0.10, 0.08, 0.02]
        )[0]
        base_income = np.random.uniform(5000, 10000)
    elif age < 50:
        employment_type = random.choices(
            employment_types,
            weights=[0.70, 0.10, 0.15, 0.04, 0.01]
        )[0]
        base_income = np.random.uniform(5000, 14000)
    else:
        employment_type = random.choices(
            employment_types,
            weights=[0.65, 0.12, 0.18, 0.03, 0.02]
        )[0]
        base_income = np.random.uniform(4000, 10000)

    # Education boosts income
    education_multiplier = {
        "Below High School": 0.7,
        "High School": 0.9,
        "Associate": 1.0,
        "Bachelor": 1.3,
        "Master": 1.5,
        "PhD": 1.8
    }
    monthly_income = base_income * education_multiplier[education]

    # Employment type affects income
    if employment_type == "Part-Time":
        monthly_income *= 0.6
    elif employment_type == "Unemployed":
        monthly_income = np.random.uniform(0, 5000)  # Benefits/savings
    elif employment_type == "Self-Employed":
        monthly_income *= np.random.uniform(0.7, 1.8)  # High variance

    # Annualize only after the employment adjustment, so the loan cap, the DTI
    # guard and the loan-to-income risk factor all use the same income that is
    # reported in MonthlyIncome.
    annual_income = monthly_income * 12

    employment_sector = random.choice(employment_sectors)

    # === EMPLOYMENT LENGTH ===
    if employment_type == "Unemployed":
        employment_length_years = 0
    else:
        max_years = min(age - 18, 40)
        # max(1, ...) keeps the exclusive upper bound valid for 18-year-olds.
        employment_length_years = np.random.randint(0, max(1, max_years))

    # === MARITAL STATUS (age-dependent) ===
    if age < 25:
        marital = random.choices(marital_status, weights=[0.80, 0.15, 0.03, 0.02])[0]
    elif age < 35:
        marital = random.choices(marital_status, weights=[0.40, 0.50, 0.08, 0.02])[0]
    elif age < 50:
        marital = random.choices(marital_status, weights=[0.20, 0.60, 0.15, 0.05])[0]
    else:
        marital = random.choices(marital_status, weights=[0.15, 0.55, 0.20, 0.10])[0]

    # === DEPENDENTS ===
    if marital == "Single":
        dependents = random.choices([0, 1, 2], weights=[0.85, 0.10, 0.05])[0]
    elif marital == "Married":
        dependents = random.choices([0, 1, 2, 3, 4], weights=[0.20, 0.25, 0.30, 0.20, 0.05])[0]
    else:
        dependents = random.choices([0, 1, 2, 3], weights=[0.50, 0.25, 0.20, 0.05])[0]

    # === PROPERTY ===
    if age < 30:
        property_type = random.choices(property_types, weights=[0.50, 0.20, 0.05, 0.25])[0]
    elif age < 45:
        property_type = random.choices(property_types, weights=[0.25, 0.50, 0.20, 0.05])[0]
    else:
        property_type = random.choices(property_types, weights=[0.15, 0.35, 0.45, 0.05])[0]

    # === CREDIT SCORE (affected by many factors) ===
    base_credit_score = 650

    # Age effect (credit history)
    if age < 25:
        base_credit_score -= 50
    elif age > 40:
        base_credit_score += 30

    # Income effect
    if monthly_income > 6500:
        base_credit_score += 40
    elif monthly_income < 2000:
        base_credit_score -= 40

    # Employment effect
    if employment_type == "Full-Time":
        base_credit_score += 20
    elif employment_type == "Unemployed":
        base_credit_score -= 60

    # Education effect
    if education in ["Master", "PhD"]:
        base_credit_score += 15
    elif education == "Below High School":
        base_credit_score -= 20

    # Property ownership effect
    if property_type == "Own Outright":
        base_credit_score += 30
    elif property_type == "Rent":
        base_credit_score -= 10

    # Add randomness and constrain
    credit_score = int(np.clip(base_credit_score + np.random.randint(-80, 80), 300, 850))

    # === EXISTING DEBT ===
    debt_to_income_ratio = 0
    if annual_income > 0:
        # Better credit = lower DTI
        if credit_score > 750:
            debt_to_income_ratio = np.random.uniform(0.05, 0.25)
        elif credit_score > 650:
            debt_to_income_ratio = np.random.uniform(0.15, 0.40)
        elif credit_score > 550:
            debt_to_income_ratio = np.random.uniform(0.25, 0.55)
        else:
            debt_to_income_ratio = np.random.uniform(0.35, 0.70)

    monthly_debt = monthly_income * debt_to_income_ratio

    # === NUMBER OF EXISTING LOANS ===
    if credit_score > 700:
        existing_loans = random.choices([0, 1, 2, 3], weights=[0.30, 0.40, 0.20, 0.10])[0]
    elif credit_score > 600:
        existing_loans = random.choices([0, 1, 2, 3, 4], weights=[0.20, 0.35, 0.25, 0.15, 0.05])[0]
    else:
        existing_loans = random.choices([0, 1, 2, 3, 4], weights=[0.15, 0.25, 0.30, 0.20, 0.10])[0]

    # === LOAN APPLICATION ===
    loan_purpose = random.choice(loan_purposes)

    # Loan amount based on purpose and income
    if loan_purpose == "Home Purchase":
        loan_amount = np.random.uniform(150000, 500000)
    elif loan_purpose == "Car Loan":
        loan_amount = np.random.uniform(15000, 50000)
    elif loan_purpose == "Business":
        loan_amount = np.random.uniform(25000, 200000)
    elif loan_purpose == "Education":
        loan_amount = np.random.uniform(10000, 100000)
    elif loan_purpose == "Debt Consolidation":
        loan_amount = np.random.uniform(5000, 50000)
    else:
        # All remaining purposes share one small-loan range.
        loan_amount = np.random.uniform(5000, 30000)

    # Adjust loan amount based on income (realistic borrowing)
    max_loan = annual_income * 5  # Typical 5x income limit
    loan_amount = min(loan_amount, max_loan * np.random.uniform(0.5, 1.2))

    # === LOAN TERM ===
    if loan_purpose in ["Home Purchase", "Business"]:
        loan_term_months = random.choice([120, 180, 240, 360])  # 10-30 years
    elif loan_purpose == "Car Loan":
        loan_term_months = random.choice([36, 48, 60, 72])
    elif loan_purpose == "Education":
        loan_term_months = random.choice([60, 84, 120])
    else:
        loan_term_months = random.choice([12, 24, 36, 48, 60])

    # === INTEREST RATE (based on credit score) ===
    if credit_score > 750:
        interest_rate = np.random.uniform(3.5, 6.0)
    elif credit_score > 700:
        interest_rate = np.random.uniform(5.5, 8.5)
    elif credit_score > 650:
        interest_rate = np.random.uniform(7.5, 11.0)
    elif credit_score > 600:
        interest_rate = np.random.uniform(10.0, 14.0)
    else:
        interest_rate = np.random.uniform(13.0, 20.0)

    # === BANK RELATIONSHIP ===
    years_with_bank = np.random.randint(0, min(age - 17, 30))
    has_savings_account = random.random() < 0.70
    has_checking_account = random.random() < 0.70

    # === DEFAULT PREDICTION (TARGET VARIABLE) ===
    # Calculate default probability based on risk factors
    default_prob = 0.15  # Base 15% default rate

    # Credit score (strongest predictor)
    if credit_score > 750:
        default_prob *= 0.3
    elif credit_score > 700:
        default_prob *= 0.5
    elif credit_score > 650:
        default_prob *= 0.8
    elif credit_score > 600:
        default_prob *= 1.2
    else:
        default_prob *= 2.0

    # Debt-to-income ratio
    if debt_to_income_ratio > 0.50:
        default_prob *= 1.8
    elif debt_to_income_ratio > 0.40:
        default_prob *= 1.3

    # Employment
    if employment_type == "Unemployed":
        default_prob *= 3.0
    elif employment_type == "Full-Time":
        default_prob *= 0.7

    # Loan amount relative to income
    loan_to_income = loan_amount / max(annual_income, 1)
    if loan_to_income > 5:
        default_prob *= 1.6

    # Existing loans
    if existing_loans > 3:
        default_prob *= 1.4

    # Property ownership
    if property_type == "Own Outright":
        default_prob *= 0.6

    # Bank relationship
    if years_with_bank > 10:
        default_prob *= 0.8

    # Cap probability
    default_prob = min(default_prob, 0.85)

    # Generate binary outcome
    loan_default = 1 if random.random() < default_prob else 0

    return {
        "CustomerID": customer_id,
        "Age": age,
        "Education": education,
        "EmploymentType": employment_type,
        "EmploymentSector": employment_sector,
        "EmploymentLengthYears": employment_length_years,
        "MonthlyIncome": monthly_income,
        "MaritalStatus": marital,
        "Dependents": dependents,
        "PropertyOwnership": property_type,
        "CreditScore": credit_score,
        "ExistingLoans": existing_loans,
        "MonthlyDebt": monthly_debt,
        "YearsWithBank": years_with_bank,
        "HasSavingsAccount": int(has_savings_account),
        "HasCheckingAccount": int(has_checking_account),
        "LoanPurpose": loan_purpose,
        "LoanAmount": loan_amount,
        "LoanTermMonths": loan_term_months,
        "InterestRate": interest_rate,
        "LoanDefault": loan_default,  # TARGET VARIABLE
        # Helper copies read by generate_loan_description.
        "_loan_purpose": loan_purpose,
        "_employment_type": employment_type,
        "_credit_score": credit_score
    }

def generate_loan_description(row):
    """Produce a short free-text loan description for one customer record.

    Roughly 8% of calls return ``np.nan`` to simulate missing text. Otherwise a
    prompt matching the record's loan purpose is chosen at random (purposes
    without their own prompts use the "Personal" ones) and completed by the
    module-level ``text_generator`` pipeline. If the generated text contains a
    period, it is cut after the first one.

    Args:
        row: Mapping-like record; only ``row["_loan_purpose"]`` is read.

    Returns:
        str or float: The generated description, a canned fallback sentence if
        generation raised an exception, or ``np.nan`` for a skipped record.
    """
    # Skip 8% of descriptions (realistic missing text data)
    if random.random() < 0.08:
        return np.nan

    purpose = row["_loan_purpose"]

    # Create contextual prompts
    prompts = {
        "Home Purchase": [
            "I am applying for a home loan to purchase",
            "Looking to buy a house for my family because",
            "Need mortgage financing for"
        ],
        "Car Loan": [
            "I need a car loan to buy a vehicle for",
            "Applying for auto financing to purchase",
            "Looking to finance a car because"
        ],
        "Business": [
            "I need a business loan to expand my",
            "Seeking financing to start my own",
            "Applying for business credit to"
        ],
        "Education": [
            "I need an education loan to pursue",
            "Applying for student financing for",
            "Need funding for my education in"
        ],
        "Debt Consolidation": [
            "I want to consolidate my debts to",
            "Applying for debt consolidation to reduce",
            "Need to refinance existing loans because"
        ],
        "Personal": [
            "I need a personal loan for",
            "Applying for financing to cover",
            "Need funds for"
        ]
    }

    prompt_list = prompts.get(purpose, prompts["Personal"])
    prompt = random.choice(prompt_list)

    try:
        result = text_generator(
            prompt,
            max_new_tokens=25,
            num_return_sequences=1,
            temperature=0.5,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            pad_token_id=text_generator.tokenizer.eos_token_id
        )

        description = result[0]["generated_text"].strip()

        # Keep only the first sentence when the text contains a period; text
        # without any period is returned unchanged.
        sentences = description.split(".")
        if len(sentences) > 1:
            description = sentences[0] + "."

        return description

    except Exception as e:
        # Best effort: report the failure and fall back to a canned sentence
        # so one bad generation does not abort the whole dataset build.
        print(f"Error generating description: {e}")

        # Fallback descriptions
        fallbacks = {
            "Home Purchase": "Need financing for a new home purchase for my growing family.",
            "Car Loan": "Looking to purchase a reliable vehicle for daily commute.",
            "Business": "Seeking capital to expand business operations.",
            "Education": "Need funding to complete my degree program.",
            "Debt Consolidation": "Want to consolidate existing debts into single payment.",
            "Personal": "Need funds for personal expenses and improvements."
        }
        return fallbacks.get(purpose, "Applying for loan to meet financial needs.")

# === GENERATE DATASET ===
print(f"\nGenerating {num_customers} banking customer data...")

# Simulate one record per customer ID (1-based), reporting every 100 records.
customers = []
for customer_id in range(1, num_customers + 1):
    customers.append(generate_financial_data(customer_id))
    if customer_id % 100 == 0:
        print(f"  Generated {customer_id}/{num_customers} customers...")

# Collect the records into a single table.
df = pd.DataFrame(customers)

# Add one generated text description per row, again reporting every 100 rows.
print("\nGenerating loan purpose descriptions using SLM...")
descriptions = []
total_rows = len(df)
for processed, (_, record) in enumerate(df.iterrows(), start=1):
    descriptions.append(generate_loan_description(record))
    if processed % 100 == 0:
        print(f"  Processed {processed}/{total_rows} descriptions...")

df["LoanPurposeDescription"] = descriptions

# Drop the helper columns, then order rows by CustomerID with a fresh index.
df = (
    df.drop(columns=["_loan_purpose", "_employment_type", "_credit_score"])
    .sort_values("CustomerID")
    .reset_index(drop=True)
)

# Write the finished dataset into the project folder as CSV.
output_csv_path = os.path.join(drive_folder, "synthetic_financial_data.csv")
df.to_csv(output_csv_path, index=False)

Loading text generation model...


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0



Generating 1000 banking customer data...
  Generated 100/1000 customers...
  Generated 200/1000 customers...
  Generated 300/1000 customers...
  Generated 400/1000 customers...
  Generated 500/1000 customers...
  Generated 600/1000 customers...
  Generated 700/1000 customers...
  Generated 800/1000 customers...
  Generated 900/1000 customers...
  Generated 1000/1000 customers...

Generating loan purpose descriptions using SLM...


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


  Processed 100/1000 descriptions...
  Processed 200/1000 descriptions...
  Processed 300/1000 descriptions...
  Processed 400/1000 descriptions...
  Processed 500/1000 descriptions...
  Processed 600/1000 descriptions...
  Processed 700/1000 descriptions...
  Processed 800/1000 descriptions...
  Processed 900/1000 descriptions...
  Processed 1000/1000 descriptions...


In [None]:
# Preview the first 10 rows of the generated dataset.
print("\nFirst 10 rows:")
df.head(10)


First 10 rows:


Unnamed: 0,CustomerID,Age,Education,EmploymentType,EmploymentSector,EmploymentLengthYears,MonthlyIncome,MaritalStatus,Dependents,PropertyOwnership,...,MonthlyDebt,YearsWithBank,HasSavingsAccount,HasCheckingAccount,LoanPurpose,LoanAmount,LoanTermMonths,InterestRate,LoanDefault,LoanPurposeDescription
0,1,56,Bachelor,Full-Time,Retail,14,11413.035298,Married,0,Rent,...,2350.379946,10,1,1,Vacation,19921.253949,12,3.749937,0,Need funds for the project.
1,2,41,High School,Full-Time,Construction,7,7203.03975,Single,0,Own with Mortgage,...,1297.827096,1,0,1,Personal,6410.289476,36,5.846382,0,Applying for financing to cover the costs of t...
2,3,38,Bachelor,Full-Time,Finance,11,13724.533663,Married,3,Rent,...,3057.926084,15,1,0,Home Improvement,20296.322368,36,6.376434,0,I need a personal loan for me to buy a new car.
3,4,32,Bachelor,Contract,Government,13,9464.454897,Single,0,Own with Mortgage,...,2324.616801,6,1,1,Home Improvement,29580.772145,60,10.509791,0,I need a personal loan for me to get a job in ...
4,5,38,Bachelor,Contract,Education,17,11770.841248,Divorced,0,Own with Mortgage,...,4538.25301,1,1,1,Business,123575.438123,240,5.547899,0,"Seeking financing to start my own company, I w..."
5,6,37,Bachelor,Full-Time,Finance,6,9319.997953,Married,1,Own with Mortgage,...,2426.149772,1,1,0,Vacation,27733.010052,24,12.650089,0,I need a personal loan for my children.
6,7,23,High School,Full-Time,Manufacturing,3,1817.868741,Single,0,Living with Parents,...,471.544222,3,1,1,Home Improvement,26057.119365,24,15.766052,0,Need funds for the entire project.
7,8,31,Bachelor,Full-Time,Education,7,11227.267973,Married,2,Own with Mortgage,...,4054.542154,8,1,0,Personal,23683.002753,48,9.553629,0,Applying for financing to cover the costs of t...
8,9,62,Associate,Part-Time,Healthcare,14,2907.32721,Married,0,Own Outright,...,490.28486,23,1,1,Medical,29672.173415,48,8.195505,0,Applying for financing to cover the cost of th...
9,10,28,Bachelor,Contract,Healthcare,7,7792.475627,Married,2,Rent,...,3364.697407,8,1,1,Car Loan,47420.530748,60,13.659839,0,Looking to finance a car because it’s a good i...


In [None]:
# Preview the last 10 rows of the generated dataset.
print("\nLast 10 rows:")
df.tail(10)


Last 10 rows:


Unnamed: 0,CustomerID,Age,Education,EmploymentType,EmploymentSector,EmploymentLengthYears,MonthlyIncome,MaritalStatus,Dependents,PropertyOwnership,...,MonthlyDebt,YearsWithBank,HasSavingsAccount,HasCheckingAccount,LoanPurpose,LoanAmount,LoanTermMonths,InterestRate,LoanDefault,LoanPurposeDescription
990,991,41,High School,Full-Time,Retail,9,8176.335822,Single,0,Rent,...,1877.188634,17,0,1,Wedding,14530.748819,24,6.242851,0,I need a personal loan for me.
991,992,61,High School,Full-Time,Retail,25,8159.772653,Married,2,Own Outright,...,1085.598604,4,1,1,Debt Consolidation,32679.107932,12,4.648756,0,Need to refinance existing loans because they ...
992,993,19,High School,Unemployed,Finance,0,4529.71984,Single,0,Own Outright,...,1989.952367,1,1,1,Business,182743.463232,180,18.00687,1,Seeking financing to start my own company.
993,994,52,Bachelor,Full-Time,Education,10,11882.493583,Married,0,Rent,...,3080.120683,3,1,1,Vacation,14809.552523,60,5.876573,0,Need funds for the upcoming game.
994,995,55,High School,Full-Time,Construction,0,6452.423682,Married,1,Own Outright,...,1104.266809,29,1,0,Business,146076.183928,360,5.672748,1,"Applying for business credit to the bank, whic..."
995,996,52,Master,Full-Time,Retail,1,9955.570024,Married,0,Own Outright,...,2175.206405,12,1,1,Car Loan,22996.505157,48,3.583308,0,Applying for auto financing to purchase a car ...
996,997,53,Master,Self-Employed,Construction,6,9964.38421,Divorced,1,Own with Mortgage,...,3578.589991,22,1,1,Car Loan,32930.512632,60,9.49847,0,Looking to finance a car because of the cost o...
997,998,30,Master,Full-Time,Healthcare,10,12460.858625,Divorced,0,Own with Mortgage,...,1643.310948,8,1,1,Debt Consolidation,18044.630579,24,4.937098,0,Need to refinance existing loans because the l...
998,999,23,Bachelor,Part-Time,Technology,3,1900.565977,Divorced,0,Rent,...,672.322499,2,1,1,Car Loan,27524.440552,48,18.874456,1,"Applying for auto financing to purchase a car,..."
999,1000,24,Associate,Part-Time,Services,2,2310.269752,Single,1,Rent,...,655.840484,3,1,1,Car Loan,34025.845122,48,8.854675,0,I need a car loan to buy a vehicle for a futur...


In [None]:
# Show 10 randomly chosen rows; the printed label matches the sample size.
print("\nRandom sample of 10 rows:")
df.sample(10)


Random sample of 5 rows:


Unnamed: 0,CustomerID,Age,Education,EmploymentType,EmploymentSector,EmploymentLengthYears,MonthlyIncome,MaritalStatus,Dependents,PropertyOwnership,...,MonthlyDebt,YearsWithBank,HasSavingsAccount,HasCheckingAccount,LoanPurpose,LoanAmount,LoanTermMonths,InterestRate,LoanDefault,LoanPurposeDescription
38,39,33,High School,Full-Time,Construction,11,8891.334358,Widowed,1,Living with Parents,...,2051.57865,6,1,0,Personal,24879.654869,60,6.816914,0,Need funds for the project.
412,413,46,Associate,Full-Time,Construction,23,8951.860087,Married,2,Own Outright,...,2489.254381,25,1,1,Personal,7857.14425,24,7.536693,0,Applying for financing to cover the costs of t...
138,139,56,High School,Full-Time,Finance,23,8347.238861,Married,3,Own with Mortgage,...,3137.985784,7,0,1,Vacation,29729.622745,36,7.727452,0,I need a personal loan for me.
128,129,25,Master,Full-Time,Finance,0,12292.12039,Married,2,Living with Parents,...,2181.191479,2,1,1,Car Loan,49320.188006,72,5.116662,0,Applying for auto financing to purchase a car.
693,694,23,Associate,Part-Time,Services,0,1930.851515,Single,0,Rent,...,850.906181,3,1,0,Medical,29823.586645,48,14.580877,1,Need funds for the first time in a row.
183,184,67,Bachelor,Full-Time,Finance,7,6774.456927,Divorced,0,Rent,...,1175.200128,21,1,0,Business,142705.370689,120,8.104427,0,I need a business loan to expand my business.
754,755,40,High School,Full-Time,Manufacturing,18,12583.61739,Married,0,Rent,...,6846.473942,20,1,0,Business,138769.814098,240,10.126052,0,
8,9,62,Associate,Part-Time,Healthcare,14,2907.32721,Married,0,Own Outright,...,490.28486,23,1,1,Medical,29672.173415,48,8.195505,0,Applying for financing to cover the cost of th...
639,640,35,Bachelor,Full-Time,Government,1,15013.284311,Married,1,Own with Mortgage,...,7186.312979,11,0,1,Personal,19792.924222,12,12.666743,0,I need a personal loan for my family.
189,190,29,Bachelor,Full-Time,Healthcare,4,10463.906736,Married,1,Own with Mortgage,...,1336.362483,11,1,1,Business,103521.383959,240,4.433129,0,"I need a business loan to expand my business,”..."


In [None]:
# Summarize each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   CustomerID              1000 non-null   int64  
 1   Age                     1000 non-null   int64  
 2   Education               1000 non-null   object 
 3   EmploymentType          1000 non-null   object 
 4   EmploymentSector        1000 non-null   object 
 5   EmploymentLengthYears   1000 non-null   int64  
 6   MonthlyIncome           1000 non-null   float64
 7   MaritalStatus           1000 non-null   object 
 8   Dependents              1000 non-null   int64  
 9   PropertyOwnership       1000 non-null   object 
 10  CreditScore             1000 non-null   int64  
 11  ExistingLoans           1000 non-null   int64  
 12  MonthlyDebt             1000 non-null   float64
 13  YearsWithBank           1000 non-null   int64  
 14  HasSavingsAccount       1000 non-null   i