### Exploratory Data Analysis, Visualization, and Functionization


Here we convert the code from our EDA notebook to functions.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def plot_income_by_status(df):
    """Q1: Compare annual income between loan outcomes with a box plot.

    Parameters
    ----------
    df : DataFrame-like
        Data passed straight to ``sns.boxplot``. It must expose a
        ``loan_status`` column (x-axis grouping) and an ``income_annum``
        column (y-axis values).

    Returns
    -------
    None
        The figure is rendered through ``plt.show()``.
    """
    box_spec = {"data": df, "x": "loan_status", "y": "income_annum"}

    plt.figure(figsize=(6, 4))
    sns.boxplot(**box_spec)
    plt.title("Income by Loan Status")
    plt.show()

In [None]:
def plot_dti_ratio(df):
    """Q2: Debt-to-Income Ratio distribution by status.

    The ratio plotted is ``loan_amount / income_annum``. It is computed on a
    temporary frame, so the DataFrame passed in by the caller is NOT modified
    (no ``dti_ratio`` column is added to it).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``loan_amount``, ``income_annum`` and
        ``loan_status`` columns.

    Returns
    -------
    None
        The step histogram (density-normalised, coloured by ``loan_status``)
        is rendered through ``plt.show()``.
    """
    income = df["income_annum"]
    # A zero income would produce an infinite ratio (or NaN for 0/0); mask it
    # to NaN so those rows are simply left out of the histogram.
    ratio = df["loan_amount"] / income.where(income != 0)

    # assign() returns a new frame, keeping the caller's df free of side effects.
    plot_df = df.assign(dti_ratio=ratio)

    plt.figure(figsize=(6, 4))
    sns.histplot(data=plot_df, x="dti_ratio", hue="loan_status", element="step", stat="density")
    plt.title("Debt-to-Income Ratio by Loan Status")
    plt.show()

In [None]:
def plot_dependents_impact(df):
    """Q3: Stacked bar chart of loan-status share per number of dependents.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``no_of_dependents`` and ``loan_status`` columns.

    Returns
    -------
    None
        The chart is rendered through ``plt.show()``.
    """
    dependents = df["no_of_dependents"]
    status = df["loan_status"]

    # Row-normalised cross-tabulation, scaled from fractions to percentages.
    status_pct = pd.crosstab(dependents, status, normalize="index").mul(100)

    bar_opts = {"kind": "bar", "stacked": True, "figsize": (6, 4)}
    status_pct.plot(**bar_opts)
    plt.title("% Approval by Number of Dependents")
    plt.show()

In [None]:
def plot_cibil_vs_income(df):
    """Q4: Filled KDE curves of CIBIL score, one curve per loan status.

    Note: despite the function name, no income column is read here; only
    ``cibil_score`` and ``loan_status`` are used.

    Parameters
    ----------
    df : DataFrame-like
        Data passed straight to ``sns.kdeplot``. It must expose the
        ``cibil_score`` and ``loan_status`` columns.

    Returns
    -------
    None
        The figure is rendered through ``plt.show()``.
    """
    density_opts = dict(x="cibil_score", hue="loan_status", fill=True)

    plt.figure(figsize=(6, 4))
    sns.kdeplot(data=df, **density_opts)
    plt.title("CIBIL Score Distribution by Loan Status")
    plt.show()

In [None]:
def plot_self_employed_approval(df):
    """Q5: Grouped bar chart of loan-status share by self-employment flag.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``self_employed`` and ``loan_status`` columns.

    Returns
    -------
    None
        The chart is rendered through ``plt.show()``.
    """
    # Fraction of each loan status within every self_employed group ...
    status_share = pd.crosstab(
        df["self_employed"],
        df["loan_status"],
        normalize="index",
    )
    # ... expressed as a percentage.
    status_pct = status_share * 100

    status_pct.plot(figsize=(6, 4), kind="bar")
    plt.title("Approval % by Self-Employed Status")
    plt.show()

In [None]:
def plot_education_approval(df):
    """Q6: Grouped bar chart of loan-status share by education level.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``education`` and ``loan_status`` columns.

    Returns
    -------
    None
        The chart is rendered through ``plt.show()``.
    """
    education = df["education"]
    outcome = df["loan_status"]

    # Row-normalised cross-tab: per education level, the share of each status in %.
    pct_by_education = pd.crosstab(education, outcome, normalize="index").mul(100)

    pct_by_education.plot(kind="bar", figsize=(6, 4))
    plt.title("Approval % by Education Level")
    plt.show()

### **Testing the functions**

In [None]:
# Load the loan-approval dataset from a CSV file (path is relative to the
# working directory) and preview the first rows.
df = pd.read_csv("loan_approval_dataset.csv")
df.head()

In [None]:
# Inspect the column names exactly as they were read from the CSV.
df.columns

In [None]:
# Strip leading/trailing whitespace from the column names. This reassigns
# df.columns on df itself (no copy is made), then displays the cleaned names.
df.columns = df.columns.str.strip()
df.columns

In [None]:
# Q1: box plot of income_annum grouped by loan_status.
plot_income_by_status(df)

In [None]:
# Q2: histogram of loan_amount / income_annum, coloured by loan_status.
plot_dti_ratio(df)

In [None]:
# Q3: stacked bars of loan_status percentage per no_of_dependents value.
plot_dependents_impact(df)

In [None]:
# Q4: KDE of cibil_score, one curve per loan_status.
plot_cibil_vs_income(df)

In [None]:
# Q5: bars of loan_status percentage per self_employed value.
plot_self_employed_approval(df)

In [None]:
# Q6: bars of loan_status percentage per education value.
plot_education_approval(df)