# Loan analysis pt. 4

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the loan records from a CSV file located relative to the working directory.
df = pd.read_csv(filepath_or_buffer="loan_data.csv")

In [None]:
# Histogram of all loan amounts in 5000-wide bins with a KDE curve, plus a rug
# of short ticks along the x-axis drawn on the same axes.
ax = sns.histplot(data=df, x="loan_amount", binwidth=5000, kde=True, color="RosyBrown")
sns.rugplot(data=df, x="loan_amount", height=0.04, color="MediumSeaGreen", ax=ax)

ax.set_title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
ax.set_xlabel("Loan Amount")
ax.set_ylabel("")  # leave the y-axis unlabeled
ax.grid(axis="y", linestyle="--", alpha=0.7)

plt.show()

In [None]:
# Box plot of loan amount for each grade, with the individual loans drawn on
# top as small jittered gray points.
plt.figure(figsize=(8, 8))  # in inches, default 6.4x4.8
sns.boxplot(df, x="grade", y="loan_amount", palette="RdYlGn_r")
sns.stripplot(df, x="grade", y="loan_amount", color="gray", jitter=0.2, size=3)
# The plotted y variable is loan_amount, so the title and y-label name it.
plt.title("Distribution of Loan Amount by Grade", fontsize=16, fontweight="bold")
plt.ylabel("Loan Amount", fontsize=14)
sns.despine()
plt.grid(axis="y", linestyle="--")
plt.show()

In [None]:
# Plot multiple distributions: pull out the Grade A and Grade D loans so their
# loan amounts can be charted on their own.
is_grade_a = df["grade"].eq("A")
is_grade_d = df["grade"].eq("D")
as_only = df.loc[is_grade_a]
ds_only = df.loc[is_grade_d]

In [None]:
# Overlay the Grade A and Grade D loan-amount histograms on one set of axes.
# Both histograms are drawn before the single plt.show(), so the title, axis
# labels and grid apply to the combined chart; the legend tells the grades apart.
sns.histplot(as_only, x="loan_amount", color="RosyBrown", binwidth=5000, kde=True, label="Grade A")
sns.histplot(ds_only, x="loan_amount", binwidth=5000, kde=True, label="Grade D")
plt.title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
plt.xlabel("Loan Amount")
plt.ylabel("")
plt.legend()
plt.grid(axis="y", linestyle="--", alpha=0.7)

plt.show()

In [None]:
# Open credit lines feature: number of rows for each distinct value, ordered
# by the value itself rather than by frequency.
(
    df["open_credit_lines"]
    .value_counts()
    .sort_index()
)

In [None]:
# Plot interest paid, segmented by grade, as individual charts: one row of
# three bar charts for loans with 1, 2 and 3 open credit lines.
plt.figure(figsize=(15, 5))  # 3 plots: 5x5 for one plot

for position, n_lines in enumerate((1, 2, 3), start=1):
    plt.subplot(1, 3, position)  # rows, cols, active plot
    filtered_df = df[df["open_credit_lines"] == n_lines]
    sns.barplot(filtered_df, hue="grade", y="paid_interest", palette="RdYlGn_r")
    # Title each panel with its credit-line count; without it the three
    # charts in the saved image cannot be told apart.
    plt.title(f"Open credit lines: {n_lines}")

plt.savefig("credit-lines-lots.png")

In [None]:
# One row of three bar charts (paid interest by grade), one chart for each
# open-credit-line count from 1 to 3.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

for ax, n_lines in zip(axes, (1, 2, 3)):
    # Keep only the rows with exactly n_lines open credit lines.
    subset = df.loc[df["open_credit_lines"] == n_lines]
    sns.barplot(data=subset, hue="grade", y="paid_interest", palette="RdYlGn_r", ax=ax)

In [None]:
# Graph for credit lines 1-9: a 3x3 grid of bar charts of paid interest by
# grade, each panel titled with its open-credit-line count.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))

# axes.flat walks the grid row by row, so panel k holds credit-line count k.
for n_lines, ax in enumerate(axes.flat, start=1):  # n_lines = 1, 2, ..., 9
    subset = df.loc[df["open_credit_lines"] == n_lines]
    sns.barplot(data=subset, hue="grade", y="paid_interest", palette="RdYlGn_r", ax=ax)
    ax.set_title(n_lines)

plt.savefig("9graphs.png")

In [None]:
# Pair plot of four loan columns, titled and written to pairplot.png.
pair_columns = ["loan_amount", "annual_income", "interest_rate", "paid_interest"]
filtered_df = df[pair_columns]

sns.pairplot(data=filtered_df)
plt.suptitle(
    "Pairplot of Loan Amount, Annual Income, Interest Rate, and Paid Interest",
    y=1,
)
plt.savefig("pairplot.png")