# Loan analysis pt. 3

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the loan dataset from a CSV in the working directory; `df` is the
# frame every later cell reads from.
df = pd.read_csv("loan_data.csv")

In [None]:
# Restrict the analysis to a handful of states so the grouped charts
# below stay readable.
states = ["DC", "AK", "HI"]
in_selected_states = df["state"].isin(states)
filtered_df = df[in_selected_states]

In [None]:
# Mean loan amount for every (state, grade) pair. The order of the groupby
# keys matters: unstack() pivots the last key (grade) into columns, so the
# first key (state) stays on the index and becomes the x-axis groups.
grouped_data = (
    filtered_df
    .groupby(["state", "grade"])["loan_amount"]
    .mean()
)
mean_by_state = grouped_data.unstack()
mean_by_state.plot.bar()
plt.show()

In [None]:
# Same state/grade comparison as the pandas chart, drawn with seaborn:
# one bar per grade within each state.
sns.barplot(data=filtered_df, x="state", y="loan_amount", hue="grade")
plt.show()

In [None]:
# Grouped bar chart with readable axis labels, a title and a titled legend.
ax = sns.barplot(data=filtered_df, x="state", y="loan_amount", hue="grade")
ax.set_xlabel("State")
ax.set_ylabel("Loan Amount")
ax.set_title("Loan Amount by State and Grade")
# Anchor the legend at the top-right corner of the axes.
ax.legend(title="Grade", bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# Highest interest rate per state and grade: estimator=np.max replaces
# seaborn's default aggregate, so each bar is a maximum, not an average.
sns.barplot(
    filtered_df,
    x="state",
    y="interest_rate",
    hue="grade",
    estimator=np.max
)
plt.xlabel("State")
# Labels describe what is actually plotted (interest rate, aggregated with
# max) instead of the "Loan Amount" text copied from the previous cells.
plt.ylabel("Max Interest Rate")
plt.title("Maximum Interest Rate by State and Grade")
plt.legend(title="Grade", bbox_to_anchor=(1,1))
plt.show()

In [None]:
# Switch on seaborn's default theme; this changes global plot styling, so
# it also affects every figure drawn after this cell.
sns.set_theme()

ax = sns.barplot(data=filtered_df, x="state", y="loan_amount", hue="grade")
ax.set(xlabel="State", ylabel="Loan Amount")
ax.set_title("Loan Amount by State and Grade")
ax.legend(title="Grade", bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# Re-apply the seaborn theme with the "white" style (global setting that
# stays active for the following cells).
sns.set_theme(style="white")

ax = sns.barplot(data=filtered_df, x="state", y="loan_amount", hue="grade")
ax.set_xlabel("State")
ax.set_ylabel("Loan Amount")
ax.set_title("Loan Amount by State and Grade")
ax.legend(title="Grade", bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# Same chart, colouring the grade bars with the sequential "Blues" palette.
ax = sns.barplot(
    data=filtered_df,
    x="state",
    y="loan_amount",
    hue="grade",
    palette="Blues",
)
ax.set(xlabel="State", ylabel="Loan Amount")
ax.set_title("Loan Amount by State and Grade")
ax.legend(title="Grade", bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# Grouped bar chart with the reversed red-yellow-green palette, written to
# disk as a PNG instead of only being shown.
sns.barplot(
    filtered_df,
    x="state",
    y="loan_amount",
    hue="grade",
    palette="RdYlGn_r"
)
plt.xlabel("State")
plt.ylabel("Loan Amount")
plt.title("Loan Amount by State and Grade")
plt.legend(title="Grade", bbox_to_anchor=(1,1))
# bbox_inches="tight" grows the saved canvas to include every artist; the
# legend is anchored at the axes corner and can end up outside the axes,
# where the default save area would crop it.
# NOTE(review): the file name says "interest_rates" but this figure shows
# loan amounts - name kept so existing references keep working; confirm.
plt.savefig("interest_rates_graph.png", bbox_inches="tight")

In [None]:
# Horizontal box plot of the interest rate across all loans.
sns.boxplot(data=df, x="interest_rate")
plt.show()

In [None]:
# Vertical box plot of the interest rate with a bold title and larger label.
ax = sns.boxplot(data=df, y="interest_rate")
ax.set_title("Distribution of Interest Rate", fontsize=16, fontweight="bold")
ax.set_ylabel("Interest Rate", fontsize=14)
# Besides the default top/right spines, also drop the bottom one: there is
# no categorical x variable here.
sns.despine(bottom=True)
plt.show()

In [None]:
# Interest-rate distribution per loan grade, one coloured box per grade.
# seaborn 0.13 deprecates `palette` without `hue`; mapping the x variable to
# hue and hiding the (redundant) legend is the documented equivalent and
# draws the same figure without the FutureWarning.
sns.boxplot(
    df,
    x="grade",
    y="interest_rate",
    hue="grade",
    palette="RdYlGn_r",
    legend=False,
)
plt.title("Distribution of Interest Rate", fontsize=16, fontweight="bold")
plt.ylabel("Interest Rate", fontsize=14)
sns.despine()
# Dashed horizontal guides make the box levels easier to compare.
plt.grid(axis="y", linestyle="--")
plt.show()

In [None]:
# Variant that maps grade to `hue` only (no x variable), so the grades are
# told apart by colour and a legend rather than by x tick labels.
ax = sns.boxplot(data=df, y="interest_rate", hue="grade", palette="RdYlGn_r")
ax.set_title("Distribution of Interest Rate", fontsize=16, fontweight="bold")
ax.set_ylabel("Interest Rate", fontsize=14)
sns.despine()
ax.grid(axis="y", linestyle="--")
plt.show()

In [None]:
# Same per-grade box plot on an explicitly sized figure.
plt.figure(figsize=(8, 6)) # in inches, default 6.4x4.8
# seaborn 0.13 deprecates `palette` without `hue`; assigning x to hue with
# legend=False is the documented equivalent and keeps the figure unchanged.
sns.boxplot(
    df,
    x="grade",
    y="interest_rate",
    hue="grade",
    palette="RdYlGn_r",
    legend=False,
)
plt.title("Distribution of Interest Rate", fontsize=16, fontweight="bold")
plt.ylabel("Interest Rate", fontsize=14)
sns.despine()
plt.grid(axis="y", linestyle="--")
plt.show()

In [None]:
# First look at the loan-amount distribution with seaborn's default bins.
sns.histplot(data=df, x="loan_amount")
plt.show()

In [None]:
# Styled histogram: custom bar colour, bold title, light horizontal grid.
ax = sns.histplot(data=df, x="loan_amount", color="RosyBrown")
ax.set_title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
ax.set_xlabel("Loan Amount")
# Blank out the y-axis label.
ax.set_ylabel("")
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Same histogram with the number of bins fixed at 30.
hist_options = {"x": "loan_amount", "color": "RosyBrown", "bins": 30}
ax = sns.histplot(data=df, **hist_options)
ax.set_title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
ax.set(xlabel="Loan Amount", ylabel="")
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Histogram with a fixed bin width of 5000 (in loan_amount units) instead
# of a fixed bin count.
sns.histplot(df, x="loan_amount", color="RosyBrown", binwidth=5000)
plt.title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
plt.xlabel("Loan Amount")
plt.ylabel("")
plt.grid(axis="y", linestyle="--", alpha=0.7)
# Draw the x-axis tick marks. plt.xticks(visible=True) only toggles the
# visibility of the tick *labels* (already visible), so it had no effect;
# tick_params(bottom=True) switches the marks themselves on.
# NOTE(review): assumes the "white" seaborn style set earlier is still
# active, which is what hides the marks - confirm the intended look.
plt.tick_params(axis="x", bottom=True)
plt.show()

In [None]:
# Switch the global seaborn style to "ticks" so axis tick marks are drawn,
# then redraw the fixed-bin-width histogram.
sns.set_style("ticks")

ax = sns.histplot(data=df, x="loan_amount", color="RosyBrown", binwidth=5000)
ax.set_title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
ax.set_xlabel("Loan Amount")
ax.set_ylabel("")
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Histogram with a kernel density estimate curve overlaid (kde=True).
ax = sns.histplot(
    data=df,
    x="loan_amount",
    color="RosyBrown",
    binwidth=5000,
    kde=True,
)
ax.set_title("Distribution of Loan Amount", fontsize=16, fontweight="bold")
ax.set(xlabel="Loan Amount", ylabel="")
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Columns that take part in the pairwise correlation analysis.
correlation_features = [
    'emp_length',
    'debt_to_income',
    'annual_income',
    'open_credit_lines',
    'total_credit_limit',
    'total_credit_utilized',
    'total_debit_limit',
    'loan_amount',
    'interest_rate',
    'installment',
    'balance',
    'paid_interest',
]

# Pairwise correlation matrix of the selected columns (pandas default
# method); kept in a variable so it can be inspected as a table later.
correlation_table = df.loc[:, correlation_features].corr()

# Diverging colormap between hue 240 (blue) and hue 10 (red).
custom_cmap = sns.diverging_palette(240, 10, as_cmap=True)

# Heatmap options, gathered in one place.
heatmap_options = dict(
    annot=True,               # print the coefficient in every cell
    fmt=".2f",                # ...rounded to two decimals
    cmap=custom_cmap,
    center=0,                 # colormap midpoint sits at zero correlation
    vmin=-1,                  # pin the colour scale to the full [-1, 1] range
    vmax=1,
    cbar_kws={"shrink": .8},  # draw the colorbar at 80% of its default size
)

plt.figure(figsize=(8, 8))
sns.heatmap(correlation_table, **heatmap_options)

plt.title('Correlation Heatmap of Loan Features')
# Slanted, right-aligned column names; horizontal row names.
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.show()

In [None]:
# Bare expression: the notebook renders the correlation matrix as a table
# so the exact coefficients can be read alongside the heatmap.
correlation_table

In [None]:
# Violin plot of loan amount per grade with a miniature box plot inside
# each violin (inner='box').
plt.figure(figsize=(8, 8))
# seaborn 0.13 deprecates `palette` without `hue`; assigning x to hue with
# legend=False is the documented equivalent and keeps the figure unchanged.
sns.violinplot(
    df,
    x='grade',
    y='loan_amount',
    hue='grade',
    inner='box',
    palette='RdYlGn_r',
    legend=False,
)

# Add a grid on the y axis
plt.grid(axis="y", linestyle="--", alpha=0.7)

plt.title('Distribution of Loan Amounts by Loan Grade with Internal Box')
plt.xlabel('Loan Grade')
plt.ylabel('Loan Amount')

# Show the plot
plt.show()

In [None]:
# Scatter plot of paid interest against loan amount with a fitted
# regression line drawn on top.
plt.figure(figsize=(8, 6))
sns.regplot(df, x='loan_amount', 
            y='paid_interest', 
            scatter_kws={"color": "slateblue"},  # colour of the points
            line_kws={"color": "black"})         # colour of the fitted line

# Set title and labels
plt.title('Regression of Paid Interest on Loan Amount', fontsize=16, fontweight="bold")
plt.xlabel('Loan Amount ($)')
plt.ylabel('Paid Interest ($)')

# Set the grid style (linewidth is a number of points, so pass a float
# rather than relying on matplotlib coercing the string '0.5')
plt.grid(linestyle='--', linewidth=0.5, color='gray')

# Make the x-axis start at 0; the y-axis keeps its automatic limits
plt.xlim(left=0)

# Show the plot
plt.show()