In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

In [None]:
def createheatmap(data):
    """Draw an annotated correlation heatmap for the numeric columns of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to correlate. Only its numeric columns are used.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # Restrict to numeric columns before correlating: with pandas >= 2.0,
    # DataFrame.corr() no longer drops non-numeric columns on its own and
    # fails when the frame contains string columns.
    corr = data.select_dtypes(include="number").corr()

    # Explicit figure/axes so the heatmap is drawn on the figure sized here.
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.heatmap(
        corr,
        annot=True,
        fmt='.2f',
        cmap="RdBu_r",
        xticklabels=True,
        yticklabels=True,
        ax=ax,
    )
    ax.set_title("Heatmap of data")
    ax.xaxis.label.set_color("white")
    ax.yaxis.label.set_color("white")
    fig.tight_layout()
    plt.show()


In [None]:
import matplotlib.ticker as mtick

def plot_roc_curve(false_poitive_rate,true_positive_rate,label="ROC CURVE"):
    """Plot a ROC curve together with the diagonal reference line.

    Parameters
    ----------
    false_poitive_rate : array-like
        X values of the curve (false positive rate per threshold).
    true_positive_rate : array-like
        Y values of the curve (true positive rate per threshold).
    label : str, default "ROC CURVE"
        Text used as the plot title.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # plt.subplots registers the figure with pyplot, so plt.show() displays it,
    # and every drawing call below targets this one Axes.
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.plot(false_poitive_rate, true_positive_rate)
    ax.plot([0, 1], [0, 1])  # diagonal from (0, 0) to (1, 1)
    ax.axis([0, 1, 0, 1])    # axis limits: [xmin, xmax, ymin, ymax]
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")

    title = ax.set_title(label)
    title.set_color("w")
    ax.xaxis.label.set_color("white")
    ax.yaxis.label.set_color("white")
    ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
    # Visibility is passed positionally: the keyword was renamed from `b` to
    # `visible` across matplotlib releases, the positional form works in both.
    ax.grid(True, which="major", color="gray")
    fig.tight_layout()
    plt.show()

In [None]:
# Load the credit-risk CSV; the path is relative to the notebook's working directory.
data = pd.read_csv("data/credit_risk_dataset.csv")
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# (number of rows, number of columns) of the frame.
data.shape

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Keep only the rows that have no missing value in any column.
data = data.dropna()

In [None]:
# Column names, dtypes and non-null counts.
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
# Numeric columns whose distributions are inspected for outliers.
selected_cols = ["person_age", "person_income", "person_emp_length","cb_person_cred_hist_length","loan_int_rate"]

fig, axes = plt.subplots(nrows=1, ncols=len(selected_cols), figsize=(5*len(selected_cols), 5))
# plt.subplots returns a bare Axes (not an array) when there is only one
# panel; normalising keeps the loop valid if selected_cols has one entry.
axes = np.atleast_1d(axes)

for ax, col in zip(axes, selected_cols):
    # Pass the series by keyword. A positional series is read as `x` or as
    # `data` depending on the seaborn version, which flips the orientation;
    # `y=` pins a vertical plot everywhere.
    sns.boxenplot(y=data[col], ax=ax)
    ax.set_title(f"Boxplot of {col}")
fig.tight_layout()
plt.show()

In [None]:
# Number of rows per loan_status value (class balance of the target).
data["loan_status"].value_counts()

In [None]:

# Pie chart of the share of rows per loan_status value.
loan_status_counts = data["loan_status"].value_counts()
loan_status_counts.plot.pie(autopct="%0.1f%%")
plt.title("Loan Status ")
plt.show()

## SMOTE
**SMOTE** stands for *Synthetic Minority Over-sampling Technique*.
It balances an imbalanced dataset by generating synthetic samples for the minority class rather than just duplicating existing ones.
### How SMOTE works
1) For each minority-class sample, SMOTE finds its k nearest neighbors within the minority class (usually k=5).
2) It randomly picks one of the neighbors.
3) It generates a new synthetic sample by interpolating between the original and neighbor sample.

$$x_{\text{new}} = x_{\text{original}} + \lambda \,\left(x_{\text{neighbor}} - x_{\text{original}}\right), \qquad \lambda \sim U(0, 1)$$

In [None]:
# Separate the target column from the feature columns.
y = data["loan_status"]
x = data.drop(columns=["loan_status"])

# Feature names grouped by dtype: text/categorical versus numeric.
cat_col = list(x.select_dtypes(include=["object", "category"]).columns)
num_col = list(x.select_dtypes(include="number").columns)

# pd.get_dummies one-hot encodes only the columns listed in `columns`
# (dropping the first level of each); the numeric columns are carried over
# unchanged into x_encode.
x_encode = pd.get_dummies(x, columns=cat_col, drop_first=True)


In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class of the encoded features; random_state fixes
# the synthetic samples so reruns give the same result.
# NOTE(review): this resamples the full dataset, before any train/test split.
# If x_resampled / y_resampled are split into train and test sets, the test
# set will contain synthetic rows interpolated from data that also feeds
# training, which inflates the score. Prefer resampling only the training
# portion.
# NOTE(review): x_encode contains one-hot columns; plain SMOTE interpolates
# them like continuous features. SMOTENC may be a better fit; confirm.
smote = SMOTE(random_state=42)
x_resampled ,y_resampled = smote.fit_resample(x_encode,y)

In [None]:
# Inspect the resampled feature frame.
x_resampled

In [None]:
# Shapes of the resampled features and target.
x_resampled.shape,y_resampled.shape

In [None]:
from sklearn.model_selection import train_test_split

# Split the original (encoded, not yet resampled) data first, so the test set
# contains only real rows at the real class ratio. `stratify=y` keeps that
# ratio the same in both parts.
x_train_raw, x_test, y_train_raw, y_test = train_test_split(
    x_encode, y, test_size=0.2, random_state=42, stratify=y
)

# Oversample the minority class in the training portion only. Resampling
# before the split would let synthetic points derived from test rows leak
# into training and make the evaluation optimistic.
x_train, y_train = SMOTE(random_state=42).fit_resample(x_train_raw, y_train_raw)

In [None]:
from sklearn.compose import ColumnTransformer, make_column_selector

# One-hot encode whatever text/categorical columns are present in the frame
# passed to fit(), and pass every other column through unchanged.
# - Columns are chosen by dtype at fit time instead of by the fixed cat_col
#   names: the training frame was already dummy-encoded by pd.get_dummies, so
#   the original categorical names are no longer in it and a name-based
#   selection fails. With no matching columns the encoder step is skipped.
# - `drop` must be the lowercase string "first"; "First" is rejected.
col_transformation = ColumnTransformer(
    transformers=[
        (
            "ohe",
            OneHotEncoder(drop="first"),
            make_column_selector(dtype_include=["object", "category"]),
        ),
    ],
    remainder="passthrough",
)

In [None]:
# Preprocessing followed by a small random forest; random_state makes the
# fitted model reproducible.
pipeline = Pipeline([
    ("preprocess", col_transformation),
    ("model", RandomForestClassifier(random_state=42, n_estimators=10)),
])

pipeline.fit(x_train, y_train)
y_pred = pipeline.predict(x_test)
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy happens
# to be symmetric, but the conventional order avoids mistakes when other
# metrics are added.
print("accuracy score is :", accuracy_score(y_test, y_pred))

In [None]:
# Correlation heatmap of the frame, using the createheatmap helper defined earlier in the notebook.
createheatmap(data)