**Import Modules**

In [None]:
!pip install catboost

In [None]:
from zipfile import ZipFile
from sklearn.metrics import accuracy_score, recall_score, precision_score, \
f1_score, roc_auc_score, roc_curve, auc, confusion_matrix, classification_report
import string
from sklearn.model_selection import train_test_split
import os
import statistics
import shutil
import matplotlib
import re
import warnings
from catboost import CatBoostClassifier, cv
import catboost
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Print NumPy floats with two decimals for the whole session.
# `np.printoptions` is a context manager (and `pricision` was misspelled), so the
# previous bare call never changed any setting; `set_printoptions` applies it globally.
np.set_printoptions(precision=2)
import pandas as pd
import itertools
pd.set_option('display.max_columns', 30)
# %matplotlib inline
warnings.filterwarnings('ignore')
# nltk.download('stopwords')
plt.style.use('ggplot')
from scipy.stats.contingency import chi2_contingency
from sklearn.feature_selection import chi2, SelectKBest, f_classif
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PowerTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.stats import boxcox, skew, kurtosis, normaltest
from sklearn.decomposition import PCA
import sklearn.impute
from catboost import Pool
import pickle

In [None]:
# Columns read as pandas categoricals (the same mapping is used for both files).
dtypes = {
    "Month": "category",
    "Name": "category",
    "Occupation": "category",
    "Type_of_Loan": "category",
    "Credit_History_Age": "category",
    "Payment_Behaviour": "category",
}

# Read both splits with identical settings, then tag every row with its origin
# so the combined frame can be separated again later.
train_df = pd.read_csv("train.csv", dtype=dtypes, parse_dates=["Month"])
test_df = pd.read_csv("test.csv", dtype=dtypes, parse_dates=["Month"])
train_df["is_train"] = True
test_df["is_train"] = False

# Stack train on top of test; each file keeps its own row labels.
df = pd.concat([train_df, test_df])

In [None]:
# Preview the first rows of the combined train + test frame.
df.head()

In [None]:
# List the distinct raw values present in the Credit_Mix column.
np.unique(df["Credit_Mix"])

In [None]:
# Drop the irrelevant identifier columns; names that are absent are skipped silently.
df = df.drop(columns=["Name", "SSN", "ID"], errors="ignore")

**Find All Unique Values**

In [None]:
def get_unique_values(df, max_unique=5000):
    """Summarise the distinct values of every object-dtype column.

    Args:
        df: Frame to inspect; only columns selected by ``select_dtypes("object")``
            are reported.
        max_unique: Columns with more distinct values than this are left out of
            the summary (default 5000, the previously hard-coded threshold).

    Returns:
        DataFrame with one row per reported column and the columns
        ``column``, ``unique`` (sorted values as strings), ``counts``,
        ``len_unique_values`` and ``%_unique_values`` (share of each value,
        rounded to two decimals).
    """
    cat_cols = df.select_dtypes("object").columns

    # Collect rows only for the columns that are kept. Pre-allocating one row per
    # column and skipping some used to leave all-zero placeholder rows behind.
    rows = []
    for col in cat_cols:
        if len(df[col].unique()) > max_unique:
            continue
        # Cast to str first so missing values are counted as the string "nan".
        unique_values, counts = np.unique(
            np.array(df[col], dtype=str), return_counts=True)
        rows.append([
            col,
            unique_values.tolist(),
            counts.tolist(),
            len(unique_values),
            np.round(counts / counts.sum(), 2),
        ])

    # Fill an object array cell by cell so list/array cells are stored as-is.
    data_info = np.empty((len(rows), 5), dtype="object")
    for i, row in enumerate(rows):
        for j, cell in enumerate(row):
            data_info[i, j] = cell

    return pd.DataFrame(data_info, columns=["column", "unique", "counts", "len_unique_values", "%_unique_values"])

In [None]:
# Build the per-column summary of distinct values and preview it.
unique_values_df = get_unique_values(df)
unique_values_df.head()

**Data Processing**

In [None]:
class DataProcessor:
    """Clean the raw credit data and impute missing values per group.

    Args:
        groupby: Column name handed to ``DataFrame.groupby``; group-level modes
            and medians are used to repair values inside each group.
        data_frame: Frame to clean. ``preprocess`` writes the cleaned ``Age``
            column into this object before rebinding ``self.df`` to new frames.
    """

    # Junk token that appears in ``Payment_Behaviour`` in place of a real label.
    _PAYMENT_BEHAVIOUR_JUNK = "!@9#%8"

    def __init__(self, groupby, data_frame):
        self.groupby = groupby
        self.df = data_frame

    def get_month(self, x):
        """Convert a text duration to a month count.

        The first number found in ``x`` is multiplied by 12 and the last number
        is added (e.g. "22 Years and 1 Months" -> 265). Missing values are
        returned unchanged; previously that branch fell through and gave None.
        """
        if pd.isnull(x):
            return x
        year_month = re.findall(r"\d+", x)
        return (int(year_month[0]) * 12) + np.int64(year_month[-1])

    @staticmethod
    def get_numbers(text):
        """Return every run of digits in ``text`` joined by commas."""
        digits = re.findall(r'\d+', str(text))
        digits = ','.join(digits)
        return digits

    @staticmethod
    def replace_special_character(text):
        """Normalise one cell: map "NM" to "No" and strip underscores.

        Values containing "payments" or no underscore are returned untouched;
        a value that collapses to the string "nan" becomes a real NaN.
        """
        if "NM" in str(text):
            return "No"

        if "payments" in str(text) or "_" not in str(text):
            return text

        clean_text = str(text).replace("_", "")
        return np.nan if clean_text == "nan" else clean_text

    @staticmethod
    def preprocess_text(texts) -> tuple[dict, list[str]]:
        """Tokenise comma-separated texts.

        Args:
            texts: Iterable of values; each is lower-cased via ``str`` and has
                the substring "and" removed before splitting on commas.

        Returns:
            ``(dictionary, joined)`` where ``dictionary`` maps each distinct
            token to its first-seen index and ``joined`` holds, per input, the
            stripped tokens joined with "|".
        """
        dictionary = {}

        tokens = [str(text).lower().replace("and", "").split(",") for text in texts]
        # `token not in string.punctuation` is a substring test, so it also
        # discards empty tokens produced by consecutive commas.
        tokens = [[token.strip() for token in token_list if token not in string.punctuation] for token_list in tokens]
        for token_list in tokens:
            for token in token_list:
                if token not in dictionary:
                    dictionary[token] = len(dictionary)
        return (dictionary, ["|".join(words) for words in tokens])

    @staticmethod
    def _fill_with_mode(x):
        """Fill missing values with the first mode; all-missing input is returned as-is."""
        # A group with no observed value has an empty mode, so `mode()[0]` would raise.
        return x.fillna(x.mode()[0]) if x.notna().any() else x

    @staticmethod
    def _replace_blank_with_mode(x):
        """Replace empty strings with the first mode; all-missing input is returned as-is."""
        if not x.notna().any():
            return x
        return x.replace("", x.mode()[0])

    @staticmethod
    def _replace_junk_payment_behaviour(x):
        """Replace the junk token with the group's mode, or NaN if the mode is junk too."""
        junk = DataProcessor._PAYMENT_BEHAVIOUR_JUNK
        modes = x.mode()
        if modes.empty or modes[0] == junk:
            # No trustworthy mode: mark as missing and let `fill_na` impute it.
            return x.replace(junk, np.nan)
        return x.replace(junk, modes[0])

    @staticmethod
    def fill_na(df: pd.DataFrame, groupby=None):
        """Impute missing values group by group.

        Non-numeric columns (except ``is_train``, ``Credit_Score`` and
        ``Type_of_Loan``) receive the group's first mode, numeric columns the
        group's median; missing ``Type_of_Loan`` becomes "not specified".

        Args:
            df: Frame to impute; it is modified and also returned.
            groupby: Grouping key for ``DataFrame.groupby``. The default of
                ``None`` is not a usable key, so callers must supply one.
        """
        cat_features = df.select_dtypes(exclude="number").columns.drop(
            ["is_train", "Credit_Score", "Type_of_Loan"])
        num_features = df.select_dtypes(include="number").columns

        # Assign the result back: `fillna(inplace=True)` on a column selection is
        # chained assignment and is not guaranteed to update `df`.
        df["Type_of_Loan"] = df["Type_of_Loan"].fillna("not specified")

        def fill_na_cat(df):
            df[cat_features] = df.groupby(groupby)[cat_features].transform(
                DataProcessor._fill_with_mode)
            return df

        def fill_na_num(df):
            df[num_features] = df.groupby(groupby)[num_features].transform(
                lambda x: x.fillna(x.median()))
            return df

        df = fill_na_cat(df)
        df = fill_na_num(df)
        return df

    def preprocess(self):
        """Run the full cleaning pipeline and return the cleaned frame."""
        self.df['Age'] = self.df.Age.apply(DataProcessor.get_numbers)
        self.df = self.df.applymap(DataProcessor.replace_special_character)
        # Columns that cannot be parsed as numbers are left unchanged.
        self.df = self.df.apply(lambda x: pd.to_numeric(x, errors="ignore"))
        self.df["Credit_Mix"] = self.df.groupby(self.groupby)["Credit_Mix"].transform(
            DataProcessor._replace_blank_with_mode)
        # The previous call passed a single argument to `replace` (no replacement
        # value) and compared the mode against a differently spelled token.
        self.df["Payment_Behaviour"] = self.df.groupby(self.groupby)["Payment_Behaviour"].transform(
            DataProcessor._replace_junk_payment_behaviour)
        self.df["Type_of_Loan"] = self.df[["Type_of_Loan"]].apply(lambda x:  DataProcessor.preprocess_text(x.values)[-1])
        # regex=False: "|" must be treated as a literal separator, not as regex alternation.
        self.df["Type_of_Loan"] = self.df["Type_of_Loan"].str.replace(
            " ", "_", regex=False).str.replace("|", " ", regex=False)
        self.df["Credit_History_Age"] = self.df["Credit_History_Age"].apply(lambda x: self.get_month(x))
        self.df["Monthly_Balance"] = pd.to_numeric(self.df.Monthly_Balance, errors="coerce")
        self.df = DataProcessor.fill_na(self.df, "Customer_ID")

        return self.df

In [None]:
# Clean the combined frame, using Customer_ID as the grouping key for imputation.
preprocesor = DataProcessor("Customer_ID", df)
data = preprocesor.preprocess()

In [None]:
# A negative number of bank accounts is invalid: clamp it to zero. The mask is
# built from `data` (the frame being edited) rather than the raw `df`.
data.loc[data["Num_Bank_Accounts"] < 0, "Num_Bank_Accounts"] = 0
# Convert the leftover placeholders (the text "nan", or an empty string) into real NaN.
data.loc[data["Type_of_Loan"] == "nan", "Type_of_Loan"] = np.nan
data.loc[data["Occupation"] == "", "Occupation"] = np.nan
data.loc[data["Credit_Mix"] == "", "Credit_Mix"] = np.nan

**Custom Outlier Removal and Skewness Correction**

In [None]:
class ClipOutliersTransformer(BaseEstimator, TransformerMixin):
    """Treat values outside quantile-based fences as outliers.

    The fences are ``q_low - multiply_by * (q_high - q_low)`` and
    ``q_high + multiply_by * (q_high - q_low)``, computed in ``fit``.

    Args:
        lower_quantile: Lower quantile (0-1) used for the fences.
        upper_quantile: Upper quantile (0-1) used for the fences.
        multiply_by: Multiplier applied to the inter-quantile range.
        replace_with_median: If True, outliers are replaced by the median
            learned in ``fit``; otherwise they are clipped to the fences.
    """

    def __init__(self,
                 lower_quantile,
                 upper_quantile,
                 multiply_by=1.5,
                 replace_with_median: bool = False):
        self.lower_quantile = lower_quantile
        self.upper_quantile = upper_quantile
        self.multiply_by = multiply_by
        self.replace_with_median = replace_with_median

        self.lower_limit = 0
        self.upper_limit = 0
        self.median_ = None
        self.feature_names_in_ = None

    def fit(self, X, y=None):
        """Learn the fences and the median from ``X``; ``y`` is ignored."""
        q1, q3 = np.quantile(X, [self.lower_quantile, self.upper_quantile])
        iqr = q3 - q1
        self.lower_limit = q1 - (self.multiply_by * iqr)
        self.upper_limit = q3 + (self.multiply_by * iqr)
        # Remember the training median so `transform` applies the same
        # replacement value to any later data.
        self.median_ = np.median(X)
        # self.feature_names_in_ = X.columns.tolist()

        return self

    def transform(self, X):
        """Return ``X`` with out-of-fence values replaced or clipped."""
        if self.replace_with_median:
            # Fall back to the median of X for instances created (or unpickled)
            # before `median_` existed.
            median = getattr(self, "median_", None)
            if median is None:
                median = np.median(X)
            inside = (X >= self.lower_limit) & (X <= self.upper_limit)
            return np.where(inside, X, median)
        return np.clip(X, self.lower_limit, self.upper_limit)




def get_skewness(df, lower=None, upper=None):
    """Split the columns of ``df`` by how skewed they are.

    Args:
        df: Frame whose column-wise skewness (``DataFrame.skew``) is evaluated.
        lower: Skewness at or below this value counts as highly skewed.
            ``None`` means no lower bound.
        upper: Skewness at or above this value counts as highly skewed.
            ``None`` means no upper bound.

    Returns:
        ``(highly_skewed, lowly_skewed)`` lists of column names. Columns whose
        skewness is NaN appear in neither list.
    """
    # The declared defaults used to crash (a Series cannot be compared with None).
    lower = -np.inf if lower is None else lower
    upper = np.inf if upper is None else upper

    skewness: pd.Series = df[df.columns].skew()
    is_high = (skewness <= lower) | (skewness >= upper)
    is_low = (skewness > lower) & (skewness < upper)
    return (skewness[is_high].index.to_list(), skewness[is_low].index.to_list())


def remove_outliers(df: pd.DataFrame):
    """Treat outliers in every numeric column of ``df``.

    Columns with skewness outside (-0.8, 0.8) have their outliers replaced by
    the column median; the remaining numeric columns are clipped to the
    1.5 * IQR fences. ``df`` is modified in place and also returned.
    """
    numbers = df.select_dtypes(include="number").columns

    highly_skewed, lowly_skewed = get_skewness(df[numbers],
                                               lower=-0.8,
                                               upper=0.8)

    def _treat(columns, replace_with_median):
        # Fit one transformer per column so each column gets its own fences.
        if not columns:
            return
        df[columns] = df[columns].apply(
            lambda x: ClipOutliersTransformer(
                0.25, 0.75, multiply_by=1.5,
                replace_with_median=replace_with_median).fit_transform(x))

    _treat(highly_skewed, True)
    _treat(lowly_skewed, False)
    return df

In [None]:
# Apply the skewness-aware outlier treatment to all numeric columns.
data = remove_outliers(data)

In [None]:
def make_boxplot(df, column, ax):
    """Draw a box plot of ``column`` per Credit_Score class on ``ax``.

    Args:
        df: Frame containing ``column`` and "Credit_Score".
        column: Name of the column plotted on the y axis.
        ax: Matplotlib axes to draw on.
    """
    sns.boxplot(x="Credit_Score", y=column, data=df, ax=ax, width=0.8, palette="Set2")
    # Style the axes that was passed in instead of pyplot's "current" axes, so
    # the title and ticks always land on the subplot being drawn.
    ax.set_title(column, fontdict={"fontsize": 10})
    ax.tick_params(axis="x", labelrotation=0)

In [None]:
# Global matplotlib defaults: black ("k") tick and text colour, 80 dpi figures.
matplotlib.rc(("xtick", "ytick", "text"), c="k")
matplotlib.rc("figure", dpi=80)

In [None]:
def plot_boxplot_num_cols(df):
    """Show one box plot per numeric column of ``df`` in a 4-column grid."""
    fig = plt.figure(figsize=(18, 14), dpi=300)
    numb_columns = df.select_dtypes(include="number").columns

    n_cols = 4
    # Keep the original five rows, but grow the grid when there are more than
    # 20 numeric columns (ceil division) instead of failing in add_subplot.
    n_rows = max(5, -(-len(numb_columns) // n_cols))

    # enumerate gives each column its own slot, even if names repeat.
    for position, column in enumerate(numb_columns, start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        make_boxplot(df, column, ax)

    # One layout pass at the end is enough; the last call decides the final layout.
    plt.tight_layout()
    plt.show()

**Box Plot**

In [None]:
# Box plots of every numeric column, split by Credit_Score.
plot_boxplot_num_cols(data)

In [None]:
# Replace the row labels with a fresh 0..n-1 index (the old labels are discarded),
# then count rows per Credit_Mix value, split by Credit_Score.
data = data.reset_index(drop=True)
sns.countplot(data=data, x="Credit_Mix", hue="Credit_Score")

In [None]:
# Persist the cleaned frame (without the index) to the working directory.
data.to_csv("clean_data_credit.csv", index=False)

In [None]:
import os
# Sanity check: evaluates to True when the CSV exists in the current directory.
"clean_data_credit.csv" in os.listdir()

In [None]:
# Reload the cleaned data from disk; from here on `df` is the cleaned frame
# as parsed back from CSV (the in-memory version remains available as `data`).
df = pd.read_csv("clean_data_credit.csv")

In [None]:
df_copy = data.copy()


def _median_outside_quartiles(x):
    """Replace values outside the [Q1, Q3] range of ``x`` with the median of ``x``."""
    q1, q3 = np.quantile(x, [0.25, 0.75])
    # Each comparison needs its own parentheses: `|` binds tighter than `<`, so the
    # previous expression evaluated `((x > q) | x) < q`. Both bounds also used the
    # 0.75 quantile; the lower bound is taken to be the 0.25 quantile here.
    return np.where((x > q3) | (x < q1), np.median(x), x)


# Smooth each customer's Monthly_Balance before aggregating it for the charts below.
df_copy["Monthly_Balance"] = df_copy.groupby("Customer_ID")["Monthly_Balance"].transform(
    _median_outside_quartiles)

In [None]:
# Mean Monthly_Balance for every (Credit_Score, Credit_Mix) pair, flattened so
# the two grouping keys become ordinary columns.
cross_tab = pd.crosstab(values=df_copy["Monthly_Balance"], index=[
                        df_copy["Credit_Score"], df_copy["Credit_Mix"]], columns="Monthly_Balance", aggfunc="mean").reset_index()

# Average of those pair-level means per Credit_Score (feeds the outer pie ring below).
main_group = pd.pivot_table(cross_tab, "Monthly_Balance", "Credit_Score", aggfunc=np.mean)
cross_tab

In [None]:
# Short aliases for the two colormaps used to colour the nested pie chart.
b = plt.cm.Blues
a = plt.cm.Accent

In [None]:
# Nested pie chart: the outer ring shows Monthly_Balance per Credit_Score
# (main_group), the inner ring shows it per row of cross_tab.
fig, ax = plt.subplots(figsize=(6, 4))
fig.suptitle("Distribution of Monthly_Balance by Credit Score & Credit Mix",
             fontsize=11,
             color="k")
# fig.patch.set_alpha(0)
# fig.patch.set_facecolor("#ff9999")
fig.set_frameon(True)

# Outer ring: one wedge per Credit_Score, labelled with the class name.
pie1, *_, texts = ax.pie(x=main_group["Monthly_Balance"],
                         labels=main_group.index,
                         autopct="%.1f%%",
                         radius=1.3,
                         colors=[a(80, 1), b(100, 1),
                                 a(0, 1)],
                         pctdistance=0.8,
                         textprops={"size": 9},
                         frame=True)
# Setting the wedge width turns the pie into a ring.
plt.setp(pie1, width=0.5)
ax.set_frame_on(True)

# Inner ring: one wedge per cross_tab row. Nine colours are supplied, in three
# shades for each of the three base colours used by the outer ring.
pie2, *_, texts = ax.pie(x=cross_tab["Monthly_Balance"],
                         autopct="%.0f%%",
                         radius=0.8,
                         colors=[
                             a(80, 0.9),
                             a(80, 0.8),
                             a(80, 0.7),
                             b(100, 0.9),
                             b(100, 0.8),
                             b(100, 0.7),
                             a(0, 0.8),
                             a(0, 0.65),
                             a(0, 0.5)
],
    textprops={"size": 8})
plt.setp(pie2, width=0.5)
legend_labels = np.unique(cross_tab["Credit_Mix"])

# Empty line plots exist only to carry the Credit_Mix labels into the legend.
# NOTE(review): `legend_handles` is never passed to plt.legend; the legend appears
# to be built from the labelled artists already on the axes - confirm this is intended.
legend_handles = [
    plt.plot([], label=legend_labels[0], c="k"),
    plt.plot([], label=legend_labels[1], c='b'),
    plt.plot([], label=legend_labels[-1], c="g")
]
plt.legend(shadow=True,
           frameon=True,
           facecolor="inherit",
           loc="best",
           title="credit Score & Mix",
           bbox_to_anchor=(1, 1, 0.5, 0.1))

plt.show()

In [None]:
def make_countplot(df: pd.DataFrame):
    """Plot count charts of the categorical columns, split by Credit_Score.

    Args:
        df: Frame that contains "Credit_Score", "is_train", "Customer_ID",
            "Type_of_Loan" and "Payment_Behaviour" (all are referenced by name).
    """

    # All non-numeric columns except the target, the split flag, the customer
    # key and the free-text loan column.
    cat_cols = df.select_dtypes(exclude="number").columns.drop(
        ['Credit_Score', 'is_train', 'Customer_ID', "Type_of_Loan"])
    cat_cols = list(cat_cols)
    # Drop the last remaining column and insert "Payment_Behaviour" two places
    # from the end.
    # NOTE(review): this assumes a particular column order in `df` - verify.
    cat_cols.pop(-1)
    cat_cols.insert(-2, "Payment_Behaviour")

    # Background axes covering the whole figure, with grid and tick labels hidden;
    # the actual charts are added on top of it as subplots.
    fig, axes = plt.subplots(figsize=(12, 6), dpi=300)
    fig.suptitle("Counts of categorical columns")
    axes.grid(visible=False)
    axes.xaxis.set_tick_params(labelbottom=False)
    axes.yaxis.set_tick_params(labelleft=False)

    def __plot_graph(df, col, ax: plt.Axes, legend=False):
        # Draw one count plot for `col` on `ax`; the legend is shown only on request.
        sns.countplot(
            data=df,
            x=col,
            ax=ax,
            hue="Credit_Score",
        )
        # label =ax.get_xlabel()
        ax.set_xlabel(col, fontdict={"size": 9})
        ax.set_title(f"by {col}", fontdict={"size": 9})
        ax.get_xticklabels()
        ax.tick_params(labelsize=7, axis="y")
        # Re-apply the existing x tick labels rotated by 90 degrees at size 7.
        ax.set_xticklabels(ax.get_xticklabels(),
                           rotation=90,
                           fontdict=dict(size=7))
        ax.grid(False)
        if legend:
            ax.legend(shadow=True,
                      loc="best",
                      facecolor="inherit",
                      frameon=True)
        else:
            ax.legend_ = None
        plt.tight_layout(w_pad=1)

    # Fill a 2x3 grid; slot 3 is skipped (and with it the third entry of cat_cols)
    # because that region is used by the manually placed axes below.
    for i, col in enumerate(cat_cols, 1):
        if i == 3:
            continue
        ax = fig.add_subplot(2, 3, i)
        __plot_graph(df, col=col, ax=ax)

    # Payment_Behaviour is drawn on axes placed at explicit figure coordinates,
    # and it is the only chart that shows the legend.
    ax2 = fig.add_axes((0.74, 0.527, 0.23, 0.35))
    __plot_graph(df, col="Payment_Behaviour", ax=ax2, legend=True)
    plt.show(True)

In [None]:
# Count plots for the frame reloaded from clean_data_credit.csv.
make_countplot(df)

**Correlation Heatmap**

In [None]:
# Pairwise correlation of the numeric columns (the is_train flag is excluded).
corr = data.drop(["is_train"], axis=1).corr(numeric_only=True)
# Mask the upper triangle (diagonal included) so each pair is shown only once.
mask = np.triu(np.ones_like(corr, dtype=bool))

fig = plt.figure(figsize=(10, 6), dpi=150)

# Cells are annotated as whole percentages.
sns.heatmap(corr, annot=True, mask=mask, fmt=".0%", annot_kws={"size":10})
plt.grid(False)
plt.tick_params(axis="both", labelsize=5)
plt.tight_layout()
plt.title("Correlation Matrix")
plt.show()

In [None]:
# How many test-set rows (is_train == False) fall into each Month value.
df.loc[~df["is_train"], "Month"].value_counts()

**Hypothesis Testing with Chi-Square and F-Tests**  
Assess the relationship between categorical and numerical columns in a DataFrame and the target variable 'Credit_Score.'  
The Chi-Square test evaluates whether each categorical variable is independent of 'Credit_Score', while the ANOVA F-test (`f_classif`) checks whether the mean of each numerical variable differs across the 'Credit_Score' classes. The results include the test statistic and p-value for each column, helping identify the features most strongly associated with 'Credit_Score'.

In [None]:
def chi_2_test(df:pd.DataFrame):
    """Test every feature against Credit_Score on the training rows.

    Non-numeric columns (except Customer_ID, Month, is_train and Credit_Score)
    get a chi-square test of independence; numeric columns get the ANOVA
    F-test from ``f_classif``.

    Args:
        df: Frame with a boolean ``is_train`` column and a ``Credit_Score`` column.

    Returns:
        DataFrame with columns ``column``, ``t-statistic`` and ``p-value``;
        categorical features come first, numeric features after them.
    """
    # Only labelled (training) rows take part in either test.
    df_copy = df.loc[df["is_train"]].copy()
    cat_cols = df_copy.select_dtypes(exclude="number").columns.drop(["Customer_ID", "Month", "is_train", "Credit_Score"])
    numb_columns = df_copy.select_dtypes(include="number").columns

    summary = np.empty((len(cat_cols)+len(numb_columns), 3), dtype="object")
    # Integer-encode the target for f_classif.
    y, *_ = df_copy["Credit_Score"].factorize(sort=False)

    # Chi-square on the training subset (previously built from the unfiltered frame).
    for i, col in enumerate(cat_cols):
        cross = pd.crosstab(index=df_copy[col], columns=[df_copy["Credit_Score"]])
        t_stat, pvalue, *_ = chi2_contingency(cross)
        summary[i, :] = [col, t_stat, pvalue]

    # F-test, one feature at a time; y is passed as the 1-D array that
    # f_classif expects rather than as a column vector.
    for i, col in enumerate(numb_columns, start=len(cat_cols)):
        t_stat, pvalue = f_classif(df_copy[[col]], y)
        summary[i, :] = [col, t_stat[0], pvalue[0]]

    return pd.DataFrame(
        data=summary,
        columns=["column", 't-statistic', "p-value"]
    )

In [None]:
# Run both tests on the cleaned data and rank features by test statistic (largest first).
chi2_summary = chi_2_test(data).sort_values(by="t-statistic", ascending=False)

In [None]:
# Styled table: in-cell bars plus a blue gradient on the statistic column.
chi2_summary.style.bar("t-statistic").background_gradient(
    "Blues", subset="t-statistic")

In [None]:
# Horizontal bar chart of the test statistic per feature, with size-8 tick labels.
fig, ax = plt.subplots(figsize=(7, 5))
sns.barplot(data=chi2_summary, y="column", x="t-statistic", ax=ax)
plt.setp([ax.get_xticklabels(), ax.get_yticklabels()], size=8)
fig.show()

In [None]:
def transform_columns(data_frame: pd.DataFrame):
    """Build and fit the column-wise preprocessing for ``data_frame``.

    Non-numeric columns are imputed with their most frequent value; numeric
    columns are imputed iteratively (median start, seed 42) and then min-max
    scaled. Columns belonging to neither group are dropped.

    Returns:
        The fitted ``ColumnTransformer``.
    """
    categorical_cols = data_frame.select_dtypes(exclude="number").columns.tolist()
    numeric_cols = data_frame.select_dtypes(include="number").columns.tolist()

    # Categorical branch: imputation only.
    categorical_steps = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
    ])

    # Numeric branch: impute first, then scale.
    numeric_steps = Pipeline(steps=[
        ("imputer", IterativeImputer(initial_strategy="median", random_state=42)),
        ("scaling", MinMaxScaler()),
    ])

    transformer = ColumnTransformer(
        [
            ("cat_transformer", categorical_steps, categorical_cols),
            ("numb_transformer", numeric_steps, numeric_cols),
        ],
        remainder="drop",
    )
    return transformer.fit(data_frame)

**Data Transformation with Custom Pipelines**

In [None]:
def split_data(data: pd.DataFrame, test_size=0.2) -> tuple[
        tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series], pd.DataFrame]:
    """Separate labelled from unlabelled rows and make a stratified hold-out split.

    Args:
        data: Cleaned frame with ``is_train``, ``Credit_Score``, ``Month``,
            ``Customer_ID`` and ``Delay_from_due_date`` columns.
        test_size: Fraction of the labelled rows held out for validation.

    Returns:
        ``((Xtrain, Xtest, ytrain, ytest), test_set)`` where ``test_set`` holds
        the unlabelled rows (``is_train == False``) without the target column.
        The annotation previously described a flat 4-tuple, which did not
        match this nested structure.
    """
    df_copy = data.copy()
    df_copy.drop(columns = ["Month", "Customer_ID"], inplace=True)
    # Negative delays are folded onto their magnitude.
    df_copy["Delay_from_due_date"] = df_copy["Delay_from_due_date"].abs()

    train_set = df_copy[df_copy["is_train"]].drop(["is_train"], axis=1)
    test_set = df_copy[df_copy["is_train"] == False].drop(
        ["is_train", "Credit_Score"], axis=1).reset_index(drop=True)

    X = train_set.drop("Credit_Score", axis=1)
    y = train_set.pop("Credit_Score")

    # Fixed seed and stratification keep the class balance and make the split repeatable.
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=test_size, stratify=y, random_state=2, shuffle=True)
    return ((Xtrain, Xtest, ytrain, ytest), test_set)


def combined_transformers(data, save_to_pickle=False):
    """Fit the outlier clipper and the column transformer on ``data``.

    Args:
        data: Training features.
        save_to_pickle: When True, both fitted objects are written to
            "OutlierRemover.pkl" and "ColumnsTransformers.pkl" in the
            current directory.

    Returns:
        ``(outlier_remover, transformer)``.
    """
    outlier_remover = ClipOutliersTransformer(0.25, 0.75, multiply_by=1.5, replace_with_median=False)
    # NOTE(review): all numeric columns are passed at once, so the fences appear to
    # be computed over their pooled values - confirm this is intended.
    outlier_remover.fit(data.select_dtypes("number"))
    transformer = transform_columns(data)
    if save_to_pickle:
        # Context managers flush and close the files; the handles used to be left open.
        with open("OutlierRemover.pkl", "wb") as file1:
            pickle.dump(outlier_remover, file1)
        with open("ColumnsTransformers.pkl", "wb") as file2:
            pickle.dump(transformer, file2)
    return (outlier_remover, transformer)


def get_transformed_data(data, transformers_to_pkl=False) -> tuple[
        tuple[pd.DataFrame, pd.DataFrame], tuple[pd.Series, pd.Series], pd.DataFrame]:
    """Split ``data`` and run every part through the fitted column transformer.

    Args:
        data: Cleaned frame accepted by ``split_data``.
        transformers_to_pkl: Forwarded to ``combined_transformers`` to pickle
            the fitted transformers.

    Returns:
        ``((Xtrain, Xtest), (ytrain, ytest), test_set)``. The transformed frames
        get a fresh 0..n-1 index; the label series are returned as produced by
        ``split_data``.
    """
    # transformer = kwargs.get("transformer")
    train_set, test_set = split_data(data)
    Xtrain, Xtest, ytrain, ytest = train_set
    # The transformer is fitted on the training part only.
    _, transformer = combined_transformers(Xtrain, save_to_pickle=transformers_to_pkl)

    # Output column order follows the transformer spec: each entry of
    # `transformers` ends with the list of columns it handles.
    columns = list(itertools.chain.from_iterable(
        spec[-1] for spec in transformer.transformers))

    def _to_frame(features):
        # Transformed output comes back as an array; restore column names and
        # numeric dtypes where possible.
        return pd.DataFrame(transformer.transform(features), columns=columns).apply(
            pd.to_numeric, errors="ignore")

    Xtrain = _to_frame(Xtrain)
    Xtest = _to_frame(Xtest)
    test_set = _to_frame(test_set)
    return ((Xtrain, Xtest), (ytrain, ytest), test_set)

In [None]:
# Split + transform the cleaned data, pickling the fitted transformers.
((Xtrain, Xtest), (ytrain, ytest), test_set) = get_transformed_data(data, transformers_to_pkl=True)

# Every non-numeric column except Type_of_Loan is passed to CatBoost as a
# categorical feature; Type_of_Loan is passed as a text feature.
cat_columns = Xtrain.select_dtypes(exclude="number").columns.drop(["Type_of_Loan"]).tolist()
text_column = ["Type_of_Loan"]

**CatBoost Model Training Functions**

In [None]:
def create_pool(xtrain, ytrain, xtest, ytest, cat_features, text_features):
    """Wrap the train and evaluation data in CatBoost ``Pool`` objects.

    Both pools share the same categorical and text feature declarations.

    Returns:
        ``(train_pool, eval_pool)``.
    """
    feature_spec = {"cat_features": cat_features, "text_features": text_features}
    train_pool = Pool(data=xtrain, label=ytrain, **feature_spec)
    eval_pool = Pool(data=xtest, label=ytest, **feature_spec)
    return (train_pool, eval_pool)


def catboost_model(params=None):
    """Create an unfitted ``CatBoostClassifier`` with the notebook's defaults.

    Args:
        params: Optional dict of CatBoost parameters that override or extend
            the defaults below. ``None`` (the default) means no overrides; a
            mutable ``{}`` default is avoided because it is shared across calls.
    """
    cb_params = {
        "iterations": 10000,
        "custom_metric": ["F1", "AUC", "Accuracy"],
        "thread_count": -1,
        "random_state": 42,
        "train_dir": "/catboost"

    }
    if params:
        cb_params.update(params)

    model = CatBoostClassifier(**cb_params)
    return model


def fit_model(xtrain, ytrain, xtest, ytest, cat_features, text_features, params=None, **kwargs):
    """Train a CatBoost classifier with early stopping on the evaluation set.

    Args:
        xtrain, ytrain: Training features and labels.
        xtest, ytest: Evaluation features and labels, used as ``eval_set``.
        cat_features: Names of the categorical feature columns.
        text_features: Names of the text feature columns.
        params: Optional dict of CatBoost parameter overrides (``None`` means none).
        **kwargs: Extra keyword arguments forwarded to ``model.fit``.

    Returns:
        The fitted model.
    """
    Xtrain_pool, Xtest_pool = create_pool(xtrain, ytrain, xtest, ytest, cat_features, text_features)
    # Always hand a dict downstream so the call works whatever default the factory uses.
    model = catboost_model(params={} if params is None else params)
    # Training stops once the eval metric has not improved for 500 rounds.
    model.fit(Xtrain_pool, eval_set=Xtest_pool, early_stopping_rounds=500, **kwargs)
    return model

**CatBoost Classifier Training**

In [None]:
# Baseline model on all features, trained with task_type "GPU" and balanced
# class weights; progress is logged every 500 iterations.
model = fit_model(
    Xtrain, ytrain,
    Xtest, ytest,
    cat_columns, text_column,
    plot=False,
    verbose=500,
    params = {
        "text_processing": ["NaiveBayes+Word|BoW+Word"],
        "task_type": "GPU",
        "one_hot_max_size": 3,
        "depth": 6,
        "auto_class_weights": "Balanced",
        "bootstrap_type": "Poisson",
        "subsample":0.5,
        "max_bin": 100

    }
)

In [None]:
# Colab-only: enable the custom widget manager
# (presumably required by the CatBoost widget started in the next cell).
from google.colab import output
output.enable_custom_widget_manager()

In [None]:
import catboost

# Start CatBoost's metric visualizer on the training directory set via
# "train_dir" in catboost_model.
w = catboost.MetricVisualizer('/catboost/')
w.start()

In [None]:
# Rebuild the pools at notebook level; Xtest_pool is needed for feature importance below.
Xtrain_pool, Xtest_pool = create_pool(Xtrain, ytrain, Xtest, ytest, cat_columns, text_column)

In [None]:
# Importance of each feature for the baseline model, computed on the hold-out pool
# with the "LossFunctionChange" type; the prettified table is used below via its
# "Feature Id" and "Importances" columns.
importance = model.get_feature_importance(Xtest_pool, type="LossFunctionChange", prettified=True)

In [None]:
# Horizontal bar chart of the importance table.
sns.barplot(data=importance, y="Feature Id", x = "Importances")
plt.title("Feature Importance in CatBoost")
plt.show()

**Model Refinement with Feature Selection**

In [None]:
# Drop the features whose importance is negative, then redo the split and
# transformation on the reduced frame. This overwrites the pickled transformers
# and rebinds `test_set`, `cat_columns` and `text_column`.
df_copy = data.copy().drop(importance.query("Importances < 0.000")["Feature Id"], axis=1)
((Xtrain1, Xtest1), (ytrain1, ytest1), test_set) = get_transformed_data(df_copy, transformers_to_pkl=True)

cat_columns = Xtrain1.select_dtypes(exclude="number").columns.drop(["Type_of_Loan"]).tolist()
text_column = ["Type_of_Loan"]

In [None]:
# Refined model on the reduced feature set; compared with the baseline call it
# passes "max_depth": 7 where the baseline passed "depth": 6.
model_refined = fit_model(
    Xtrain1, ytrain1,
    Xtest1, ytest1,
    cat_columns, text_column,
    plot=False,
    verbose=500,
    params = {
        "text_processing": ["NaiveBayes+Word|BoW+Word"],
        "task_type": "GPU",
        "one_hot_max_size": 3,
        "auto_class_weights": "Balanced",
        "max_depth": 7,
        "bootstrap_type": "Poisson",
        "subsample":0.5,
        "max_bin": 100
    }
)

In [None]:
# Persist the refined model to the working directory.
model_refined.save_model("credit_score_prediction_model-cb-v1")

**Model Evaluation**

In [None]:
# Load the saved refined model into a fresh classifier to verify the round trip.
loaded_model = CatBoostClassifier()
loaded_model.load_model(r"credit_score_prediction_model-cb-v1")

In [None]:
# `loaded_model` is the refined model, which was trained and validated on the
# second split (Xtest1 / ytest1). Score it on that same hold-out set rather than
# on the first split, whose features went through a different fitted transformer.
y_hat = loaded_model.predict(Xtest1[loaded_model.feature_names_])
acc_score = accuracy_score(ytest1, y_hat)
# average=None gives one score per class, in the order of loaded_model.classes_.
f1 = f1_score(ytest1, y_hat, average=None)
precision = precision_score(ytest1, y_hat, average=None)
recall = recall_score(ytest1, y_hat, average=None)

print(f"Accuracy: {acc_score:.2%}\n\
F1 Score: {dict(zip(loaded_model.classes_, map(lambda x: f'{x:.2%}', f1)))}\n\
Precision Score: {dict(zip(loaded_model.classes_, map(lambda x: f'{x:.2%}', precision)))}\n\
Recall Score: {dict(zip(loaded_model.classes_, map(lambda x: f'{x:.2%}', recall)))}\n\
")


# Confusion matrix annotated with raw counts and the share of each true class.
cm = confusion_matrix(ytest1, y_hat)
cm_norm = confusion_matrix(ytest1, y_hat, normalize="true")
annot = np.array([f"{i}\n({g:.1%})" for i, g in zip(cm.flatten(), cm_norm.flatten())])
annot = annot.reshape(cm.shape)

fig = plt.figure(dpi=90)
sns.heatmap(cm, annot=annot, fmt="", xticklabels=loaded_model.classes_, yticklabels=loaded_model.classes_)
plt.title("Confusion Matrix")
plt.show()

**Model Evaluation Metrics**

In [None]:
# Per-class precision / recall / F1 of the refined model on its own hold-out split.
print(classification_report(ytest1, model_refined.predict(Xtest1)))