In [3]:
# Importing all the tools required

# Regular EDA (Exploratory Data Analysis) and plotting libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline 
# We want to view the plots in the IDE

# Models from Scikit-learn
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Model Evaluations
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import RocCurveDisplay, roc_auc_score, roc_curve, auc

In [4]:
# Creating a function for fitting and scoring the models
def fit_and_score(models, X_train, X_test, y_train, y_test, seed=45):
    '''
    Fits every model on the training data and scores it on the test data.

    About the parameters :-
    * models : Dict mapping a model name to an object exposing
               ``fit(X, y)`` and ``score(X, y)``.
    * X_train : Training data without the labels
    * X_test : Test data without the labels
    * y_train : Training labels
    * y_test : Test labels
    * seed : Value passed to ``np.random.seed`` before any model is fitted
             (default 45). Pass ``None`` to leave NumPy's global random
             state untouched.

    Returns a dict mapping each model name to the value returned by that
    model's ``score(X_test, y_test)``, in the iteration order of ``models``.

    Note: the models in ``models`` are fitted in place.
    '''

    # Seed NumPy's global RNG once, up front, so repeated calls start from the same state.
    if seed is not None:
        np.random.seed(seed)

    # Scores collected per model name
    model_score = {}

    for name, model in models.items():
        # Fit first, then score the freshly fitted model on the held-out data
        model.fit(X_train, y_train)
        model_score[name] = model.score(X_test, y_test)

    return model_score

In [5]:
# Visualizing the confusion matrix using Seaborn
sns.set_theme(font_scale=1.5) # Increase font size
 
def plot_conf_mat(y_test, y_preds):
    """
    Plots a confusion matrix using Seaborn's heatmap().

    Parameters
    ----------
    y_test : array-like
        True labels; passed to ``confusion_matrix`` as the first argument.
    y_preds : array-like
        Predicted labels; passed to ``confusion_matrix`` as the second argument.

    A new 3x3-inch figure is created on every call. Nothing is returned.
    """
    _, ax = plt.subplots(figsize=(3, 3))
    sns.heatmap(confusion_matrix(y_test, y_preds),
                annot=True,  # Annotate the boxes
                fmt="d",     # Integer counts; the default ".2g" would render 120 as 1.2e+02
                cbar=False,
                ax=ax)       # Draw on the axes created above, not the implicit current axes
    ax.set_xlabel("Predicted label")  # predictions go on the x-axis
    ax.set_ylabel("True label")  # true labels go on the y-axis
    


In [6]:
def cv_eval_metric(name, model, cv=5):
    """
    Cross-validates ``model`` and returns its mean accuracy, precision, recall and F1.

    All four metrics come from a single ``cross_validate`` run, so the model
    is fitted once per fold and every metric is measured on the same fitted
    estimators, instead of re-running the whole cross-validation per metric.

    Parameters
    ----------
    name : object
        Label for the model; converted with ``str()``.
    model : estimator
        scikit-learn compatible classifier handed to ``cross_validate``.
    cv : int or cross-validation splitter, default 5
        Forwarded unchanged to ``cross_validate``.

    Returns
    -------
    tuple
        ``(model_name, metrics)`` where ``metrics`` is a dict with the keys
        'Accuracy', 'Precision', 'Recall' and 'F1 score', each holding the
        mean of that score across the folds.

    Notes
    -----
    Reads the notebook-level variables ``X`` (features) and ``y`` (labels);
    both must be defined before this function is called.
    NOTE(review): the 'precision', 'recall' and 'f1' scorers default to
    binary averaging, so ``y`` is presumably a binary target - confirm.
    """
    model_name = str(name)

    # Display name -> scikit-learn scorer string (insertion order fixes the output order)
    scorers = {
        'Accuracy': 'accuracy',
        'Precision': 'precision',
        'Recall': 'recall',
        'F1 score': 'f1',
    }

    cv_results = cross_validate(model,
                                X,
                                y,
                                cv=cv,
                                scoring=list(scorers.values()))

    # cross_validate stores each metric's per-fold scores under "test_<scorer>"
    metrics = {label: np.mean(cv_results[f"test_{scorer}"])
               for label, scorer in scorers.items()}

    return model_name, metrics