In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [6]:
def addlabels(x, y):
    """Write each value of ``y`` as a centred text label on the current plot.

    Parameters
    ----------
    x : sized collection
        Bar categories. Only ``len(x)`` is used; label ``i`` is placed at
        horizontal position ``i``.
    y : iterable
        Bar heights. Each value is used both as the vertical position of the
        label and as the label text.

    Raises
    ------
    IndexError
        If ``y`` holds fewer values than ``x``.
    """
    # Materialise the values so they are read by position. Indexing a pandas
    # Series with ``y[i]`` is a label lookup and breaks whenever the index is
    # not the default 0..n-1 range.
    heights = list(y)
    for position in range(len(x)):
        height = heights[position]
        plt.text(position, height, height, ha='center')

In [7]:
def frequency_table(x, str_name):
    """Build a two-column frequency table from ``x``.

    Parameters
    ----------
    x : object exposing ``value_counts()``
        Typically a pandas Series of categorical values.
    str_name : str
        Name given to the first column (the distinct values).

    Returns
    -------
    pandas.DataFrame
        One row per distinct value, in ``value_counts()`` order, with the
        columns ``[str_name, "frequency"]``.
    """
    counts = x.value_counts()
    # Move the distinct values out of the index into an ordinary column.
    table = counts.to_frame().reset_index()
    table.columns = [str_name, "frequency"]
    return table

In [8]:
def frequency_table_visual(frqt):
    """Show a labelled bar chart of a frequency table.

    Parameters
    ----------
    frqt : pandas.DataFrame
        The first column supplies the bar categories and the second column
        the bar heights (presumably the output of ``frequency_table`` --
        confirm against callers).
    """
    plt.figure(figsize=(20, 6))  # wide canvas so many categories fit

    categories = frqt.iloc[:, 0]
    heights = frqt.iloc[:, 1]

    plt.bar(categories.values, heights.values)
    addlabels(categories, heights)  # print each height on its bar
    plt.xticks(rotation=360, ha='center')  # keep tick labels horizontal
    plt.show()

In [9]:
def crosstable_visual(ct, ctp):
    """Show a stacked bar chart of ``ctp`` annotated with counts and percentages.

    Parameters
    ----------
    ct : pandas.DataFrame
        Cross table whose cells are written into the labels as counts.
    ctp : pandas.DataFrame
        Table with the same rows and columns as ``ct`` whose cells are plotted
        as the bar segments and shown as percentages (value * 100).
        Presumably row-normalised proportions of ``ct`` -- confirm with callers.
    """
    ctp.plot(kind='bar',
             stacked=True,
             colormap='tab20c',
             figsize=(20, 12))
    plt.legend(loc="upper left", ncol=2)
    plt.xlabel("")
    plt.ylabel("Proportion")
    plt.xticks(rotation=360, ha='center')  # keep tick labels horizontal

    for bar_position, row_label in enumerate(ct.index):
        proportions = ctp.loc[row_label]
        counts = ct.loc[row_label]
        # The running total gives the top edge of every stacked segment.
        segment_tops = proportions.cumsum()
        for proportion, count, y_loc in zip(proportions, counts, segment_tops):
            plt.text(x=bar_position - 0.1,
                     # vertical middle of the segment: bottom edge + half height
                     y=(y_loc - proportion) + (proportion / 2),
                     s=f'{count}\n({np.round(proportion * 100, 1)}%)',
                     color="black",
                     fontsize=12,
                     fontweight="bold")

    # Render as part of the call; previously this statement sat at module
    # level, so it ran once at definition time and never for the actual chart.
    plt.show()

In [1]:
def crosstable_visual_notext(ct, ctp):
    """Show a stacked bar chart of ``ctp`` without any text annotations.

    Parameters
    ----------
    ct : any
        Not used by this function.
    ctp : object exposing ``plot(**kwargs)``
        Table plotted as stacked bars (typically a pandas DataFrame).
    """
    bar_options = dict(
        kind='bar',
        stacked=True,
        colormap='tab20c',
        figsize=(20, 12),
    )
    ctp.plot(**bar_options)

    plt.legend(loc="upper left", ncol=2)
    plt.xlabel("")
    plt.ylabel("Proportion")
    plt.xticks(rotation=360, ha='center')  # keep tick labels horizontal
    plt.show()

In [1]:
def group_histograms(data, num_col, cat_col):
    """Overlay one histogram of ``num_col`` per distinct value of ``cat_col``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing both columns.
    num_col : str
        Column whose values are binned.
    cat_col : str
        Column whose distinct values define the groups; also used as the
        legend title.
    """
    plt.figure(figsize=(20, 12))

    # One histogram per group, in order of first appearance in the column.
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases --
    # confirm the installed version before migrating to sns.histplot.
    for level in data[cat_col].unique():
        in_group = data[cat_col] == level
        group_values = data.loc[in_group, :][num_col]
        sns.distplot(
            group_values,
            label=level,
            hist=True,
            kde=False,
            norm_hist=False,
            hist_kws={"edgecolor": "black"},
        )

    plt.legend(prop={'size': 16}, title=cat_col)
    plt.title("".join(["Histogram of ", num_col, " by ", cat_col]))
    plt.xlabel(num_col)
    plt.ylabel('Counts')
    plt.show()

In [2]:
def plot_roc_curve(fpr, tpr, label=None):
    """Show a ROC curve together with the diagonal reference line.

    Parameters
    ----------
    fpr : sequence of float
        False positive rates, plotted on the x axis.
    tpr : sequence of float
        True positive rates, plotted on the y axis.
    label : str, optional
        Legend entry for the curve. A legend is drawn only when a label is
        supplied.
    """
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')  # dashed diagonal from (0, 0) to (1, 1)
    plt.xlabel("False Positive Rate (1-specificity)")
    plt.ylabel("True Positive Rate (Recall)")
    # The figure is shown right below, so the caller has no later chance to
    # add a legend; without this the label would never become visible.
    if label is not None:
        plt.legend(loc="lower right")
    plt.show()

In [3]:
def plot_precision_recall_vs_threshold(precisions, recall, thresholds):
    """Show precision and recall as functions of the decision threshold.

    Parameters
    ----------
    precisions : sequence of float
        Precision values; the last element is dropped before plotting.
    recall : sequence of float
        Recall values; the last element is dropped before plotting.
    thresholds : sequence of float
        Threshold values for the x axis. Presumably one element shorter than
        ``precisions`` and ``recall`` (as returned by scikit-learn's
        ``precision_recall_curve``) -- confirm with callers.
    """
    plt.plot(thresholds, precisions[:-1], "b--", label="Precision")
    # Use the ``recall`` parameter; the body previously read an undefined
    # name ``recalls`` and only worked if a global of that name existed.
    plt.plot(thresholds, recall[:-1], "g-", label="Recall")
    plt.xlabel("threshold")
    plt.legend(loc="center left")
    plt.show()

In [4]:
def confusion_metrix_vis(cnf_matrix):
    """Draw an annotated heatmap of a confusion matrix on a new figure.

    Parameters
    ----------
    cnf_matrix : array-like
        Matrix of counts; it is wrapped in a pandas DataFrame before plotting.
        The tick setup is hard-coded for the two classes 0 and 1.

    Notes
    -----
    The function does not call ``plt.show()``.
    """
    labels = [0, 1]  # names of the classes
    positions = np.arange(len(labels))

    _, axes = plt.subplots()
    plt.xticks(positions, labels)
    plt.yticks(positions, labels)

    # Heatmap with the raw counts printed in each cell.
    matrix_frame = pd.DataFrame(cnf_matrix)
    sns.heatmap(matrix_frame, annot=True, cmap="YlGnBu", fmt='g')

    axes.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')