## Functions

#### Encode Column

In [7]:
def enc_col(df, col, vals, codes, new_col_name):
    """Numerically encode a dataframe column in place and rename it.

    Args:
        df: pandas DataFrame; it is modified in place.
        col: Name of the column to encode.
        vals: Sequence of the values to encode.
        codes: Sequence of integer codes, paired positionally with ``vals``.
        new_col_name: Name given to the encoded column.

    Returns:
        The value counts of the encoded (renamed) column.

    Raises:
        ValueError: If the column holds a value (including a missing value)
            that has no entry in ``vals``. ``df`` is left unchanged.
    """
    col_map = dict(zip(vals, codes))
    encoded = df[col].map(col_map)

    # Values without a code come back from ``map`` as NaN, which cannot be
    # cast to int64. Report exactly which values are unmapped instead of
    # letting the cast fail with a generic "non-finite values" error.
    unmapped = encoded.isna()
    if unmapped.any():
        missing = list(df[col][unmapped].unique())
        raise ValueError(
            f"Column {col!r} contains values with no code in `vals`: {missing!r}"
        )

    df[col] = encoded.astype('int64')
    df.rename(columns={col: new_col_name}, inplace=True)
    return df[new_col_name].value_counts()

#### Percentage Dictionary

In [8]:
def perc_dict(df, col_1, col_2, val_in_col_2):
    """Return, per unique value of one column, the share of rows matching a value in another.

    For every unique non-missing value in ``col_1``, computes the percentage
    of the rows holding that value whose ``col_2`` entry equals
    ``val_in_col_2``. Best used with columns containing a limited number of
    unique values.

    Args:
        df: pandas DataFrame.
        col_1: Name of the column whose unique values become the keys.
        col_2: Name of the column that is compared against ``val_in_col_2``.
        val_in_col_2: The value to look for in ``col_2``.

    Returns:
        A dictionary mapping each unique non-missing value of ``col_1`` to a
        percentage (0-100) rounded to one decimal place. Empty if ``col_1``
        has no non-missing values.
    """
    col_dict = {}

    # Missing values are dropped first: NaN never compares equal to itself,
    # so the equality filter below would select zero rows for it and the
    # percentage would divide by zero.
    for val in df[col_1].dropna().unique():
        rows_with_val = df[df[col_1] == val]
        matching = rows_with_val[rows_with_val[col_2] == val_in_col_2]
        col_dict[val] = round(len(matching) / len(rows_with_val) * 100, 1)

    return col_dict

#### Plot Percentage Dictionary

In [9]:
def plot_perc_dict(perc_dict, title):
    """Draw a bar plot of a percentage dictionary.

    Args:
        perc_dict: Dictionary as returned by the perc_dict function; its keys
            are plotted along the x-axis and its values as bar heights on
            the y-axis.
        title: Title of the bar plot.

    Returns:
        None. The bars are drawn on a new 15x10 matplotlib figure.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    categories = list(perc_dict.keys())
    percentages = list(perc_dict.values())

    plt.figure(figsize=(15, 10))

    sns.barplot(x=categories, y=percentages)
    plt.ylabel("Percentage", fontsize=12)
    plt.title(title, fontname='silom')

    # Re-apply the keys as tick labels at the existing tick positions so they
    # can be rotated and right-aligned.
    locs, _ = plt.xticks()
    plt.xticks(locs, categories, rotation=35, horizontalalignment='right')
    plt.plot()

#### Plot Confusion Matrix with Scores

In [10]:
def plot_cm(model, model_type, display_label_1, display_label_2, X=None, y=None):
    """Plot a confusion matrix for a binary classifier, annotated with scores.

    Draws a 7x5 confusion matrix and writes the accuracy, precision, recall
    and F1 scores to the right of the matrix, then shows the figure.

    Args:
        model: Fitted sklearn binary classifier.
        model_type: String naming the model; used in the plot heading.
        display_label_1: Display label for the first class.
        display_label_2: Display label for the second class.
        X: Feature data to evaluate on. Defaults to the global ``X_test``.
        y: True labels matching ``X``. Defaults to the global ``y_test``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    import matplotlib.pyplot as plt
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import f1_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import precision_score

    # ``plot_confusion_matrix`` was removed in scikit-learn 1.2; prefer its
    # replacement and fall back to the legacy helper on older releases.
    try:
        from sklearn.metrics import ConfusionMatrixDisplay
        draw_confusion_matrix = ConfusionMatrixDisplay.from_estimator
    except (ImportError, AttributeError):
        from sklearn.metrics import plot_confusion_matrix as draw_confusion_matrix

    # Keep the original behavior (evaluate on the notebook's global test
    # split) when no data is passed explicitly.
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    y_preds = model.predict(X)
    class_names = [display_label_1, display_label_2]

    _, ax = plt.subplots(figsize=(7, 5))
    draw_confusion_matrix(model, X, y,
                          display_labels=class_names,
                          cmap=plt.cm.PuBu, ax=ax)

    plt.text(x=-.5, y=-.6, s="{} Confusion Matrix".format(model_type), fontsize=15, fontname='silom')

    # Score lines are stacked to the right of the matrix, 0.2 apart in data
    # coordinates.
    scores = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    )
    for (label, scorer), y_pos in zip(scores, (.1, .3, .5, .7)):
        value = float(round(scorer(y, y_preds), 4))
        plt.text(x=2.1, y=y_pos, s="{}: {}".format(label, value), fontsize=14)

    plt.show()