In [1]:
#importing necessary library
#importing matplotlib 
import matplotlib.pyplot as plt
#importing seaborn
import seaborn as sea
#importing pandas 
import pandas as pd
#importing numpy
import numpy as np

#importing job-lib
import joblib

# importing Listed-Color-Map
from matplotlib.colors import ListedColormap

# This file collects frequently used helper methods in one place

#### preprocessing data methods

In [2]:
# a general pre - processing method for click ad data set
def pre_processing_ad(data):
    """Split the click-ad data set into a feature matrix and a target column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Age', 'EstimatedSalary' and 'Purchased'.
        Any other columns (for example 'User ID' and 'Gender') are ignored.
        The frame is NOT modified.

    Returns
    -------
    x_values : numpy.ndarray
        The 'Age' and 'EstimatedSalary' columns, shape (n_rows, 2).
    y_values : numpy.ndarray
        The 'Purchased' column kept two-dimensional, shape (n_rows, 1).

    Raises
    ------
    KeyError
        If one of the three required columns is missing.
    """
    # The unused columns used to be dropped with inplace=True. That changed the
    # caller's frame and made a second call fail with KeyError, so the needed
    # columns are simply selected instead.
    x_values = data[['Age', 'EstimatedSalary']].values
    y_values = data[['Purchased']].values
    return x_values, y_values

In [3]:
# a general pre - processing method for hiv data set
def pre_processing_hiv(data,encoder,flag=0):
    """Turn the 'octamer' strings into a character matrix and encode it.

    Each 'octamer' entry is split into its 8 characters, giving one row of the
    matrix that is handed to ``encoder``.

    Parameters
    ----------
    data : mapping or pandas.DataFrame
        Must provide ``data['octamer']`` (iterable of 8-character sequences)
        and ``data['cleaves']`` (the targets, returned unchanged).
    encoder : object
        Anything exposing ``fit_transform(X)`` and ``transform(X)``.
    flag : int, default 0
        0 fits the encoder on this data and transforms it; any other value
        only transforms with the encoder as given.

    Returns
    -------
    x_values
        Whatever the encoder returns for the (n_rows, 8) character matrix.
    y_values
        ``data['cleaves']``.
    encoder
        The same encoder object that was passed in.

    Raises
    ------
    ValueError
        If an 'octamer' entry does not contain exactly 8 characters.
    """
    rows = [list(value) for value in data['octamer']]
    for position, row in enumerate(rows):
        if len(row) != 8:
            raise ValueError(
                "octamer at position %d has %d characters, expected 8"
                % (position, len(row))
            )

    # Build the matrix in one go. The earlier version seeded it with a row of
    # ones and stripped that row only after encoding, so the encoder was fitted
    # on (or asked to transform) a bogus '1.0' value in every column. Growing
    # the array with np.append per record was also quadratic.
    x_values = np.array(rows, dtype=str).reshape(len(rows), 8)

    if flag == 0:
        x_values = encoder.fit_transform(x_values)
    else:
        x_values = encoder.transform(x_values)

    y_values = data['cleaves']
    return x_values, y_values, encoder

##### fit or predict method

In [4]:
def fit_or_predict(x,y,classifier,task =0):
    """Either predict with ``classifier`` or fit it, depending on ``task``.

    task == 0 (the default): return ``classifier.predict(x)``; ``y`` is not used.
    any other task value:    call ``classifier.fit(x, y)`` and return the
                             classifier object itself.
    """
    if task == 0:
        # prediction mode
        return classifier.predict(x)

    # training mode
    classifier.fit(x, y)
    return classifier

#### plot colormap 

In [5]:
def plot_colormap(x_set, y_set, title,classifier,mymap, step=0.001):
    """Plot a classifier's decision regions together with the observations.

    A dense grid spanning the range of the two feature columns is classified
    with ``classifier.predict`` and drawn as filled contours; the observations
    are then scattered on top, one colour per class.

    Parameters
    ----------
    x_set : numpy.ndarray
        Feature matrix; column 0 goes on the x axis and column 1 on the y axis.
    y_set : array-like
        Class label of every row of ``x_set``. A flat vector and a column
        vector of shape (n, 1) are both accepted.
    title : str
        Figure title.
    classifier : object
        Anything exposing ``predict(X)`` for an (n_points, 2) array.
    mymap : colormap
        Passed to ``plt.contourf`` as ``cmap``.
    step : float, default 0.001
        Grid spacing along both axes. The grid holds roughly
        (range_0 / step) * (range_1 / step) points, so the default is only
        practical when both features span a small range
        (presumably scaled features - confirm against the callers).
    """
    # Flatten the labels so a boolean mask selects rows. With a column-vector
    # y_set, np.where returns (row_idx, col_idx); using that pair as a row
    # index also plotted observation 0 once per class member.
    labels = np.ravel(y_set)

    # grid of continuous points covering the range of both feature columns
    x1_axis = np.arange(x_set[:, 0].min(), x_set[:, 0].max(), step)
    x2_axis = np.arange(x_set[:, 1].min(), x_set[:, 1].max(), step)
    x1_grids, x2_grids = np.meshgrid(x1_axis, x2_axis)

    # one (x1, x2) row per grid point, classified and folded back to the grid
    grid_points = np.column_stack([x1_grids.ravel(), x2_grids.ravel()])
    regions = classifier.predict(grid_points).reshape(x1_grids.shape)

    # colour the area on either side of the decision boundary
    plt.contourf(x1_grids, x2_grids, regions, alpha=0.6, cmap=mymap)

    # clip the axes to the extent of the grid
    plt.xlim(x1_grids.min(), x1_grids.max())
    plt.ylim(x2_grids.min(), x2_grids.max())

    # Scatter the observations with their class colour. Labels that are valid
    # palette indices (0 / 1) keep their historical colour; any other encoding
    # (e.g. -1 / 1, floats, strings) falls back to the label's position among
    # the sorted unique labels, so classes no longer collide or raise.
    palette = ('red', 'blue')
    classes = np.unique(labels)
    labels_are_indices = bool(
        classes.size > 0
        and np.issubdtype(classes.dtype, np.integer)
        and classes.min() >= 0
        and classes.max() < len(palette)
    )
    for position, label in enumerate(classes):
        if labels_are_indices:
            colour = palette[int(label)]
        else:
            colour = palette[position % len(palette)]
        members = labels == label
        plt.scatter(x_set[members, 0], x_set[members, 1],
                    c=colour, label=label, s=6)

    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')     # labeling axes
    plt.legend()
    plt.title(title)
    plt.show()

#### Plot CAP Curve 

In [6]:
def plot_cap(X_test,y_test,label,classifier):
    """Draw the Cumulative Accuracy Profile (CAP) of a probabilistic classifier.

    Three curves are drawn on a new 10x8 figure: a random model (diagonal), a
    perfect model, and the trained model, obtained by ranking the observations
    by their predicted probability and accumulating the class-1 hits.

    Parameters
    ----------
    X_test : array-like
        Features passed to ``classifier.predict_proba``.
    y_test : array-like
        True labels, flat or of shape (n, 1). Observations whose label equals 1
        count as "class 1"; every other label (0, -1, ...) counts as not
        class 1.
    label : str
        Legend entry for the trained model's curve.
    classifier : object
        Anything exposing ``predict_proba(X)``; column 1 of the result is used
        as the ranking score.

    Raises
    ------
    ValueError
        If the number of predicted probabilities differs from the number of
        labels.
    """
    # Work on 0/1 indicators instead of raw labels: summing raw labels only
    # gives the class-1 count when the other class is encoded as 0 (for -1/1
    # labels the sum, and the cumulative curve, were wrong).
    positives = (np.ravel(y_test) == 1).astype(int)
    total = positives.size
    class_1_count = int(positives.sum())

    probs = np.asarray(classifier.predict_proba(X_test))[:, 1]
    if probs.shape[0] != total:
        raise ValueError(
            "predict_proba returned %d rows for %d labels" % (probs.shape[0], total)
        )

    plt.figure(figsize = (10, 8))

    # Random Model
    plt.plot([0, total], [0, class_1_count], c = 'r', linestyle = '--', label = 'Random Model')

    # Perfect Model
    plt.plot([0, class_1_count, total],
             [0, class_1_count, class_1_count],
             c = 'grey',
             linewidth = 2,
             label = 'Perfect Model')

    # Trained Model: rank by descending probability. Ties are ordered with
    # class-1 observations first, which is the order the previous
    # sorted(zip(probs, y), reverse=True) produced.
    order = np.lexsort((positives, probs))[::-1]
    y_values = np.append([0], np.cumsum(positives[order]))
    x_values = np.arange(0, total + 1)
    plt.plot(x_values,
             y_values,
             c = 'b',
             label = label,
             linewidth = 4)

    # Plot information
    plt.xlabel('Total observations', fontsize = 16)
    plt.ylabel('Class 1 observations', fontsize = 16)
    plt.title('Cumulative Accuracy Profile', fontsize = 16)
    plt.legend(loc = 'lower right', fontsize = 16)