In [15]:
%run Imports.ipynb

In [21]:
def Predict(df, scaler=None, classifier=None, test_size=0.25, disp=False):
    """Train and evaluate a classifier on a random train/test split of ``df``.

    The column named ``'target'`` is used as the label; every other column is
    used as a feature. A classification report, the confusion matrix and the
    accuracy on the test split are printed.

    Parameters:
        df (pandas.DataFrame): data containing the features and a ``'target'`` column
        scaler (object): optional scaler instance; it is fitted on the training
            split only and then applied to the test split
        classifier (object): classifier instance to fit. When ``None`` a new
            ``RandomForestClassifier(n_estimators=101, random_state=0)`` is created
        test_size (float): ``test_size`` forwarded to ``train_test_split``
        disp (bool): also display the confusion matrix as a seaborn heatmap
    Returns:
        tuple: ``(classifier, X_train_r)`` - the fitted classifier and a copy of
        the training features taken before any scaling
    Raises:
        KeyError: if ``df`` has no ``'target'`` column
    """
    if 'target' not in df.columns:
        raise KeyError("df must contain a 'target' column")

    # Build the default estimator per call. An instance placed in the signature
    # is created once and shared, so a later call would refit the very object
    # an earlier call returned.
    if classifier is None:
        classifier = RandomForestClassifier(n_estimators=101, random_state=0)

    X = df.loc[:, df.columns != 'target']
    y = df.loc[:, df.columns == 'target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=0)
    # Keep the unscaled training features; this is what gets returned.
    X_train_r = X_train.copy()
    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # y_train is a one-column frame; flatten it to the 1-D vector fit() is given.
    classifier.fit(X_train, y_train.values.ravel())
    y_pred = classifier.predict(X_test)

    # Computed once and reused for both the printout and the optional heatmap.
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("---------------------Summary------------------")
    print(classification_report(y_test, y_pred))
    print("----------------Confusion_matrix--------------")
    print(conf_matrix)
    print("--------------------Accuracy------------------")
    print(accuracy_score(y_test, y_pred))

    if disp:
        sns.set(font_scale=1.4)  # label size
        sns.heatmap(conf_matrix, annot=True, annot_kws={"size": 16})  # annotation font size
        plt.title('Confusion matrix')
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()

    return classifier, X_train_r

def PredictRegression(df, scaler=None, classifier=None, test_size=0.25):
    """Fit an estimator on a random train/test split of ``df`` and print the test MAPE.

    The column named ``'target'`` is used as the value to predict; every other
    column is used as a feature.

    Parameters:
        df (pandas.DataFrame): data containing the features and a ``'target'`` column
        scaler (object): optional scaler instance; it is fitted on the training
            split only and then applied to the test split
        classifier (object): estimator instance to fit. When ``None`` a new
            ``RandomForestClassifier(n_estimators=101, random_state=0)`` is created
        test_size (float): ``test_size`` forwarded to ``train_test_split``
    Returns:
        tuple: ``(classifier, X_train_r)`` - the fitted estimator and a copy of
        the training features taken before any scaling
    Raises:
        KeyError: if ``df`` has no ``'target'`` column
    """
    if 'target' not in df.columns:
        raise KeyError("df must contain a 'target' column")

    # Build the default estimator per call. An instance placed in the signature
    # is created once and shared, so a later call would refit the very object
    # an earlier call returned.
    # NOTE(review): the default is a classifier although this function reports
    # a regression metric - confirm whether a regressor was intended.
    if classifier is None:
        classifier = RandomForestClassifier(n_estimators=101, random_state=0)

    X = df.loc[:, df.columns != 'target']
    y = df.loc[:, df.columns == 'target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=0)
    # Keep the unscaled training features; this is what gets returned.
    X_train_r = X_train.copy()
    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # y_train is a one-column frame; flatten it to the 1-D vector fit() is given.
    classifier.fit(X_train, y_train.values.ravel())
    y_pred = classifier.predict(X_test)

    print("--------------------MAPE------------------")
    # y_pred was produced from X_test, so it must be scored against y_test
    # (flattened to 1-D so it lines up element-wise with y_pred).
    print(mean_absolute_percentage_error(y_test.values.ravel(), y_pred))

    return classifier, X_train_r

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters:
        y_true (array-like): observed values; may be 1-D or a single column
            (e.g. an ``(n, 1)`` array or one-column DataFrame)
        y_pred (array-like): predicted values holding the same number of
            elements as ``y_true``
    Returns:
        float: ``mean(|y_true - y_pred| / |y_true|) * 100``. Zeros in
        ``y_true`` lead to ``inf``/``nan`` through NumPy's division.
    Raises:
        ValueError: if the inputs do not hold the same number of elements
    """
    # Flatten both inputs so an (n, 1) column and an (n,) vector are compared
    # element-wise instead of broadcasting to an (n, n) matrix.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.shape != predicted.shape:
        raise ValueError(
            "y_true and y_pred must have the same number of elements "
            f"(got {actual.size} and {predicted.size})"
        )

    return np.mean(np.abs((actual - predicted) / actual)) * 100