## Model Evaluation Function
### Description : Contains Different Type of model performance evalution function

#### Script Version : 1.0.0
#### Script Written by : Loh Khai Shyang 
#### Script Date : 22 Nov 2021
#### Contact Email : irvinekhai@gmail.com

In [None]:
def XGB_Regression_model_evaluation_function (model,label,over_under_reject_analyze_feature,output_spec_limits,train_df,test_df,X_train,x_test,Y_train,y_test):
    ### 1. Plot Model performance RMSE of training and validation set
    ### 2. Confusion matrix scatter plot of "training data" and "testing data" [ evaluate how good is the model performance on training and testing ]
        # use train model re-predict "training data " and plot scatter confusion matrix
    ### 3. Plot Over Reject Graph 
    ### 4. Plot Over Reject Graph 
    
    ### Hyper Parameter
        # 1. model = Trained model 
            # NOTE !! Model XGb must have "eval_set"
            # "eval_set" can have eval_set=[(X_train,Y_train)] or eval_set=[(X_train,Y_train),(x_test,y_test)]
            # Example : model.fit(X_train,Y_train, early_stopping_rounds=20, eval_set=[(X_train,Y_train),(x_test,y_test)], eval_metric='rmse')
        # 2. label = Y output name
        # 3. output_spec_limits = output spec limit
                # is a list = [lower spec limit , upper spec limit ]
                # if no spec = 'Null' , Ex : [ 35, 'Null']

        ### NOTE !!! [ Dataset below consist of all original value from original dataset ]
        # 4. train_df = include all features in dataset  [ exclude output feature columns] --- [ Note!!!   training_df + testing_df = original dataset ]
        # 5. testing_df = include all features in dataset  [ exclude output feature columns] --- [ Note!!!   training_df + testing_df = original dataset ]
        
        ### NOTE !!! [ Dataset below is ready to fit into model for training - done normalization/stadardization or getdummies]
        # 6. X_train / x_test = dataframe include input features that are necessary to train the model 
        # 7. Y_train / y_test = dataframe include output features that are necessary to train the model

    
    import matplotlib.pyplot as plt
    import seaborn as sns 
    import numpy as np
    import pandas as pd
    import matplotlib.lines as mlines

    
    def _mean_max_stdev_calculate_function (variable):
        ### Calculate and return "variable" : Min, Max, Mean, Stdev ###
        
        mean = round(np.mean(variable),2)
        stdev =round(np.std(variable),2)
        maximum = round(np.max(variable),2)
        minimum = round(np.min(variable),2)
        
        return minimum,maximum,mean,stdev
    
    
    def _evaluate_train_test_model_function(model,label,output_spec_limits,train_df,test_df,X_train,x_test,Y_train,y_test):
        ### Plot 1. Train Test Confusion Matrix Plot 
        ### Plot 2. Distplot with Distibution Plot [Absolute_Residual = abs("Predict" - "Actual")] ( Turn on to draw ) 
        ### Plot 3. Distplot with Density Plot ( Turn on to draw ) 
        ### Plot 4. Distplot with Distibution Plot [Residual = "Predict" - "Actual"] ( Turn on to draw ) 
    
        
        ### Return train_df, test_df
            # Train_df = training dataset consist of "Actual Output column" and "Predicted Output column"
            # Test_df = Testing dataset consist of "Actual Output column" and "Predicted Output column"
        
        
        
        ##  1. Plot 1. Train Test Confusion Matrix Plot  ##
        
        train_pred = model.predict(X_train)
        test_pred = model.predict(x_test)
        
        # Statistical value #
        train_actual_min, train_actual_max, train_actual_mean, train_actual_stdev = _mean_max_stdev_calculate_function(Y_train)
        train_pred_min, train_pred_max, train_pred_mean, train_pred_stdev = _mean_max_stdev_calculate_function(train_pred)
        
        test_actual_min, test_actual_max, test_actual_mean, test_actual_stdev = _mean_max_stdev_calculate_function (y_test)
        test_pred_min, test_pred_max, test_pred_mean, test_pred_stdev = _mean_max_stdev_calculate_function (test_pred)
        
        
        fig, ax = plt.subplots(figsize=(15,15))
        if output_spec_limits[0] != 'Null' :
            ax.axvline(output_spec_limits[0], 
                       linestyle='--',
                       color='r' , 
                       label='Spec L.limit:'+str(output_spec_limits[0]))  # Plot Actual Ground Truth Lower Limit line
            ax.axhline(output_spec_limits[0], 
                       linestyle='--',
                       color='r') # Predicted Ground Truth Value
        if output_spec_limits[1] != 'Null' :
            ax.axvline(output_spec_limits[1], 
                       linestyle='--',
                       color='m',
                       label='Spec L.limit:'+str(output_spec_limits[1]))  # Plot Actual Ground Truth Lower Limit line
            ax.axhline(output_spec_limits[1], 
                       linestyle='--',
                       color='m') # Predicted Ground Truth Value
            
        ax.scatter(Y_train, 
                   train_pred, 
                   color = 'b', 
                   marker = 'o',
                   label = label+'_Train_Actual [ mean:'+str(train_actual_mean)+', std:'+str(train_actual_stdev)+', max:'+str(train_actual_max)+', min:'+str(train_actual_min)+']\n'+label+'_Train_Pred [ mean:'+str(train_pred_mean)+', std:'+str(train_pred_stdev)+', max:'+str(train_pred_max)+', min:'+str(train_pred_min)+']'
                  ) # Scatter plot Training Dataset of "actual vs predicted"
        ax.scatter(y_test, 
                   test_pred, 
                   color='r', 
                   marker ='o',
                   label = label+'_Test_Actual [ mean:'+str(test_actual_mean)+', std:'+str(test_actual_stdev)+', max:'+str(test_actual_max)+', min:'+str(test_actual_min)+']\n'+label+'_Test_Pred [ mean:'+str(test_pred_mean)+', std:'+str(test_pred_stdev)+', max:'+str(test_pred_max)+', min:'+str(test_pred_min)+']'
                  ) # Scatter plot Testing Dataset of "actual vs predicted"
        plt.xlabel('Actual '+label)
        plt.ylabel('Predicted '+label)
        plt.title('['+label+'] Train Test Confusion Matrix Scatter Plot')
        plt.legend(fancybox=True, framealpha=1, shadow=True, borderpad=1, fontsize=8)
        
        
        
        ## 2. Plot Residual train test Distribution/Density plot  ##
            # Residual = absolute ( "Y_actual_value" - " Y_Predicted_value")
            
        train_df[label+'_Actual'] = Y_train
        train_df[label+'_Predict'] = train_pred
        train_df[label+'_Residual'] = train_df[label+'_Predict'] - train_df[label+'_Actual']  
        train_df[label+'_Absolute_Residual'] = abs(train_df[label+'_Predict'] - train_df[label+'_Actual'] )
        
        test_df[label+'_Actual'] = y_test
        test_df[label+'_Predict'] = test_pred
        test_df[label+'_Residual'] = test_df[label+'_Predict'] - test_df[label+'_Actual']
        test_df[label+'_Absolute_Residual'] = abs(test_df[label+'_Predict'] - test_df[label+'_Actual'])
        
        
        train_min, train_max, train_mean, train_stdev =_mean_max_stdev_calculate_function(train_df[label+'_Residual']) # calculate statistical value
        test_min, test_max, test_mean, test_stdev =_mean_max_stdev_calculate_function(test_df[label+'_Residual']) # # calculate statistical value
        
        train_ab_min, train_ab_max, train_ab_mean, train_ab_stdev =_mean_max_stdev_calculate_function(train_df[label+'_Absolute_Residual']) # calculate statistical valu
        test_ab_min, test_ab_max, test_ab_mean, test_ab_stdev =_mean_max_stdev_calculate_function(test_df[label+'_Absolute_Residual']) # # calculate statistical value
 


        if len(model.evals_result().keys()) ==1: # model have only 1 key means only have  training  "eval_set" ( see function hyperparameter discription "model")

            score_type = list(model.evals_result()['validation_0'].keys())[0] # find the "eval_metric" type used in this model
            model_train_score = model.evals_result()['validation_0'][score_type][-1] # y-axis evaluation training result scores

            ## Plot 2. Distplot with Distibution Plot [Absolute_Residual = abs("Predict" - "Actual")] ( Turn on to draw ) ##
            fig,ax1 = plt.subplots(figsize=(15,15))
            sns.distplot(train_df[label+'_Absolute_Residual'], ax=ax1, color='b', kde=False)
            sns.distplot(test_df[label+'_Absolute_Residual'], ax=ax1, color='r', kde=False)
            ax1.axvline(3*(train_ab_stdev + test_ab_stdev)/2, 
                        linestyle='--',
                        color='r')  # Plot +ve 3* sigma 
            plt.xlabel('Residual Value')
            plt.ylabel('Counts')
            plt.legend(['+3*sigma',
                        '<Train Residue> mean:'+str(train_ab_mean)+' std:'+str(train_ab_stdev)+' min:'+str(train_ab_min)+' max:'+str(train_ab_max)+' '+str(score_type)+':'+str(model_train_score),
                        '<Test Residue> mean:'+str(test_ab_mean)+' std:'+str(test_ab_stdev)+' min:'+str(test_ab_min)+' max:'+str(test_ab_max)
                       ])
            plt.title('Model Residual Error Distribution Plot [Absolute_Residue = abs("Actual" - "Predicted")]')

            
            ## Plot 3. Distplot with Density Plot ( Turn on to draw ) ##
            fig,ax1 = plt.subplots(figsize=(15,15))
            sns.distplot(train_df[label+'_Absolute_Residual'], ax=ax1, color='b', kde=True)
            sns.distplot(test_df[label+'_Absolute_Residual'], ax=ax1, color='r', kde=True)
            ax1.axvline(3*(train_ab_stdev + test_ab_stdev)/2, 
                        linestyle='--',
                        color='r')  # Plot +ve 3* sigma 
            plt.xlabel('Residual Value')
            plt.ylabel('Density')
            plt.legend(['Train Cummulative Density Line',
                        'Test Cummulative Density Line',
                        '+3*sigma',
                        '<Train Residue> mean:'+str(train_ab_mean)+' std:'+str(train_ab_stdev)+' min:'+str(train_ab_min)+' max:'+str(train_ab_max)+' '+str(score_type)+':'+str(model_train_score),
                        '<Test Residue> mean:'+str(test_ab_mean)+' std:'+str(test_ab_stdev)+' min:'+str(test_ab_min)+' max:'+str(test_ab_max)
                       ])
            plt.title('Model Residual Error Density Plot [Absolute_Residue = abs("Actual" - "Predicted")]')

            ## Histplot  ( Turn on to draw ) ##
    #         fig,ax1 = plt.subplots(figsize=(15,15))
    #         ax1.hist(train_df[label+'_Residual'], color='b')
    #         ax1.hist(test_df[label+'_Residual'], color='r')
    #         plt.legend(['Train','Test'])
    #         plt.title('Model Residual Error Distribution Plot [Residue = abs("Actual" - "Predicted")]')
        
        

            ## Plot 4. Distplot with Distibution Plot [Residual = "Predict" - "Actual"] ( Turn on to draw ) ##
            fig,ax3 = plt.subplots(figsize=(15,15))
            sns.distplot(train_df[label+'_Residual'], ax=ax3, color='b', kde=False)
            sns.distplot(test_df[label+'_Residual'], ax=ax3, color='r', kde=False)
            ax3.axvline(3*(train_stdev + test_stdev)/2, 
                        linestyle='--',
                        color='r')  # Plot +ve 3* sigma 
            ax3.axvline(-3*(train_stdev + test_stdev)/2, 
                        linestyle='--',
                        color='m')  # Plot +ve 3* sigma 
            plt.xlabel('Residual Value')
            plt.ylabel('Counts')
            plt.legend(['+3*sigma',
                        '-3*sigma',
                        '<Train Residue> mean:'+str(train_mean)+' std:'+str(train_stdev)+' min:'+str(train_min)+' max:'+str(train_max)+' '+str(score_type)+':'+str(model_train_score),
                        '<Test Residue> mean:'+str(test_mean)+' std:'+str(test_stdev)+' min:'+str(test_min)+' max:'+str(test_max)
                       ])
            plt.title('Model Residual Error Distribution Plot [Residue = "Actual" - "Predicted"]')
            
       
        else: # else model have only 2 key means only have  training and testing "eval_set" ( see function hyperparameter discription "model")
            
            score_type = list(model.evals_result()['validation_0'].keys())[0] # find the "eval_metric" type used in this model
            model_train_score = model.evals_result()['validation_0'][score_type][-1] # y-axis evaluation Training result scores
            model_test_score = model.evals_result()['validation_1'][score_type][-1] # y-axis evaluation Testing result scores
            
            
            ## Plot 2. Distplot with Distibution Plot [Absolute_Residual = abs("Predict" - "Actual")] ( Turn on to draw ) ##
            fig,ax1 = plt.subplots(figsize=(15,15))
            sns.distplot(train_df[label+'_Absolute_Residual'], ax=ax1, color='b', kde=False)
            sns.distplot(test_df[label+'_Absolute_Residual'], ax=ax1, color='r', kde=False)
            ax1.axvline(3*(train_ab_stdev + test_ab_stdev)/2, 
                        linestyle='--',
                        color='r')  # Plot +ve 3* sigma 
            plt.xlabel('Residual Value')
            plt.ylabel('Counts')
            plt.legend(['+3*sigma',
                        '<Train Residue> mean:'+str(train_ab_mean)+' std:'+str(train_ab_stdev)+' min:'+str(train_ab_min)+' max:'+str(train_ab_max)+' '+str(score_type)+':'+str(model_train_score),
                        '<Test Residue> mean:'+str(test_ab_mean)+' std:'+str(test_ab_stdev)+' min:'+str(test_ab_min)+' max:'+str(test_ab_max)+' '+str(score_type)+':'+str(model_test_score)
                       ])
            plt.title('Model Residual Error Distribution Plot [Absolute_Residue = abs("Actual" - "Predicted")]')

            
            ## Plot 3. Distplot with Density Plot ( Turn on to draw ) ##
            fig,ax1 = plt.subplots(figsize=(15,15))
            sns.distplot(train_df[label+'_Absolute_Residual'], ax=ax1, color='b', kde=True)
            sns.distplot(test_df[label+'_Absolute_Residual'], ax=ax1, color='r', kde=True)
            ax1.axvline(3*(train_ab_stdev + test_ab_stdev)/2, 
                        linestyle='--',
                        color='r')  # Plot +ve 3* sigma 
            plt.xlabel('Residual Value')
            plt.ylabel('Density')
            plt.legend(['Train Cummulative Density Line',
                        'Test Cummulative Density Line',
                        '+3*sigma',
                        '<Train Residue> mean:'+str(train_ab_mean)+' std:'+str(train_ab_stdev)+' min:'+str(train_ab_min)+' max:'+str(train_ab_max)+' '+str(score_type)+':'+str(model_train_score),
                        '<Test Residue> mean:'+str(test_ab_mean)+' std:'+str(test_ab_stdev)+' min:'+str(test_ab_min)+' max:'+str(test_ab_max)+' '+str(score_type)+':'+str(model_test_score)
                       ])
            plt.title('Model Residual Error Density Plot [Absolute_Residue = abs("Actual" - "Predicted")]')

            ## Histplot  ( Turn on to draw ) ##
    #         fig,ax1 = plt.subplots(figsize=(15,15))
    #         ax1.hist(train_df[label+'_Residual'], color='b')
    #         ax1.hist(test_df[label+'_Residual'], color='r')
    #         plt.legend(['Train','Test'])
    #         plt.title('Model Residual Error Distribution Plot [Residue = abs("Actual" - "Predicted")]')
        
            
            ## Plot 4. Distplot with Distibution Plot [Residual = "Predict" - "Actual"] ( Turn on to draw ) ##
            fig,ax3 = plt.subplots(figsize=(15,15))
            sns.distplot(train_df[label+'_Residual'], ax=ax3, color='b', kde=False)
            sns.distplot(test_df[label+'_Residual'], ax=ax3, color='r', kde=False)
            ax3.axvline(3*(train_stdev + test_stdev)/2, 
                        linestyle='--',
                        color='r')  # Plot +ve 3* sigma 
            ax3.axvline(-3*(train_stdev + test_stdev)/2, 
                        linestyle='--',
                        color='m')  # Plot +ve 3* sigma 
            plt.xlabel('Residual Value')
            plt.ylabel('Counts')
            plt.legend(['+3*sigma',
                        '-3*sigma',
                        '<Train Residue> mean:'+str(train_mean)+' std:'+str(train_stdev)+' min:'+str(train_min)+' max:'+str(train_max)+' '+str(score_type)+':'+str(model_train_score),
                        '<Test Residue> mean:'+str(test_mean)+' std:'+str(test_stdev)+' min:'+str(test_min)+' max:'+str(test_max)+' '+str(score_type)+':'+str(model_test_score)
                       ])
            plt.title('Model Residual Error Distribution Plot [Residue = "Actual" - "Predicted"]')

        
        return train_df,test_df

    
    
    
    
    def _train_test_evals_result_plot(model):
        
        ### Plot Model Train Test Lost Function Trends ### 
        
        
        if len(model.evals_result().keys()) ==1:

            score_type = list(model.evals_result()['validation_0'].keys())[0] # find the "eval_metric" type used in this model
            y_axis_1 = model.evals_result()['validation_0'][score_type] # y-axis evaluation training result scores
            epochs = len(y_axis_1) # x_axis number of epochs
            x_axis= range(0,epochs)

            final_train_score =y_axis_1[-1] # final model training dataset epoch score

            ## plot figure ##

            fig, ax = plt.subplots(figsize=(15,15))
            ax.plot(x_axis, y_axis_1, label='Training_'+score_type , marker='o', color='b' )
            plt.text(x_axis[-1]+1, y_axis_1[-1], '[Last Epochs Train '+score_type+' : '+str(final_train_score)+']') # Add final rmse value test to plot graph
            ax.legend()
            plt.xlabel('Rounds [ Epochs ]')
            plt.ylabel('Lost Function [ '+score_type+ ' ]')
            plt.title('Model Train Test Lost Function Trends ')
            plt.grid()
            
            return

        elif len(model.evals_result().keys()) == 2:

            score_type = list(model.evals_result()['validation_0'].keys())[0] # find the "eval_metric" type used in this model
            y_axis_1 = model.evals_result()['validation_0'][score_type] # y-axis evaluation Training result scores
            y_axis_2 = model.evals_result()['validation_1'][score_type] # y-axis evaluation Testing result scores
            epochs = len(y_axis_1) # x_axis number of epochs
            x_axis= range(0,epochs)
            final_train_score =y_axis_1[-1] # final model training dataset epoch score
            final_test_score =y_axis_2[-1] # final model testing dataset epoch score

            ## plot figure ## 
            fig,ax = plt.subplots(figsize=(15,15))
            ax.plot(x_axis, y_axis_1, label='Training_dataset_'+score_type,  marker = 'o', color='b')
            plt.text(x_axis[-1]+1, y_axis_1[-1], '[Last Epochs Train '+score_type+' : '+str(final_train_score)+']')

            ax.plot(x_axis, y_axis_2, label='Testing_dataset_'+score_type,  marker='o', color='r')
            plt.text(x_axis[-1]+1, y_axis_2[-1], '[Last Epochs Test '+score_type+' : '+str(final_test_score)+']')

            ax.legend()
            plt.xlabel('Rounds [ Epochs ]')
            plt.ylabel('Lost Function [ '+score_type+ ' ]')
            plt.title('Model Train Test Lost Function Trends ')
            plt.grid()
            
            return
            
        else:
            print("Model Evals_result more 2 types [ Training and Testing ] ... edit code on  [ XGB_model_evaluation_function ] ")
        
            return
        
    
    
    
    def _analyze_over_under_reject_function(train_df,test_df,label,output_spec_limits, over_under_reject_analyze_feature):
        ### 1. Plot Over Reject Graph ###
        ### 2. Plot Over Reject Graph ###
            # Plot 1 -  [TRAINING Dataset] UNDER REJECT Scatter Plot
            # Plot 2 -  [TESTING Dataset] UNDER REJECT Scatter Plot
            # Plot 3 -  [TRAINING Dataset] OVER REJECT Scatter Plot
            # Plot 4 -  [TESTING Dataset] OVER REJECT Scatter Plot
        
        ### Conditonal ###
        
        ## under reject = [ predicted output < output_spec_limits ] & [ actual output > output_spec_limits ]
        ## Over reject = [ predicted output > output_spec_limits ] & [ actual output < output_spec_limits ] 
        ## rejection threshold = Ground Truth value 
        ## distinguish_feature = distinguish over reject by features class
        
        ### Hyper Parameter ###
        
        ## 1. train_df == train dataframe
        ## 2. test_df == test dataframe
        ## 3. label == output label features name
        ## 4. output_spec_limits = output spec limit
                # is a list = [lower spec limit , upper spec limit ]
                # if no spec = 'Null' , Ex : [ 35, 'Null']
        ## 5. over_under_reject_analyze_feature == analyze feature name, it is a single index list ['label_name'] 
            # if 'Null' = does not plot according to features class
            #'feature name' = plot according to feature name class
        
        

        train_under_df = train_df.copy()
        train_over_df = train_df.copy()
        
        test_under_df = test_df.copy()
        test_over_df = test_df.copy()
        
        
        ## Train Dataset Under/Over Reject Analysis ##
        if over_under_reject_analyze_feature[0] =='Null': # When over_under_reject_analyze_feature is NOT GIVEN == " NULL"
            
            if output_spec_limits[0] != 'Null' and output_spec_limits[1] != 'Null': # output_spec_limits have " upper spec" + " Lower Spec"

                ## Under Reject ##
                
                # Train dataset [ actual output > output_spec_limits ]
                train_under_df = train_under_df.loc[ (train_df[label+'_Actual'] < output_spec_limits[0] ) | (train_df[label+'_Actual'] > output_spec_limits[1]),
                                              [label+'_Actual', label+'_Predict'] ] 
                
                # Train dataset [ predicted output < output_spec_limits ]
                train_under_df = train_under_df.loc[(train_under_df[label+'_Predict'] >= output_spec_limits[0]) & (train_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                                    [label+'_Actual',label+'_Predict'] ]
                
                # Test Dataset [ actual output > output_spec_limits ]
                test_under_df = test_under_df.loc[ (test_df[label+'_Actual'] < output_spec_limits[0] ) | (test_df[label+'_Actual'] > output_spec_limits[1]),
                                            [label+'_Actual', label+'_Predict'] ] 
                
                # Test Dataset [ predicted output < output_spec_limits ]
                test_under_df = test_under_df.loc[(test_under_df[label+'_Predict'] >= output_spec_limits[0]) & (test_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                                  [label+'_Actual',label+'_Predict'] ]
                
                
                
                ## Over Reject ##
                
                # Train dataset [ actual output < output_spec_limits ] 
                train_over_df = train_over_df.loc[ (train_df[label+'_Actual'] >= output_spec_limits[0] ) & (train_df[label+'_Actual'] <= output_spec_limits[1]),
                                             [label+'_Actual', label+'_Predict'] ] 
                
                # Train dataset [ predicted output > output_spec_limits ]
                train_over_df = train_over_df.loc[(train_over_df[label+'_Predict'] < output_spec_limits[0]) | (train_over_df[label+'_Predict'] > output_spec_limits[1]),
                                                  [label+'_Actual',label+'_Predict'] ]
                
                # Test Dataset [ actual output < output_spec_limits ] 
                test_over_df = test_over_df.loc[ (test_df[label+'_Actual'] >= output_spec_limits[0] ) & (test_df[label+'_Actual'] <= output_spec_limits[1]),
                                           [label+'_Actual', label+'_Predict'] ] 
                
                # Test Dataset [ predicted output > output_spec_limits ]
                test_over_df = test_over_df.loc[(test_over_df[label+'_Predict'] < output_spec_limits[0]) | (test_over_df[label+'_Predict'] > output_spec_limits[1]),
                                                [label+'_Actual',label+'_Predict'] ]
                
                
            elif output_spec_limits[0] != 'Null' and output_spec_limits[1] == 'Null': # output_spec_limits ONLY HAVE " Lower Spec" !!!!
                
                ## Under Reject ##
                
                # Train dataset [ actual output > output_spec_limits ]
                train_under_df = train_under_df.loc[ (train_df[label+'_Actual'] < output_spec_limits[0] ) & (train_under_df[label+'_Predict'] >= output_spec_limits[0]),
                                              [label+'_Actual', label+'_Predict'] ]
                
                # Test Dataset [ actual output > output_spec_limits ]
                test_under_df = test_under_df.loc[ (test_df[label+'_Actual'] < output_spec_limits[0] ) & (test_under_df[label+'_Predict'] >= output_spec_limits[0]),
                                            [ label+'_Actual', label+'_Predict'] ] 
            
                
                ## Over Reject ##
                
                # Train dataset [ actual output < output_spec_limits ] 
                train_over_df = train_over_df.loc[ (train_df[label+'_Actual'] >= output_spec_limits[0] ) & (train_over_df[label+'_Predict'] < output_spec_limits[0]),
                                             [label+'_Actual', label+'_Predict'] ] 
                
                # Test Dataset [ actual output < output_spec_limits ] 
                test_over_df = test_over_df.loc[ (test_df[label+'_Actual'] >= output_spec_limits[0] ) & (test_over_df[label+'_Predict'] < output_spec_limits[0]),
                                           [label+'_Actual', label+'_Predict'] ] 
                
                
                
            elif output_spec_limits[0] == 'Null' and output_spec_limits[1] != 'Null': # output_spec_limits ONLY HAVE " Upper Spec" !!!!
                
                ## Under Reject ##
                
                # Train dataset [ actual output > output_spec_limits ]
                train_under_df = train_under_df.loc[ (train_df[label+'_Actual'] > output_spec_limits[1] ) & (train_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                              [label+'_Actual', label+'_Predict'] ]
                
                # Test Dataset [ actual output > output_spec_limits ]
                test_under_df = test_under_df.loc[ (test_df[label+'_Actual'] > output_spec_limits[1] ) & (test_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                            [label+'_Actual', label+'_Predict'] ] 
            
                
                ## Over Reject ##
                
                # Train dataset [ actual output < output_spec_limits ] 
                train_over_df = train_over_df.loc[ (train_df[label+'_Actual'] <= output_spec_limits[1] ) & (train_over_df[label+'_Predict'] > output_spec_limits[1]),
                                             [label+'_Actual', label+'_Predict'] ] 
             
                # Test Dataset [ actual output < output_spec_limits ] 
                test_over_df = test_over_df.loc[ (test_df[label+'_Actual'] <= output_spec_limits[1] ) & (test_over_df[label+'_Predict'] > output_spec_limits[1]),
                                           [label+'_Actual', label+'_Predict'] ]
                
            else: # Empty output_spec_limits !!!!
                Print("WARNING : output_spec_limits is Empty !!! , Please input Spec limit Threshold before calling the function")
            
            # add plot index column to all dataframe
            train_under_df['plot_index'] = range(1,len(train_under_df)+1)
            test_under_df['plot_index'] = range(1,len(test_under_df)+1)
            train_over_df['plot_index'] = range(1,len(train_over_df)+1)
            test_over_df['plot_index'] = range(1,len(test_over_df)+1)
            
            
            legend_name = ["Predicted", "Actual"] # legend naming list 
            
            ## Plot 1 -  [TRAINING Dataset] UNDER REJECT Scatter Plot ##
            fig, ax1 = plt.subplots(figsize =(25,18))
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax1.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                legend_name.insert(0,'Lower Specs Limit') # if there is lower spec limit add lower spec limit legend name
                
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax1.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                legend_name.insert(1,'Upper Specs Limit') # if there is upper spec limit add lower spec limit legend name
                
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=train_under_df , ax = ax1, s=60, marker='o') 
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=train_under_df , ax = ax1 , s=60, marker='^' ) 
            plt.legend(legend_name, loc='upper right')
            plt.title('Model Prediction UNDER REJECT (TRAINING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            
            
            ## Plot 2 -  [TESTING Dataset] UNDER REJECT Scatter Plot ##
            fig, ax2 = plt.subplots(figsize =(25,18))
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax2.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax2.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
            
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=test_under_df , ax = ax2, s=60, marker='o')# scatter plot - predicted value
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=test_under_df , ax = ax2 , s=60, marker='^') # scatter plot - actual value
            plt.legend(legend_name,loc='upper right')
            plt.title('Model Prediction UNDER REJECT (TESTING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            
            
            
            
            ## Plot 3 -  [TRAINING Dataset] OVER REJECT Scatter Plot ##
            fig, ax3 = plt.subplots(figsize =(25,18))
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax3.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax3.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=train_over_df , ax = ax3, s=60, marker='o') # scatter plot - predicted value
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=train_over_df , ax = ax3 , s=60, marker='^') # scatter plot - actual value
            plt.legend(legend_name,loc='upper right')
            plt.title('Model Prediction OVER REJECT (TRAINING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            
            
            
            ## Plot 4 -  [TESTING Dataset] OVER REJECT Scatter Plot ##
            fig, ax4 = plt.subplots(figsize =(25,18))
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax4.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax4.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=test_over_df , ax = ax4, s=60, marker='o') # scatter plot - predicted value        
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=test_over_df , ax = ax4 , s=60, marker='^') # scatter plot - actual value
            plt.legend(legend_name,loc='upper right')
            plt.title('Model Prediction OVER REJECT (TRAINING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            

            
        else:  # When over_under_reject_analyze_feature is GIVEN != " NULL"
            
            if output_spec_limits[0] != 'Null' and output_spec_limits[1] != 'Null': # output_spec_limits have " upper spec" + " Lower Spec"

                ## Under Reject ##
                
                # Train dataset [ actual output > output_spec_limits ]
                train_under_df = train_under_df.loc[ (train_df[label+'_Actual'] < output_spec_limits[0] ) | (train_df[label+'_Actual'] > output_spec_limits[1]),
                                              [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
                
                # Train dataset [ predicted output < output_spec_limits ]
                train_under_df = train_under_df.loc[(train_under_df[label+'_Predict'] >= output_spec_limits[0]) & (train_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                                    [over_under_reject_analyze_feature[0],label+'_Actual',label+'_Predict'] ]
                
                # Test Dataset [ actual output > output_spec_limits ]
                test_under_df = test_under_df.loc[ (test_df[label+'_Actual'] < output_spec_limits[0] ) | (test_df[label+'_Actual'] > output_spec_limits[1]),
                                            [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
                
                # Test Dataset [ predicted output < output_spec_limits ]
                test_under_df = test_under_df.loc[(test_under_df[label+'_Predict'] >= output_spec_limits[0]) & (test_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                                  [over_under_reject_analyze_feature[0],label+'_Actual',label+'_Predict'] ]
                
                
                
                ## Over Reject ##
                
                # Train dataset [ actual output < output_spec_limits ] 
                train_over_df = train_over_df.loc[ (train_df[label+'_Actual'] >= output_spec_limits[0] ) & (train_df[label+'_Actual'] <= output_spec_limits[1]),
                                             [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
                
                # Train dataset [ predicted output > output_spec_limits ]
                train_over_df = train_over_df.loc[(train_over_df[label+'_Predict'] < output_spec_limits[0]) | (train_over_df[label+'_Predict'] > output_spec_limits[1]),
                                                  [over_under_reject_analyze_feature[0],label+'_Actual',label+'_Predict'] ]
                
                # Test Dataset [ actual output < output_spec_limits ] 
                test_over_df = test_over_df.loc[ (test_df[label+'_Actual'] >= output_spec_limits[0] ) & (test_df[label+'_Actual'] <= output_spec_limits[1]),
                                           [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
                
                # Test Dataset [ predicted output > output_spec_limits ]
                test_over_df = test_over_df.loc[(test_over_df[label+'_Predict'] < output_spec_limits[0]) | (test_over_df[label+'_Predict'] > output_spec_limits[1]),
                                                [over_under_reject_analyze_feature[0],label+'_Actual',label+'_Predict'] ]
                
                
            elif output_spec_limits[0] != 'Null' and output_spec_limits[1] == 'Null': # output_spec_limits ONLY HAVE " Lower Spec" !!!!
                
                ## Under Reject ##
                
                # Train dataset [ actual output > output_spec_limits ]
                train_under_df = train_under_df.loc[ (train_df[label+'_Actual'] < output_spec_limits[0] ) & (train_under_df[label+'_Predict'] >= output_spec_limits[0]),
                                              [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ]
                
                # Test Dataset [ actual output > output_spec_limits ]
                test_under_df = test_under_df.loc[ (test_df[label+'_Actual'] < output_spec_limits[0] ) & (test_under_df[label+'_Predict'] >= output_spec_limits[0]),
                                            [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
            
                
                ## Over Reject ##
                
                # Train dataset [ actual output < output_spec_limits ] 
                train_over_df = train_over_df.loc[ (train_df[label+'_Actual'] >= output_spec_limits[0] ) & (train_over_df[label+'_Predict'] < output_spec_limits[0]),
                                             [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
                
                # Test Dataset [ actual output < output_spec_limits ] 
                test_over_df = test_over_df.loc[ (test_df[label+'_Actual'] >= output_spec_limits[0] ) & (test_over_df[label+'_Predict'] < output_spec_limits[0]),
                                           [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
                
                
                
            elif output_spec_limits[0] == 'Null' and output_spec_limits[1] != 'Null': # output_spec_limits ONLY HAVE " Upper Spec" !!!!
                
                ## Under Reject ##
                
                # Train dataset [ actual output > output_spec_limits ]
                train_under_df = train_under_df.loc[ (train_df[label+'_Actual'] > output_spec_limits[1] ) & (train_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                              [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ]
                
                # Test Dataset [ actual output > output_spec_limits ]
                test_under_df = test_under_df.loc[ (test_df[label+'_Actual'] > output_spec_limits[1] ) & (test_under_df[label+'_Predict'] <= output_spec_limits[1]),
                                            [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
            
                
                ## Over Reject ##
                
                # Train dataset [ actual output < output_spec_limits ] 
                train_over_df = train_over_df.loc[ (train_df[label+'_Actual'] <= output_spec_limits[1] ) & (train_over_df[label+'_Predict'] > output_spec_limits[1]),
                                             [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ] 
             
                # Test Dataset [ actual output < output_spec_limits ] 
                test_over_df = test_over_df.loc[ (test_df[label+'_Actual'] <= output_spec_limits[1] ) & (test_over_df[label+'_Predict'] > output_spec_limits[1]),
                                           [over_under_reject_analyze_feature[0], label+'_Actual', label+'_Predict'] ]
                
            else: # Empty output_spec_limits !!!!
                Print("WARNING : output_spec_limits is Empty !!! , Please input Spec limit Threshold before calling the function")
               
            
            # add plot index column to all dataframe
            train_under_df['plot_index'] = range(1,len(train_under_df)+1)
            test_under_df['plot_index'] = range(1,len(test_under_df)+1)
            train_over_df['plot_index'] = range(1,len(train_over_df)+1)
            test_over_df['plot_index'] = range(1,len(test_over_df)+1)
            
            
            ## Plot 1 -  [TRAINING Dataset] UNDER REJECT Scatter Plot ##
            fig, ax1 = plt.subplots(figsize =(25,18))
            
            spec_legend_name=[]
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax1.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                spec_legend_name.insert(0,'Lower Specs Limit') # if there is lower spec limit add lower spec limit legend name
                
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax1.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                spec_legend_name.insert(1,'Upper Specs Limit') # if there is upper spec limit add lower spec limit legend name
                

            sns.scatterplot(x='plot_index', y=label+'_Predict', data=train_under_df , ax = ax1, s=60, marker='o', hue=over_under_reject_analyze_feature[0])
            leg=plt.legend(loc='upper right', title=over_under_reject_analyze_feature[0]+' - Model Predicted')
            ax1.add_artist(leg) # every increase in legend plot need to add an artist
            
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=train_under_df , ax = ax1 , s=60, marker='^', hue=over_under_reject_analyze_feature[0] ) #,  color='k')
            
            # actual scatter plot legend  
            plt_2_legend_name = list(train_under_df[over_under_reject_analyze_feature[0]].unique())
            plt_2 = [ plt.plot([], [], marker='^')[0] for i in range(len(plt_2_legend_name))]
            leg_1=plt.legend(handles=plt_2, labels=plt_2_legend_name ,loc='upper left', title=over_under_reject_analyze_feature[0]+' - Model Actual')
            ax1.add_artist(leg_1) # every increase in legend plot need to add an artist
            
            # Spec limit scatter plot legend  
            upper_spec_legend_style = mlines.Line2D([],[], color='m', linestyle='--', label='Upper Specs Limit')
            lower_spec_legend_style = mlines.Line2D([],[], color='r', linestyle='--',label='Lower Specs Limit')
            plt.legend(handles=[lower_spec_legend_style, upper_spec_legend_style] ,loc='upper center', title=over_under_reject_analyze_feature[0]+' - Spec Limit')
            
            plt.title('Model Prediction UNDER REJECT (TRAINING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            
            
            ## Plot 2 -  [TESTING Dataset] UNDER REJECT Scatter Plot ##
            fig, ax2 = plt.subplots(figsize =(25,18))
            spec_legend_name=[]
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax2.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                spec_legend_name.insert(0,'Lower Specs Limit') # if there is lower spec limit add lower spec limit legend name
                
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax2.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                spec_legend_name.insert(1,'Upper Specs Limit') # if there is upper spec limit add lower spec limit legend name
                
        
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=test_under_df , ax = ax2, s=60, marker='o', hue=over_under_reject_analyze_feature[0]) # scatter plot - predicted value
            leg=plt.legend(loc='upper right', title=over_under_reject_analyze_feature[0]+' - Model Predicted')
            ax2.add_artist(leg)
            
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=test_under_df , ax = ax2 , s=60, marker='^', hue=over_under_reject_analyze_feature[0] ) # scatter plot - actual value
            
            # how to plot multiple scatter plot and multiple legend in one plot link : 
            
            # actual scatter plot legend  
            plt_2_legend_name = list(test_under_df[over_under_reject_analyze_feature[0]].unique())
            plt_2 = [ plt.plot([], [], marker='^')[0] for i in range(len(plt_2_legend_name))]
            leg_1=plt.legend(handles=plt_2, labels=plt_2_legend_name ,loc='upper left', title=over_under_reject_analyze_feature[0]+' - Model Actual')
            ax2.add_artist(leg_1) 
            
            # Spec limit scatter plot legend  
            upper_spec_legend_style = mlines.Line2D([],[], color='m', linestyle='--', label='Upper Specs Limit')
            lower_spec_legend_style = mlines.Line2D([],[], color='r', linestyle='--',label='Lower Specs Limit')
            plt.legend(handles=[lower_spec_legend_style, upper_spec_legend_style] ,loc='upper center', title=over_under_reject_analyze_feature[0]+' - Spec Limit')
            
            plt.title('Model Prediction UNDER REJECT (TESTING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            
            
            ## Plot 3 -  [TRAINING Dataset] OVER REJECT Scatter Plot ##
            fig, ax3 = plt.subplots(figsize =(25,18))
            spec_legend_name=[]
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax3.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                spec_legend_name.insert(0,'Lower Specs Limit') # if there is lower spec limit add lower spec limit legend name
                
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax3.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                spec_legend_name.insert(1,'Upper Specs Limit') # if there is upper spec limit add lower spec limit legend name
                
                
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=train_over_df , ax = ax3, s=60, marker='o', hue=over_under_reject_analyze_feature[0]) # scatter plot - predicted value
            leg=plt.legend(loc='upper right', title=over_under_reject_analyze_feature[0]+' - Model Predicted')
            ax3.add_artist(leg)
            
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=train_over_df , ax = ax3 , s=60, marker='^', hue=over_under_reject_analyze_feature[0] ) # scatter plot - actual value
            
            # actual scatter plot legend  
            plt_2_legend_name = list(train_over_df[over_under_reject_analyze_feature[0]].unique())
            plt_2 = [ plt.plot([], [], marker='^')[0] for i in range(len(plt_2_legend_name))]
            leg_1=plt.legend(handles=plt_2, labels=plt_2_legend_name ,loc='upper left', title=over_under_reject_analyze_feature[0]+' - Model Actual')
            ax3.add_artist(leg_1)
            
            # Spec limit scatter plot legend  
            upper_spec_legend_style = mlines.Line2D([],[], color='m', linestyle='--', label='Upper Specs Limit')
            lower_spec_legend_style = mlines.Line2D([],[], color='r', linestyle='--',label='Lower Specs Limit')
            plt.legend(handles=[lower_spec_legend_style, upper_spec_legend_style] ,loc='upper center', title=over_under_reject_analyze_feature[0]+' - Spec Limit')
            
            plt.title('Model Prediction OVER REJECT (TRAINING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)
            
            
            ## Plot 4 -  [TESTING Dataset] OVER REJECT Scatter Plot ##
            fig, ax4 = plt.subplots(figsize =(25,18))
            spec_legend_name=[]
            
            if output_spec_limits[0] != 'Null' : # plot lower spec limit line
                ax4.axhline(output_spec_limits[0], 
                           linestyle='--',
                           color='r') # Predicted Ground Truth Value
                spec_legend_name.insert(0,'Lower Specs Limit') # if there is lower spec limit add lower spec limit legend name
                
                
            if output_spec_limits[1] != 'Null' : # plot upper spec limit line 

                ax4.axhline(output_spec_limits[1], 
                           linestyle='--',
                           color='m') # Predicted Ground Truth Value
                spec_legend_name.insert(1,'Upper Specs Limit') # if there is upper spec limit add lower spec limit legend name
            
            
            sns.scatterplot(x='plot_index', y=label+'_Predict', data=test_over_df , ax = ax4, s=60, marker='o', hue=over_under_reject_analyze_feature[0]) # scatter plot - predicted value
            leg=plt.legend(loc='upper right', title=over_under_reject_analyze_feature[0]+' - Model Predicted')
            ax4.add_artist(leg)
            
            sns.scatterplot(x='plot_index', y=label+'_Actual', data=test_over_df , ax = ax4 , s=60, marker='^', hue=over_under_reject_analyze_feature[0] ) # scatter plot - actual value
            
            # actual scatter plot legend  
            plt_2_legend_name = list(test_over_df[over_under_reject_analyze_feature[0]].unique())
            plt_2 = [ plt.plot([], [], marker='^')[0] for i in range(len(plt_2_legend_name))]
            leg_1=plt.legend(handles=plt_2, labels=plt_2_legend_name ,loc='upper left', title=over_under_reject_analyze_feature[0]+' - Model Actual')
            ax4.add_artist(leg_1)
            
             # Spec limit scatter plot legend  
            upper_spec_legend_style = mlines.Line2D([],[], color='m', linestyle='--', label='Upper Specs Limit')
            lower_spec_legend_style = mlines.Line2D([],[], color='r', linestyle='--',label='Lower Specs Limit')
            plt.legend(handles=[lower_spec_legend_style, upper_spec_legend_style] ,loc='upper center', title=over_under_reject_analyze_feature[0]+' - Spec Limit')
            
            plt.title('Model Prediction OVER REJECT (TRAINING Dataset)')
            plt.xlabel('No')
            plt.ylabel(label)

        return
    
    

    
    
    
    
    
    train_df, test_df = _evaluate_train_test_model_function(model,label,output_spec_limits,train_df,test_df,X_train,x_test,Y_train,y_test)
    
    _train_test_evals_result_plot( model )  
    
    _analyze_over_under_reject_function (train_df, test_df,label, output_spec_limits, over_under_reject_analyze_feature)
    
            
    plt.show()
    return
        