In [46]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, root_mean_squared_error

In [51]:
def evaluate_results(path, func, threshold):
    """Evaluate a generated expression as a binary classifier on a CSV test set.

    The CSV at ``path`` is read without a header row, so its columns are
    labelled by position. The last column is taken as the ground-truth label.
    ``func`` is applied to every row (the row still contains the label column)
    and its output is turned into a 0/1 prediction by comparing it against
    ``threshold``.

    The classification report, confusion matrix, AUC and RMSE are printed;
    nothing is returned.

    Parameters
    ----------
    path : str or path-like
        Location of the headerless CSV test file.
    func : callable
        Called once per row with a pandas Series indexed by column position;
        expected to return a numeric score.
    threshold : float
        Scores strictly greater than this value are predicted as class 1.
    """
    # Import test data
    test_data = pd.read_csv(path, header=None, index_col=None)
    y_test = test_data.iloc[:, -1].values

    y_pred = test_data.apply(func, axis=1)
    # A NaN score compares as False against the threshold, so it becomes class 0.
    y_pred_binarized = (y_pred > threshold).astype(int)

    # Evaluate the model.
    # labels=[0, 1] pins the matrix to 2x2 so the four-way unpack below still
    # works when only one class shows up in both the labels and the predictions.
    # The matrix is computed once and reused for the unpack and the printout.
    conf_mat = confusion_matrix(y_test, y_pred_binarized, labels=[0, 1])
    tn, fp, fn, tp = conf_mat.ravel()
    print(classification_report(y_test, y_pred_binarized))
    print(f"Confusion matrix: TN: {tn}, FP: {fp}, FN: {fn}, TP: {tp} \n", conf_mat)
    # NOTE(review): AUC and RMSE are computed from the thresholded 0/1
    # predictions, not from the raw scores, so the AUC describes a single
    # operating point. Switching to y_pred would need NaN handling first --
    # confirm which is intended.
    print(f'AUC score: {round(roc_auc_score(y_test, y_pred_binarized),2)}')
    print(f'RMSE: {round(root_mean_squared_error(y_test, y_pred_binarized),2)}')
    
def symbolic_regression_function1(df):
    """Score one sample with the expression (((C*C + C*D) - B) - A*C) - C*B.

    ``df`` is indexed by position: 0 holds A, 1 holds B and 2 holds D. Every
    ``C`` in the expression template is a distinct numeric constant.
    """
    idx_a, idx_b, idx_d = 0, 1, 2
    consts = (
        -0.6965971995944867,
        1.2781484814751356,
        1.2430512333536776,
        0.027783268153546238,
        -2.469009073022163,
    )

    a_val = df[idx_a]
    b_val = df[idx_b]
    d_val = df[idx_d]

    # Build the expression inside-out, keeping the original grouping.
    inner = consts[0] * consts[1] + consts[2] * d_val
    minus_b = inner - b_val
    minus_a_term = minus_b - a_val * consts[3]
    return minus_a_term - consts[4] * b_val

def symbolic_regression_function2(df):
    """Score one sample with the expression ((C + D) + B) - log(A) * C.

    ``df`` is indexed by position: 0 holds A, 1 holds B and 2 holds D. The two
    ``C`` slots in the expression template are distinct numeric constants.
    """
    idx_a, idx_b, idx_d = 0, 1, 2
    offset = 0.01773634464316796
    log_weight = 0.4539789850289721

    additive_part = (offset + df[idx_d]) + df[idx_b]
    log_part = np.log(df[idx_a]) * log_weight
    return additive_part - log_part

def symbolic_regression_function3(df):
    """Score one sample with (B/C + sqrt(A)) - ((C + D) - sqrt(A + C)) * C.

    ``df`` is indexed by position: 0 holds A, 1 holds B and 2 holds D. The four
    ``C`` slots in the expression template are distinct numeric constants,
    listed below in the order they appear in the expression.
    """
    idx_a, idx_b, idx_d = 0, 1, 2
    divisor = 0.644624167915657
    d_offset = -0.8059279527643397
    a_offset = -0.9518927236245961
    scale = -1.200766150699288

    left_term = df[idx_b] / divisor + np.sqrt(df[idx_a])
    bracket = (d_offset + df[idx_d]) - np.sqrt(df[idx_a] + a_offset)
    return left_term - bracket * scale

In [57]:
# Main: evaluate symbolic_regression_function3 on the three-variable telco test split.
path = '../HVAE-master/data/telco/telco3var_test.csv'
evaluate_results(
    path=path,
    func=symbolic_regression_function3,
    threshold=0.1,
)

              precision    recall  f1-score   support

           0       0.77      0.73      0.75       200
           1       0.71      0.75      0.73       174

    accuracy                           0.74       374
   macro avg       0.74      0.74      0.74       374
weighted avg       0.74      0.74      0.74       374

Confusion matrix: TN: 147, FP: 53, FN: 44, TP: 130 
 [[147  53]
 [ 44 130]]
AUC score: 0.74
RMSE: 0.51
