# Baseline Results

In [21]:
import os
import time
import pickle 
import logging
from sklearn.preprocessing import StandardScaler
import numpy as np # for vector / matrix operations
import pandas as pd # for data manipulation
import polars as pl
from sklearn.metrics import recall_score, roc_auc_score, accuracy_score
from assignment_3_tools import parquet_to_dict
from great_tables import GT, md, html, from_column, style, loc, vals

In [6]:
# Unique Datasets.
def unq_df_names(lazy_dict):
    """
    Creates a set of unique datasets from a LazyFrame dictionary.
    ---
    Args: 
        lazy_dict (dict): Contains LazyFrame names and corresponding LazyFrames.
    Returns:
        unq_names (set): Contains unique dataset names.
    """
    all_names = list()
    for key in lazy_dict:
        if key[-6:] == "_train":
            all_names.append(key[:-8]) # Remove _X_train and _y_train
        elif key[-5:] == "_test":
            all_names.append(key[:-7]) # Remove _X_test and _y_test
        else:
            pass
    unq_names = set(all_names)
    return unq_names

# Return Corresponding Test Set.
def corr_testset(unq_name):
    """
    Return the names of testsets corresponding to a preprocessed trainset
    ---
    Args:
        unq_name(set): Contains unique dataset names.
    Returns:
        X_test_name(str): Name of corresponding predictor testset.
        y_test_name(str): Name of corresponding response testset.
    """
    threshold = unq_name[-2:] # 2 possibilities: "##" or "mp"
    if threshold.isnumeric():
        # Use null-threshold datasets with no balancing operations.
        X_test_name = f"df_heart_drop_{threshold}_imp_X_test"
        y_test_name = f"df_heart_drop_{threshold}_imp_y_test"
    else:
        # Use null-threshold datasets with no balancing operations. 
        X_test_name = f"{unq_name}_X_test"
        y_test_name = f"{unq_name}_y_test"
    return X_test_name, y_test_name

In [11]:
def evaluate_models(lazy_dict, model_path):
    """
    Loads all models saved as pickle files in the specified directory and evaluates them on the given test data.
    """
    results = {
        'Dataset_name': [],
        'Recall': [],
        'ROC_AUC': [],
        'Accuracy': []}
    
    for filename in os.listdir(model_path):
        if filename.endswith('MLPbaseline'):
            # Save the model filename to a variable
            model_name = filename
            logging.info(f"Evaluating model: {model_name}")
            
            # Load the model from disk
            file_path = os.path.join(model_path, model_name)
            with open(file_path, 'rb') as file:
                model = pickle.load(file)
            
            model_name = model_name[:-12]
            
            X_train_name = f"{model_name}_X_train"
            y_train_name = f"{model_name}_y_train"
            (X_test_name, y_test_name) = corr_testset(model_name)

            X_train = lazy_dict[X_train_name].collect().to_pandas()
            y_train = lazy_dict[y_train_name].collect().to_pandas()
            X_test = lazy_dict[X_test_name].collect().to_pandas()
            y_test = lazy_dict[y_test_name].collect().to_pandas()
            
            X_train.drop(columns=['__index_level_0__'], errors='ignore', inplace=True)
            y_train.drop(columns=['__index_level_0__'], errors='ignore', inplace=True)
            X_test.drop(columns=['__index_level_0__'], errors='ignore', inplace=True)
            y_test.drop(columns=['__index_level_0__'], errors='ignore', inplace=True)

            y_train = y_train.to_numpy().ravel()
            y_test = y_test.to_numpy().ravel()


            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            y_pred = model.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
            

            # Store the results
            results['Dataset_name'].append(model_name)
            results['Recall'].append(recall)
            results['ROC_AUC'].append(roc_auc)
            results['Accuracy'].append(accuracy)

    # Convert results to DataFrame for easier viewing and analysis
    results_df = pd.DataFrame(results)
    return results_df

In [14]:
%%time

# Log Initialization
logging.basicConfig(
    filename='./log/MLP_baseline.log', 
    filemode='w',
    level=logging.INFO, 
    format='%(asctime)s - %(levelname)s - %(message)s')

## Paths
data_path = "../../Data/GoogleDrive/Encoded_Data/"
model_path = "../../Data/GoogleDrive/Baseline/"

## Read in Parquet to LazyFrame Dictionary.
pq_jar = parquet_to_dict(data_path)

## Record the unique dataset names.
unq_names = unq_df_names(pq_jar)


results = evaluate_models(pq_jar, model_path)

print(results)



                     Dataset_name    Recall   ROC_AUC  Accuracy
0            df_heart_drop_03_imp  0.124849  0.656228  0.884549
1    Over_Sample_1:1_threshold_03  0.726785  0.637981  0.739440
2   Under_Sample_7:1_threshold_00  0.254700  0.585215  0.864388
3   Under_Sample_2:1_threshold_40  0.549143  0.652675  0.819389
4    Over_Sample_1:2_threshold_20  0.567522  0.683101  0.812198
..                            ...       ...       ...       ...
72  Under_Sample_5:1_threshold_20  0.289927  0.583354  0.868521
73   Over_Sample_1:1_threshold_10  0.732660  0.649277  0.742703
74   Over_Sample_1:5_threshold_05  0.303207  0.597743  0.866846
75  Under_Sample_1:1_threshold_20  0.752738  0.705428  0.732085
76  Under_Sample_7:1_threshold_40  0.199862  0.566098  0.876532

[77 rows x 4 columns]
CPU times: user 5min 26s, sys: 2min 52s, total: 8min 19s
Wall time: 1min 39s




In [None]:
results.sort_values(by='Recall', ascending=False).to_parquet("../../Data/GoogleDrive/te")

In [24]:
GT(results.sort_values(by='Recall', ascending=False)).fmt_number(columns=["Recall","ROC_AUC","Accuracy"], decimals=2)

0,1,2,3
Under_Sample_1:1_threshold_20,0.75,0.71,0.73
Under_Sample_1:1_threshold_03,0.74,0.69,0.73
Over_Sample_1:1_threshold_05,0.74,0.67,0.74
Over_Sample_1:1_threshold_10,0.73,0.65,0.74
Over_Sample_1:1_threshold_40,0.73,0.63,0.74
Over_Sample_1:1_threshold_03,0.73,0.64,0.74
Under_Sample_1:1_threshold_05,0.73,0.64,0.75
Under_Sample_1:1_threshold_10,0.72,0.60,0.75
Over_Sample_1:1_threshold_20,0.72,0.69,0.75
Under_Sample_1:1_threshold_40,0.72,0.66,0.74
