In [29]:
import pandas as pd

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
)

In [30]:
# Create a copy of the dirty dataset to annotate
def get_annotated_output(fahes_output: pd.DataFrame, dirty_dataset: pd.DataFrame):
    annotated_output = dirty_dataset.copy()

    # Iterate over the rows of `dmv_file`
    for _, row in fahes_output.iterrows():
        attribute_name = row["Attribute Name"]
        value_to_replace = row["DMV"]

        # Perform the replacement in the specific column
        annotated_output[attribute_name] = annotated_output[attribute_name].replace(
            value_to_replace, "FAHES_ERROR"
        )

    # Encode "FAHES_ERROR" with 1, the other values with 0
    annotated_output = annotated_output.applymap(
        lambda x: 1 if x == "FAHES_ERROR" else 0
    )

    return annotated_output

In [31]:
def annotate_errors(
    df_fixed: pd.DataFrame, df_with_errors: pd.DataFrame
) -> pd.DataFrame:
    # Check if the dataframes have the same shape and columns after sorting
    if df_fixed.shape != df_with_errors.shape or not all(
        df_fixed.columns == df_with_errors.columns
    ):

        raise ValueError("Both dataframes must have the same structure.")

    # Convert both dataframes to strings for datatype-agnostic comparison
    df_fixed_str = df_fixed.astype(str)
    df_with_errors_str = df_with_errors.astype(str)

    # Create the annotation dataframe by comparing the two dataframes
    error_annotation = (df_fixed_str != df_with_errors_str).astype(int)

    return error_annotation

In [32]:
def inspect_classification(
    true_dataset: pd.DataFrame, pred_dataset: pd.DataFrame, input_dataset: pd.DataFrame
):
    true_dataset.reset_index(drop=True)
    pred_dataset.reset_index(drop=True)
    input_dataset.reset_index(drop=True)

    true_dataset.columns = input_dataset.columns
    pred_dataset.columns = input_dataset.columns

    calc = true_dataset.add(2)
    calc_out = pred_dataset.copy()
    calc_out[calc_out == 0] = -1

    calc = calc.add(calc_out)

    # True positive calculation
    tp = calc == 4
    true_positive_df = input_dataset[tp].astype(str)
    true_positive_df = true_positive_df.replace(to_replace="nan", value=0)
    true_positive_df = true_positive_df.reset_index(drop=True)  # Remove index

    # False positive calculation
    fp = calc == 3
    false_positive_df = input_dataset[fp].astype(str)
    false_positive_df = false_positive_df.replace(to_replace="nan", value=0)
    false_positive_df = false_positive_df.reset_index(drop=True)  # Remove index

    # False negative calculation
    fn = calc == 2
    false_negative_df = input_dataset[fn].astype(str)
    false_negative_df = false_negative_df.replace(to_replace="nan", value=0)
    false_negative_df = false_negative_df.reset_index(drop=True)  # Remove index

    all_errors_df = input_dataset[fp | fn].astype(str)
    all_errors_df = all_errors_df.replace(to_replace="nan", value=0)
    all_errors_df = all_errors_df.reset_index(drop=True)  # Remove index

    return true_positive_df, false_positive_df, false_negative_df, all_errors_df

In [33]:
def calculate_metrics(true_dataset: pd.DataFrame, pred_dataset: pd.DataFrame):
    # Flatten the dataframes to 1D arrays
    y_true = true_dataset.values.flatten()
    y_pred = pred_dataset.values.flatten()

    # Basic metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Class-specific accuracy
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    # AUC scores
    roc_auc = roc_auc_score(y_true, y_pred)
    pr_auc = average_precision_score(y_true, y_pred)

    # Count of 1s in true and predicted labels
    true_positives_count = sum(y_true == 1)  # Total 1s in true labels
    predicted_positives_count = tp  # Total 1s in predictions

    return {
        "accuracy": float(accuracy),
        "precision": float(precision),
        "recall": float(recall),
        "f1_score": float(f1),
        "roc_auc": float(roc_auc),
        "pr_auc": float(pr_auc),
        "true_positives_count": int(true_positives_count),
        "predicted_positives_count": int(predicted_positives_count),
    }

In [None]:
folder = "rayyan"

dirty_dataset = pd.read_csv(f"../../datasets/{folder}/{folder}.csv")
clean_dataset = pd.read_csv(f"../../datasets/{folder}/clean.csv")
dmv_file = pd.read_csv(f"./output/{folder}/DMV_{folder}.csv")

output = get_annotated_output(dmv_file, dirty_dataset)
output.to_csv(f"./output/{folder}/annotated_output.csv")

error_annotation = annotate_errors(clean_dataset, dirty_dataset)

true_positive_df, false_positive_df, false_negative_df, all_errors_df = (
    inspect_classification(error_annotation, output, dirty_dataset)
)

print(calculate_metrics(error_annotation, output))

{'accuracy': 0.6960909090909091, 'precision': 0.0, 'recall': 0.0, 'f1_score': 0.0, 'roc_auc': 0.4710840408514827, 'pr_auc': 0.2611818181818182, 'true_positives_count': 2873, 'predicted_positives_count': 0}
