# Compare Labeled Data
Run this notebook from the folder that contains the two labeled CSV files you want to compare. It lists every row where the labels in the two files differ.

In [None]:
import pandas as pd

In [None]:
def compare_dataframes(df_true, df_test, comment_col="COMMENT1"):
    """Report row-by-row differences between two equally shaped DataFrames.

    Rows are paired by position, so the two frames do not need to share the
    same index labels. Cells are compared column by column using the column
    names of ``df_true``; two missing values (NaN) count as equal.

    Args:
        df_true: Reference DataFrame ("File1" in the report).
        df_test: DataFrame to check against the reference ("File2").
        comment_col: Name of the column in ``df_true`` whose value is quoted
            in each report line to identify the row.

    Returns:
        * A message string if the shapes differ.
        * ``"The DataFrames are identical."`` if no cell differs (this
          includes two empty frames of the same shape).
        * Otherwise a tuple ``(report, accuracy)``: ``report`` has one line
          per differing row, and ``accuracy`` is the fraction of rows in
          which every cell matched.

    Note:
        Differing cell values are rendered as 'yes' when equal to 1 and 'no'
        for anything else (including missing values).
    """
    if df_true.shape != df_test.shape:
        return "The DataFrames have different shapes. They cannot be compared row by row."

    def is_different(val1, val2):
        # NaN != NaN evaluates to True, so two missing cells have to be
        # treated as equal explicitly.
        if pd.isna(val1) and pd.isna(val2):
            return False
        return val1 != val2

    def toq(n):
        return "yes" if n == 1 else "no"

    differences = []
    # iterrows() yields index *labels*; keep a separate positional counter so
    # the matching row of df_test is fetched correctly even when the index is
    # not the default 0..n-1 range.
    for position, (index, row_true) in enumerate(df_true.iterrows()):
        row_test = df_test.iloc[position]
        diffs = [
            (col, row_true[col], row_test[col])
            for col in df_true.columns
            if is_different(row_true[col], row_test[col])
        ]
        if diffs:
            details = ", ".join(
                f"{col}: File1 has '{toq(true_val)}', but File2 labeled '{toq(test_val)}'"
                for col, true_val, test_val in diffs
            )
            differences.append(
                f"Row {index} with comment '{row_true[comment_col]}' has differences: {details}"
            )

    # Return before dividing: with no differences (e.g. empty frames) there is
    # nothing to score and len(df_true) may be zero.
    if not differences:
        return "The DataFrames are identical."

    # Fraction of rows that matched completely (not the fraction that differ).
    accuracy = 1 - len(differences) / len(df_true)
    return "\n".join(differences), accuracy

In [None]:
# Ask for the two CSV paths. Surrounding whitespace is stripped because paths
# pasted into the prompt often carry a stray space or newline, which would
# make the later pd.read_csv call fail to find the file.
filename1 = input("File1 path: ").strip()
filename2 = input("File2 path: ").strip()

In [None]:
# Load both labeled files and compare them.
df1 = pd.read_csv(filename1)
df2 = pd.read_csv(filename2)

result = compare_dataframes(df1, df2)
# compare_dataframes returns a (report, score) tuple only when differences
# were found; for a shape mismatch or identical files it returns a plain
# message string, and indexing that with [0] would print just its first
# character.
print(result[0] if isinstance(result, tuple) else result)


Row 7 with comment ' Everyone was fantastic, professional and empathetic thank you.' has differences: Expectations Met: File1 has 'yes', but File2 labeled 'no', Trust Staff: File1 has 'yes', but File2 labeled 'no', Feel Safe: File1 has 'yes', but File2 labeled 'no'
Row 11 with comment ' The personal was fast and polite got me out quickly and the [person_name]. Worked fine. Was accurate on the prescriptions..' has differences: Feel Safe: File1 has 'no', but File2 labeled 'yes', Positive Experience: File1 has 'no', but File2 labeled 'yes'
Row 22 with comment '(INAUDIBLE) [person_name] [person_name], and [person_name]'s, easy to work with. Hands down, the best I've ... best experience I've had (INAUDIBLE).' has differences: Trust Staff: File1 has 'yes', but File2 labeled 'no'
Row 28 with comment '(INAUDIBLE) you also made me feel like I wasn't worth a lot. You need to make people, who need help feel like they're worth it. Thank you.' has differences: Trust Staff: File1 has 'yes', but File