In [5]:
import pandas as pd
from pathlib import Path
from typing import List

In [6]:
# Directory holding every CSV read by this notebook.
# NOTE: absolute, machine-specific location — edit this one constant when
# running the notebook from another checkout or machine.
DATA_DIR = Path("/home/oliver/Oliver.Mono/projects/Vision.OPT_MULT/data/csvs")

# Reference ("ms") sheets — presumably the mark schemes; confirm with the data owner.
ms1_path = DATA_DIR / "ms_1.csv"
ms2_path = DATA_DIR / "ms_2.csv"

# Team whose answer sheets are scored; selects the "<team>_1.csv" / "<team>_2.csv" files.
team = "curie"
gd1_path = DATA_DIR / f"{team}_1.csv"
gd2_path = DATA_DIR / f"{team}_2.csv"

ms1_df = pd.read_csv(ms1_path)
ms2_df = pd.read_csv(ms2_path)

gd1_df = pd.read_csv(gd1_path)
gd2_df = pd.read_csv(gd2_path)

In [7]:
def compare_ms_to_answers(ms: List[pd.DataFrame], ans: List[pd.DataFrame]):
    """Compare each answer frame with its reference frame and print the totals.

    Frames are paired by position: ``ans[i]`` is compared with ``ms[i]`` using
    ``compare_answers``. Every per-pair result dict is printed as it is
    produced, followed by the totals summed over all pairs.

    Args:
        ms: Reference ("ms") frames the answers are checked against.
        ans: Answer frames, in the same order as ``ms``.

    Returns:
        A dict with the keys ``results`` (the list of per-pair dicts returned
        by ``compare_answers``), ``correct_answers``, ``unanswered``,
        ``wrong_answers`` and ``total_questions``.

    Raises:
        ValueError: If ``ms`` and ``ans`` do not contain the same number of
            frames, or (from ``compare_answers``) if a pair differs in shape.
    """
    # zip() would silently drop the surplus frames and under-report the totals.
    if len(ms) != len(ans):
        raise ValueError(
            f"ms and ans must have the same length {len(ms)} {len(ans)}"
        )

    result_list = []
    for ms_single, ans_single in zip(ms, ans):
        # The answer frame goes first: compare_answers inspects its first
        # argument when deciding whether a row was left blank.
        result = compare_answers(ans_single, ms_single)
        result_list.append(result)
        print(result)

    correct_answers = sum(result["consistent_rows"] for result in result_list)
    unanswered = sum(result["unanswered_rows"] for result in result_list)
    wrong_answers = sum(result["incorrect_rows"] for result in result_list)
    total_questions = correct_answers + unanswered + wrong_answers

    print(f"Total correct answers: {correct_answers}")
    print(f"Total incorrect answers: {wrong_answers}")
    # Printed so the three categories visibly add up to the total below.
    print(f"Total unanswered questions: {unanswered}")
    print(f"Total questions: {total_questions}")

    # Return the figures as well as printing them, so callers that assign the
    # result get data instead of None.
    return {
        "results": result_list,
        "correct_answers": correct_answers,
        "unanswered": unanswered,
        "wrong_answers": wrong_answers,
        "total_questions": total_questions,
    }

def compare_answers(df1, df2):
    """Classify every row of ``df1`` by how it compares with the same row of ``df2``.

    Rows are matched by position, not by index label. Each row lands in
    exactly one category, checked in this order:

    * consistent -- every cell of the ``df1`` row equals the matching ``df2`` cell;
    * unanswered -- the rows differ and every cell of the ``df1`` row after the
      first column is the single-space string ``" "``;
    * incorrect  -- anything else.

    Note that cells are compared with ``==``, so a NaN cell never equals
    another NaN cell and makes its row non-consistent.

    Args:
        df1: Frame whose rows are classified (and checked for blank rows).
        df2: Frame to compare against; must have the same shape as ``df1``.

    Returns:
        A dict holding, for each category, the number of rows
        (``*_rows``) and their 0-based row positions (``*_indices``).

    Raises:
        ValueError: If the two frames do not have the same shape.
    """
    if df1.shape != df2.shape:
        raise ValueError(f"DataFrames must have the same shape {df1.shape} {df2.shape}")

    # Row positions collected per category.
    buckets = {"consistent": [], "unanswered": [], "incorrect": []}

    for position, (left_row, right_row) in enumerate(zip(df1.values, df2.values)):
        if all(left == right for left, right in zip(left_row, right_row)):
            category = "consistent"
        elif all(cell == " " for cell in left_row[1:]):
            # First column is skipped when testing for a blank row.
            category = "unanswered"
        else:
            category = "incorrect"
        buckets[category].append(position)

    matched = buckets["consistent"]
    blank = buckets["unanswered"]
    wrong = buckets["incorrect"]

    return {
        'consistent_rows': len(matched),
        'consistent_indices': matched,
        'unanswered_rows': len(blank),
        'unanswered_indices': blank,
        'incorrect_rows': len(wrong),
        'incorrect_indices': wrong
    }

In [8]:
# Check the team's answer frames against the reference ("ms") frames, pairing
# sheet 1 with sheet 1 and sheet 2 with sheet 2. The per-sheet result dicts and
# the overall totals are printed by the call itself.
results = compare_ms_to_answers(ms=[ms1_df, ms2_df], ans=[gd1_df, gd2_df])

{'consistent_rows': 12, 'consistent_indices': [4, 6, 7, 8, 9, 12, 17, 19, 20, 25, 26, 29], 'unanswered_rows': 9, 'unanswered_indices': [2, 3, 5, 11, 13, 16, 21, 22, 24], 'incorrect_rows': 9, 'incorrect_indices': [0, 1, 10, 14, 15, 18, 23, 27, 28]}
{'consistent_rows': 8, 'consistent_indices': [1, 6, 8, 9, 12, 19, 20, 26], 'unanswered_rows': 12, 'unanswered_indices': [0, 2, 3, 4, 5, 11, 13, 16, 17, 18, 22, 27], 'incorrect_rows': 9, 'incorrect_indices': [7, 10, 14, 15, 21, 23, 24, 25, 28]}
Total correct answers: 20
Total incorrect answers: 18
Total questions: 59
