In [6]:
import pandas as pd
import numpy as np
import re


def natural_sort(l):
    """Return the strings in ``l`` as a new list sorted in natural order.

    Each string is cut into alternating non-digit / digit chunks; digit chunks
    are compared as integers and all other chunks case-insensitively, so
    ``'Q2'`` is placed before ``'Q10'``.
    """
    def _chunk_key(item):
        # The capturing group makes re.split keep the digit runs in the result.
        pieces = []
        for chunk in re.split('([0-9]+)', item):
            pieces.append(int(chunk) if chunk.isdigit() else chunk.lower())
        return pieces

    return sorted(l, key=_chunk_key)


# Base folder under which the `reference/` and `results/` sub-folders are read.
# NOTE(review): looks like a mounted Google Drive path - confirm; the
# commented alternative below points at the current directory instead.
root_folder = 'drive/MyDrive/CARD0003/2024/'
# root_folder = './'

# Reference table, read with its first CSV column as the row index.
formatted_reponses = pd.read_csv(
    f'{root_folder}/reference/formatted_reponses.csv',
    index_col=0,
)
questions = [*formatted_reponses.columns]
question_selection = questions
formatted_reponses = formatted_reponses[questions]

# The row labelled 'score' is kept apart from all the other rows.
scores = formatted_reponses.loc['score']
responses = formatted_reponses.drop('score')
candidates = formatted_reponses.drop('score').index

# Sum of the 'score' row after converting every entry to float.
sum_scores = np.sum(list(map(float, scores)))

In [7]:
def get_question_selection(initials):
    """Return the questions for which ``initials`` is listed as a marker.

    Reads ``Marking Groups.xlsx`` from ``{root_folder}/reference``, discards
    the first two data rows, relabels the columns as ``Q``, ``mark``,
    ``marker1`` and ``marker2``, and returns, as a list, the ``Q`` values of
    every row whose ``marker1`` or ``marker2`` equals ``initials``.
    """
    workbook_name = 'Marking Groups.xlsx'
    groups = pd.read_excel(f'{root_folder}/reference/{workbook_name}').iloc[2:]
    groups.columns = ['Q','mark','marker1','marker2']

    is_first_marker = groups['marker1'] == initials
    is_second_marker = groups['marker2'] == initials
    assigned_rows = groups.loc[is_first_marker | is_second_marker]
    return list(assigned_rows.Q.values)

In [8]:
def collate(initials1,initials2):
    """Load two markers' score sheets and place them side by side per question.

    Reads ``{root_folder}/results/scores_{initials}.csv`` for each marker,
    indexes both tables on their ``ID`` column, casts the marks to int and
    merges them on ``ID``. For every question column of the first marker an
    ``xdiff`` entry holding the absolute difference between the two markers'
    marks is added.

    Parameters
    ----------
    initials1, initials2 : str
        Marker initials. They select the CSV files and become second-level
        column labels. They must not contain an underscore, because the
        label after the last underscore of each merged column name is taken
        as the marker.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``ID`` with two-level columns ``(question, label)``, where
        ``label`` is ``initials1``, ``initials2`` or ``'xdiff'``. Questions
        are ordered with ``natural_sort``.

    Raises
    ------
    KeyError
        If a question column of the first marker has no counterpart in the
        second marker's file (the merge only suffixes overlapping names).
    """
    suffix_1, suffix_2 = f'_{initials1}', f'_{initials2}'
    marks_1 = pd.read_csv(f'{root_folder}/results/scores_{initials1}.csv').set_index('ID').astype('int')
    marks_2 = pd.read_csv(f'{root_folder}/results/scores_{initials2}.csv').set_index('ID').astype('int')

    marks_1_2 = marks_1.merge(marks_2, on='ID', suffixes=(suffix_1, suffix_2))

    for question in marks_1.columns:
        marks_1_2[f"{question}_xdiff"] = abs(
            marks_1_2[f"{question}{suffix_1}"] - marks_1_2[f"{question}{suffix_2}"]
        )

    # Split on the LAST underscore only, so question names that themselves
    # contain underscores (e.g. 'Q1_a') stay intact in the first level.
    split_labels = marks_1_2.columns.str.rsplit('_', n=1)
    marks_1_2.columns = pd.MultiIndex.from_arrays(zip(*split_labels))

    # Reorder the columns: questions in natural order, labels in level order.
    sorted_column_levels = natural_sort(marks_1_2.columns.levels[0])
    marks_1_2 = marks_1_2.reindex(
        columns=pd.MultiIndex.from_product([sorted_column_levels, marks_1_2.columns.levels[1]])
    )
    return marks_1_2

def add_total(df, initials1,initials2):
    """Append per-marker totals and their absolute difference to ``df``.

    ``df`` must have two-level columns whose second level carries the marker
    initials. For each of the two markers, the columns labelled with that
    marker are summed row-wise. Three columns are appended under the
    first-level label ``'Total'``: one per marker and an ``'xdiff'`` column
    with the absolute difference of the two totals.

    Returns a new DataFrame; ``df`` itself is not modified.
    """
    # Row-wise sum of every column whose second level matches the marker.
    marker_totals = [
        df.loc[:, (slice(None), who)].sum(axis=1)
        for who in (initials1, initials2)
    ]
    # Absolute gap between the two markers' totals.
    gap = abs(marker_totals[0] - marker_totals[1])

    summary = pd.concat(marker_totals + [gap], axis=1)
    summary.columns = [('Total', initials1), ('Total', initials2), ('Total', 'xdiff')]

    # Place the three summary columns to the right of the existing ones.
    return pd.concat([df, summary], axis=1)

def find_difference(marks_1_2, threshold=3):
    """Keep only the column groups where the markers disagree noticeably.

    Parameters
    ----------
    marks_1_2 : pandas.DataFrame
        Frame with two-level columns ``(group, label)`` in which every group
        has an ``'xdiff'`` label (as built by ``collate`` / ``add_total``).
    threshold : int or float, default 3
        A group is kept when at least one row has ``xdiff >= threshold``.

    Returns
    -------
    pandas.DataFrame
        The columns of the kept groups, with groups in ``natural_sort``
        order. All rows are retained, including those below the threshold.
    """
    sorted_column_levels = natural_sort(marks_1_2.columns.levels[0])
    marks_1_2 = marks_1_2.reindex(
        columns=pd.MultiIndex.from_product([sorted_column_levels, marks_1_2.columns.levels[1]])
    )

    # A group qualifies as soon as any single row reaches the threshold.
    keep = [
        column
        for column in marks_1_2.columns.levels[0]
        if any(marks_1_2[column]['xdiff'] >= threshold)
    ]
    return marks_1_2.iloc[:, marks_1_2.columns.get_level_values(0).isin(keep)]

In [9]:
# Marker pair CL / VM: collate both score sheets, append the totals, then
# keep only the column groups that find_difference flags.
marks_CL_VM = find_difference(
    add_total(collate('CL', 'VM'), 'CL', 'VM')
)

# Same pipeline for marker pair AC / AM.
marks_AC_AM = find_difference(
    add_total(collate('AC', 'AM'), 'AC', 'AM')
)

In [12]:
marks_AC_AM

Unnamed: 0_level_0,Total,Total,Total
Unnamed: 0_level_1,AC,AM,xdiff
ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,53,50,3
2,56,56,0
3,59,51,8
4,53,52,1
5,56,51,5
6,63,59,4
7,53,52,1
8,53,53,0
9,57,53,4
10,58,53,5


In [None]:
# marks_AC_AM.to_excel('diff_AC_AM.xlsx')
# marks_CL_VM.to_excel('diff_CL_VM.xlsx')