In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import ast
import tabulate
import pyperclip

In [2]:
# Model identifiers; each name is interpolated into the prediction/label CSV file names below.
model_list = [
    'bert-base-cased',
    'MathBert',
    'roberta-base',
]

# Predictions to Labels

In [3]:
# Choose which model's top-k predictions to convert (index into model_list).
model_name = model_list[2]
preds_csv_path = f'Preds_second_ver/2024-07-29-16/Preds_{model_name}_top_k.csv'
preds_data = pd.read_csv(preds_csv_path)
preds_data

Unnamed: 0,id,Question,top_1_preds,top_2_preds,top_3_preds
0,566,Question: Jesse's room is 12 feet long and 8 f...,[8],[4 8],[9 4 8]
1,766,Question: A bus can transport 48 passengers. I...,[12],[ 4 12],[ 7 4 12]
2,860,Question: Kathy scored 360 in first quarter an...,[16],[15 16],[ 2 15 16]
3,74,Question: The clown blew up forty-seven balloo...,[3],[15 3],[ 0 15 3]
4,1794,Question: Amy's dad was taking everyone out to...,[18],[ 0 18],[ 5 0 18]
...,...,...,...,...,...
95,1313,Question: Sum of a number and 15 is 96. Find t...,[13],[ 3 13],[17 3 13]
96,296,Question: Jasmine and her mom went to the groc...,[3],[2 3],[15 2 3]
97,1007,Question: Amy has 67 balloons. 29 of them are ...,[3],[15 3],[ 1 15 3]
98,63,Question: Jose has 28 chickens and 18 ducks. H...,[3],[15 3],[ 0 15 3]


In [4]:
# Inspect a single cell to see which Python type read_csv produced for the prediction columns.
type(preds_data.loc[3, 'top_3_preds'])

str

In [5]:
# Maps a predicted class index (0-18) to its label string.
# NOTE(review): the labels look like curriculum standard codes — confirm the source of this ordering.
id_to_label = dict(enumerate([
    '1.OA.A.1',
    '2.NBT.B.5',
    '2.NBT.B.7',
    '2.OA.A.1',
    '3.OA.A.3',
    '3.OA.A.8',
    '3.OA.D.8',
    '4.MD.A.2',
    '4.MD.A.3',
    '4.NBT.B.5',
    '4.NBT.B.6',
    '4.OA.A.3',
    '5.NBT.B.6',
    '6.EE.B.6',
    '6.NS.B.4',
    '6.RP.A.1',
    '6.RP.A.3',
    '8.EE.C.8',
    'K.OA.A.2',
]))

In [6]:
def str_to_list(s):
    """Parse the text form of an integer list into a real list of ints.

    Both the whitespace-separated form (``'[ 4 12]'``) and the
    comma-separated form (``'[4, 12]'``) are accepted, and whitespace
    around the brackets is ignored.

    Args:
        s: A string such as ``'[9 4 8]'`` or ``'[9, 4, 8]'``, or an
            already-parsed list.

    Returns:
        A list of ints. A list argument is returned unchanged, which keeps
        re-applying this function to an already converted column harmless.

    Raises:
        ValueError: If a token between the brackets is not an integer.
    """
    if isinstance(s, list):
        return s
    # Trim outer whitespace first so the brackets are really at the ends,
    # then treat commas as whitespace so both formats tokenize the same way.
    inner = s.strip().strip('[]')
    return [int(token) for token in inner.replace(',', ' ').split()]

# Turn every top-k prediction column from text into real lists of ints
for pred_col in ('top_1_preds', 'top_2_preds', 'top_3_preds'):
    preds_data[pred_col] = preds_data[pred_col].apply(str_to_list)



def map_ids_to_labels(pred_ids):
    """Translate predicted class IDs into one comma-joined string of labels.

    Each ID is looked up in ``id_to_label``. Every label is prefixed with a
    single space before joining with ``','``, so the result reads
    ``' A, B, C'`` (note the leading space on the first label as well).
    """
    spaced_labels = []
    for pred_id in pred_ids:
        spaced_labels.append(' ' + id_to_label[pred_id])
    return ','.join(spaced_labels)

# Add one human-readable label column for each top-k prediction column
label_column_for = {
    'top_1_preds': 'Top_1_labels',
    'top_2_preds': 'Top_2_labels',
    'top_3_preds': 'Top_3_labels',
}
for pred_col, label_col in label_column_for.items():
    preds_data[label_col] = preds_data[pred_col].apply(map_ids_to_labels)

preds_data

Unnamed: 0,id,Question,top_1_preds,top_2_preds,top_3_preds,Top_1_labels,Top_2_labels,Top_3_labels
0,566,Question: Jesse's room is 12 feet long and 8 f...,[8],"[4, 8]","[9, 4, 8]",4.MD.A.3,"3.OA.A.3, 4.MD.A.3","4.NBT.B.5, 3.OA.A.3, 4.MD.A.3"
1,766,Question: A bus can transport 48 passengers. I...,[12],"[4, 12]","[7, 4, 12]",5.NBT.B.6,"3.OA.A.3, 5.NBT.B.6","4.MD.A.2, 3.OA.A.3, 5.NBT.B.6"
2,860,Question: Kathy scored 360 in first quarter an...,[16],"[15, 16]","[2, 15, 16]",6.RP.A.3,"6.RP.A.1, 6.RP.A.3","2.NBT.B.7, 6.RP.A.1, 6.RP.A.3"
3,74,Question: The clown blew up forty-seven balloo...,[3],"[15, 3]","[0, 15, 3]",2.OA.A.1,"6.RP.A.1, 2.OA.A.1","1.OA.A.1, 6.RP.A.1, 2.OA.A.1"
4,1794,Question: Amy's dad was taking everyone out to...,[18],"[0, 18]","[5, 0, 18]",K.OA.A.2,"1.OA.A.1, K.OA.A.2","3.OA.A.8, 1.OA.A.1, K.OA.A.2"
...,...,...,...,...,...,...,...,...
95,1313,Question: Sum of a number and 15 is 96. Find t...,[13],"[3, 13]","[17, 3, 13]",6.EE.B.6,"2.OA.A.1, 6.EE.B.6","8.EE.C.8, 2.OA.A.1, 6.EE.B.6"
96,296,Question: Jasmine and her mom went to the groc...,[3],"[2, 3]","[15, 2, 3]",2.OA.A.1,"2.NBT.B.7, 2.OA.A.1","6.RP.A.1, 2.NBT.B.7, 2.OA.A.1"
97,1007,Question: Amy has 67 balloons. 29 of them are ...,[3],"[15, 3]","[1, 15, 3]",2.OA.A.1,"6.RP.A.1, 2.OA.A.1","2.NBT.B.5, 6.RP.A.1, 2.OA.A.1"
98,63,Question: Jose has 28 chickens and 18 ducks. H...,[3],"[15, 3]","[0, 15, 3]",2.OA.A.1,"6.RP.A.1, 2.OA.A.1","1.OA.A.1, 6.RP.A.1, 2.OA.A.1"


In [7]:
# Write the id, question text and label columns to a new CSV file (without the index)
label_columns = ['id','Question', 'Top_1_labels', 'Top_2_labels', 'Top_3_labels']
labels_csv_path = f'Preds_second_ver/2024-07-29-16/Labels_{model_name}_top_k.csv'
preds_data[label_columns].to_csv(labels_csv_path, index=False)

# Label Evaluation

In [8]:
# Select the first entry of model_list as the current model.
# NOTE(review): the evaluation loop further down rebinds model_name for every
# entry of model_list, so this assignment looks unused there — confirm before removing.
model_name = model_list[0]
def check_agreement(row, column_name):
    """Return True when the row's ground-truth KC is among its predicted labels.

    Args:
        row: A mapping-like row (e.g. one DataFrame row) providing the keys
            ``"Agreed KC"`` and ``column_name``.
        column_name: Name of the field holding the predicted labels as one
            comma-separated string.

    Returns:
        bool: Whether ``row["Agreed KC"]`` equals one of the predicted labels.
    """
    # Split on the bare comma and strip each piece, so the match does not
    # depend on whether the labels were joined with "," or ", ".
    predicted_labels = [label.strip() for label in row[column_name].split(",")]
    return row["Agreed KC"] in predicted_labels

def check_label_in(row):
    """Return True when the row's "Agreed KC" is one of the labels in id_to_label."""
    return row["Agreed KC"] in id_to_label.values()

In [10]:
# Evaluate the saved top-k labels of every model against the agreed
# knowledge components (KCs) and print the results as a Markdown table.

# The ground truth does not depend on the model, so read it once up front
# instead of re-reading and renaming it on every loop iteration.
ground_truth_data = pd.read_csv('data_second_ver/ASDIv_Test_4.csv').rename(
    columns={'Question_ID': 'id'}
)

results = []
for model_name in model_list:
    preds_data = pd.read_csv(f'Preds_second_ver/2024-07-29-16/Labels_{model_name}_top_k.csv')
    preds_data = pd.merge(preds_data, ground_truth_data[['id','Agreed KC']], on='id')

    # "Agreed KC" may hold several comma-separated KCs. Give each KC its own
    # row so that every (question, KC) pair is scored separately. Missing
    # values stay missing and simply count as a disagreement.
    eval_data = preds_data.copy()
    eval_data['Agreed KC'] = eval_data['Agreed KC'].str.split(',')
    eval_data = eval_data.explode('Agreed KC', ignore_index=True)
    eval_data['Agreed KC'] = eval_data['Agreed KC'].str.strip()

    eval_data["Top_1_agreement"] = eval_data.apply(check_agreement, column_name="Top_1_labels", axis=1)
    eval_data["Top_2_agreement"] = eval_data.apply(check_agreement, column_name="Top_2_labels", axis=1)
    eval_data["Top_3_agreement"] = eval_data.apply(check_agreement, column_name="Top_3_labels", axis=1)

    # Share of rows whose agreed KC appears in the top-k predicted labels.
    accuracy_top_1 = eval_data["Top_1_agreement"].mean()
    accuracy_top_2 = eval_data["Top_2_agreement"].mean()
    accuracy_top_3 = eval_data["Top_3_agreement"].mean()

    # Share of rows whose agreed KC is one of the labels in id_to_label at all.
    eval_data["Agreed KC in Labels"] = eval_data.apply(check_label_in, axis=1)
    accuracy_in_labels = eval_data["Agreed KC in Labels"].mean()

    # Accuracy relative to the rows whose KC is in the label set.
    actual_accuracy_top_1 = accuracy_top_1/accuracy_in_labels
    actual_accuracy_top_2 = accuracy_top_2/accuracy_in_labels
    actual_accuracy_top_3 = accuracy_top_3/accuracy_in_labels

    results.append([
        f'{model_name}',
        f'{accuracy_top_1:.3f} ({actual_accuracy_top_1:.3f})',
        f'{accuracy_top_2:.3f} ({actual_accuracy_top_2:.3f})',
        f'{accuracy_top_3:.3f} ({actual_accuracy_top_3:.3f})',
        f'{accuracy_in_labels:.3f}'
    ])


tabels = tabulate.tabulate(results, headers=["Model", "Top 1 Accuracy", "Top 2 Accuracy", "Top 3 Accuracy", "Rate of Agreed KCs in Labels"], tablefmt='pipe')
print(tabels)

# Copying to the clipboard is a convenience only: print first, and do not let
# a missing clipboard backend (e.g. on a headless server) fail the cell.
try:
    pyperclip.copy(tabels)
except pyperclip.PyperclipException as clipboard_error:
    print(f'(clipboard copy skipped: {clipboard_error})')



| Model           | Top 1 Accuracy   | Top 2 Accuracy   | Top 3 Accuracy   |   Rate of Agreed KCs in Labels |
|:----------------|:-----------------|:-----------------|:-----------------|-------------------------------:|
| bert-base-cased | 0.636 (0.769)    | 0.709 (0.857)    | 0.727 (0.879)    |                          0.827 |
| MathBert        | 0.527 (0.637)    | 0.682 (0.824)    | 0.718 (0.868)    |                          0.827 |
| roberta-base    | 0.655 (0.791)    | 0.700 (0.846)    | 0.736 (0.890)    |                          0.827 |
