# **Imports &#8595;**

In [1]:
import numpy as np
import pandas as pd
import os
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from sklearn.preprocessing import OneHotEncoder
from scipy.sparse import hstack
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import GridSearchCV

# **Load Dataset &#8595;**

In [2]:
DATA_PATH = "datasets/eedi-mining-misconceptions-in-mathematics"
EXTERNAL_DATA_PATH = "datasets/eedi-external-dataset"

# train_df = pd.read_csv(f'{EXTERNAL_DATA_PATH}/all_train.csv', index_col='QuestionId')
train_df = pd.read_csv(f'{DATA_PATH}/train.csv', index_col='QuestionId') #this contains the original dataset + an external dataset generated by a LLM
test_df = pd.read_csv(f'{DATA_PATH}/test.csv')
misconceptions_df = pd.read_csv(f'{DATA_PATH}/misconception_mapping.csv')

pd.options.display.max_colwidth = 300
display(train_df.head(5))
pd.options.display.max_colwidth = 50

Unnamed: 0_level_0,ConstructId,ConstructName,SubjectId,SubjectName,CorrectAnswer,QuestionText,AnswerAText,AnswerBText,AnswerCText,AnswerDText,MisconceptionAId,MisconceptionBId,MisconceptionCId,MisconceptionDId
QuestionId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,856,Use the order of operations to carry out calculations involving powers,33,BIDMAS,A,\[\r\n3 \times 2+4-5\r\n\]\r\nWhere do the brackets need to go to make the answer equal \( 13 \) ?,\( 3 \times(2+4)-5 \),\( 3 \times 2+(4-5) \),\( 3 \times(2+4-5) \),Does not need brackets,,,,1672.0
1,1612,Simplify an algebraic fraction by factorising the numerator,1077,Simplifying Algebraic Fractions,D,"Simplify the following, if possible: \( \frac{m^{2}+2 m-3}{m-3} \)",\( m+1 \),\( m+2 \),\( m-1 \),Does not simplify,2142.0,143.0,2142.0,
2,2774,Calculate the range from a list of data,339,Range and Interquartile Range from a List of Data,B,"Tom and Katie are discussing the \( 5 \) plants with these heights:\r\n\( 24 \mathrm{~cm}, 17 \mathrm{~cm}, 42 \mathrm{~cm}, 26 \mathrm{~cm}, 13 \mathrm{~cm} \)\r\nTom says if all the plants were cut in half, the range wouldn't change.\r\nKatie says if all the plants grew by \( 3 \mathrm{~cm} \)...",Only\r\nTom,Only\r\nKatie,Both Tom and Katie,Neither is correct,1287.0,,1287.0,1073.0
3,2377,Recall and use the intersecting diagonals properties of a rectangle,88,Properties of Quadrilaterals,C,The angles highlighted on this rectangle with different length sides can never be... ![A rectangle with the diagonals drawn in. The angle on the right hand side at the centre is highlighted in red and the angle at the bottom at the centre is highlighted in yellow.](),acute,obtuse,\( 90^{\circ} \),Not enough information,1180.0,1180.0,,1180.0
4,3387,Substitute positive integer values into formulae involving powers or roots,67,Substitution into Formula,A,The equation \( f=3 r^{2}+3 \) is used to find values in the table below. What is the value covered by the star? \begin{tabular}{|c|c|c|c|c|}\r\n\hline\( r \) & \( 1 \) & \( 2 \) & \( 3 \) & \( 4 \) \\\r\n\hline\( f \) & \( 6 \) & \( 15 \) & \( \color{gold}\bigstar \) & \\\r\n\hline\r\n\end{tabu...,\( 30 \),\( 27 \),\( 51 \),\( 24 \),,,,1818.0


# **Data Preprocessing &#8595;**

In [3]:
def clean(example, columns):
    """
    Cleans the example from the Dataset
    Args:
        example: an example from the Dataset
        columns: columns that will be cleaned

    Returns: update example containing 'clean' columns

    """
    for col in columns:
        text = example[f'{col}']

        # Empty text
        if type(text) not in (str, np.str_) or text=='':
            example[f'clean_{col}'] = ''
            return example

        # 'text' from the example can be of type numpy.str_, let's convert it to a python str
        text = str(text).lower()

        # Clean the text
        text = re.sub("\"", " ", text) # removes the " from certain texts
        text = re.sub("\n", " ", text) # removes the multiple "\n"
        text = re.sub(r"(\\\w+)(\W)", r" \1 \2", text) # matches with the LaTeX commands like "\hline{}",... and transforms them to " \hline {}"
        text = re.sub(r"([\(|\{|\[|\|])", r" \1", text) # matches every opening parenthesis types and puts spaces on their left
        text = re.sub(r"([\)|\}|\]])", r"\1 ", text) # matches every closing parenthesis types and puts spaces on their right
        text = re.sub(r"\\(?![a-zA-Z])", " ", text) # removes every backslash that is not the start of a LaTeX command
        text = re.sub(r"\( | \)", "", text) # removes the parentheses that appear sometimes from nowhere
        text = re.sub(r"\[ | \]", "", text) # removes the parentheses that appear sometimes from nowhere

        text = re.sub(r" +", " ", text) # cleans the double spaces made by above substitutions
        # Update the example with the cleaned text
        example[f'clean_{col}'] = text.strip()
    return example

# testing_data = {
#     'QuestionText': ["This is a question with a newline\nin the middle"],
#     'AnswerAText': ["Answer A\nwith newline and \\table[test]"],
#     'AnswerBText': ["Answer B\nwith newline and \hline(uwo)"],
#     'AnswerCText': ["Answer C\nwith newline and \color{gold}"],
#     'AnswerDText': ["Answer D\nwith newline and \\begin{tabular}"]
# }
# df = pd.DataFrame(testing_data)
# df = df.apply(clean, axis = 1, columns = columns_to_clean)
# display(df.head(1))

columns_to_clean = ['QuestionText', 'AnswerAText', 'AnswerBText', 'AnswerCText', 'AnswerDText']
train_df = train_df.apply(clean, axis = 1, columns = columns_to_clean)

# Adjust column order
new_order = ['ConstructId', 'ConstructName', 'SubjectId', 'SubjectName', 'CorrectAnswer']
for col in columns_to_clean:
    new_order.append(col)
    new_order.append(f'clean_{col}')
new_order.extend(['MisconceptionAId', 'MisconceptionBId', 'MisconceptionCId', 'MisconceptionDId'])
train_df = train_df[new_order]


display_train_df = train_df[['QuestionText', 'clean_QuestionText','AnswerAText', 'clean_AnswerAText', 'AnswerBText', 'clean_AnswerBText', 'AnswerCText', 'clean_AnswerCText', 'AnswerDText', 'clean_AnswerDText']]
pd.options.display.max_colwidth = 300
display(display_train_df.head(1))
pd.options.display.max_colwidth = 50

Unnamed: 0_level_0,QuestionText,clean_QuestionText,AnswerAText,clean_AnswerAText,AnswerBText,clean_AnswerBText,AnswerCText,clean_AnswerCText,AnswerDText,clean_AnswerDText
QuestionId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,\[\r\n3 \times 2+4-5\r\n\]\r\nWhere do the brackets need to go to make the answer equal \( 13 \) ?,[\r 3 \times 2+4-5\r \r where do the brackets need to go to make the answer equal 13 ?,\( 3 \times(2+4)-5 \),3 \times (2+4) -5,\( 3 \times 2+(4-5) \),3 \times 2+ (4-5),\( 3 \times(2+4-5) \),3 \times (2+4-5),Does not need brackets,does not need brackets


# **Build Subject-Misconception Mapping**

In [4]:
def build_subject_to_misconception_mapping(train_df):
    """
    Create a mapping from each SubjectName to the set of MisconceptionIds linked in the training data.

    Args:
        train_df: DataFrame containing columns 'SubjectName' and 'Misconception[A/B/C/D]Id'.

    Returns:
        A dictionary where keys are subjects and values are sets of MisconceptionIds.
    """
    subject_to_misconceptions = {}
    for _, row in train_df.iterrows():
        subject = row['SubjectName']
        misconceptions = {
            row['MisconceptionAId'], 
            row['MisconceptionBId'], 
            row['MisconceptionCId'], 
            row['MisconceptionDId']
        }
        misconceptions = {m for m in misconceptions if not pd.isna(m)}  # Remove NaN values
        if subject not in subject_to_misconceptions:
            subject_to_misconceptions[subject] = set()
        subject_to_misconceptions[subject].update(misconceptions)
    return subject_to_misconceptions

subject_to_misconceptions = build_subject_to_misconception_mapping(train_df)

print("Subjects and their linked misconceptions (sample):")
for subject, misconceptions in list(subject_to_misconceptions.items())[:5]:
    print(f"Subject: {subject}, Misconceptions: {misconceptions}")

Subjects and their linked misconceptions (sample):
Subject: BIDMAS, Misconceptions: {2306.0, 706.0, 2181.0, 1862.0, 1672.0, 328.0, 1416.0, 907.0, 2316.0, 77.0, 1805.0, 15.0, 524.0, 657.0, 2326.0, 1880.0, 217.0, 27.0, 2140.0, 2270.0, 1054.0, 1507.0, 2532.0, 1316.0, 2488.0, 1510.0, 1828.0, 234.0, 1963.0, 1516.0, 1642.0, 1207.0, 1400.0, 1597.0, 2175.0}
Subject: Simplifying Algebraic Fractions, Misconceptions: {2307.0, 1540.0, 1610.0, 143.0, 792.0, 1755.0, 2398.0, 2142.0, 2078.0, 353.0, 1825.0, 167.0, 1256.0, 363.0, 113.0, 891.0, 1535.0}
Subject: Range and Interquartile Range from a List of Data, Misconceptions: {1349.0, 1287.0, 2119.0, 2346.0, 1677.0, 397.0, 1073.0, 691.0, 2551.0, 2456.0, 1177.0}
Subject: Properties of Quadrilaterals, Misconceptions: {1348.0, 1940.0, 1877.0, 85.0, 1752.0, 1180.0, 226.0, 423.0, 551.0, 106.0, 1007.0, 1009.0, 1393.0, 2355.0, 2357.0, 2102.0, 1917.0, 1790.0, 2493.0}
Subject: Substitution into Formula, Misconceptions: {1792.0, 641.0, 643.0, 389.0, 1417.0, 533.0

# **Reshape Dataset For Training &#8595;**

In [5]:
# train_df columns: QuestionID, ConstructID, ConstructName, CorrectAnswer, SubjectId, SubjectName, QuestionText, Answer[A/B/C/D]Text, Misconception[A/B/C/D]Id

reshaped_data = []
for _, row in train_df.iterrows():
    for answer, misconception_id in zip(
        ['clean_AnswerAText', 'clean_AnswerBText', 'clean_AnswerCText', 'clean_AnswerDText'],
        ['MisconceptionAId', 'MisconceptionBId', 'MisconceptionCId', 'MisconceptionDId']
    ): # turn the data into a format where each datapoint (row) represents an answer choice (i.e there are now 4 datapoints for each question)
        reshaped_data.append({
            'QuestionText': row['clean_QuestionText'],
            'AnswerText': row[answer],
            'MisconceptionId': row[misconception_id],
            'SubjectName': row['SubjectName'],
            'ConstructName': row['ConstructName']
        })

reshaped_df = pd.DataFrame(reshaped_data)
reshaped_df['OriginalIndex'] = reshaped_df.index
display(reshaped_df.head())

# removed columns: QuestionId, ConstructId, CorrectAnswer, SubjectId
# other changes: Answer[A/B/C/D]Text are now in separate datapoints along with their associated Misconception[A/B/C/D]Texts

Unnamed: 0,QuestionText,AnswerText,MisconceptionId,SubjectName,ConstructName,OriginalIndex
0,[\r 3 \times 2+4-5\r \r where do the brackets ...,3 \times (2+4) -5,,BIDMAS,Use the order of operations to carry out calcu...,0
1,[\r 3 \times 2+4-5\r \r where do the brackets ...,3 \times 2+ (4-5),,BIDMAS,Use the order of operations to carry out calcu...,1
2,[\r 3 \times 2+4-5\r \r where do the brackets ...,3 \times (2+4-5),,BIDMAS,Use the order of operations to carry out calcu...,2
3,[\r 3 \times 2+4-5\r \r where do the brackets ...,does not need brackets,1672.0,BIDMAS,Use the order of operations to carry out calcu...,3
4,"simplify the following, if possible: \frac {m^...",m+1,2142.0,Simplifying Algebraic Fractions,Simplify an algebraic fraction by factorising ...,4


# **TF-IDF & OneHot Encoding&#8595;**

In [6]:
# remove NaN values (dropping all datapoints that do not have misconceptions assigned to them)
# P.S. that means we are also deleting all the rows (answer choices) that are correct
# P.P.S. unless somehow there are correct answers that have misconceptions associated with them
print(reshaped_df['MisconceptionId'].isnull().sum())  # 10582 NaN values yikes :/
reshaped_df = reshaped_df.dropna(subset=['MisconceptionId'])
print(reshaped_df['MisconceptionId'].isnull().sum())  # 0 now yippie


3106
0


In [7]:
# use TF-IDF vectorizer for text data (5000 terms from QuestionText + AnswerText)
vectorizer = TfidfVectorizer(max_features=5000)
reshaped_df['CombinedText'] = reshaped_df['QuestionText'] + " " + reshaped_df['AnswerText']
X_tfidf = vectorizer.fit_transform(reshaped_df['CombinedText'])


In [8]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
# use One hot encoding for categorical data (create a "column" for each unique subject and construct and represent each row with 0 and 1)
encoder = OneHotEncoder(sparse_output=False)
categorical_features = encoder.fit_transform(reshaped_df[['SubjectName', 'ConstructName']])

# Combine all features
X = hstack([X_tfidf, categorical_features])
y = reshaped_df['MisconceptionId']

#X = X.toarray()

# Summarize class distribution


# Define resampling pipeline
#over = SMOTE(sampling_strategy="not majority", k_neighbors=1, random_state=42)
#under = RandomUnderSampler(sampling_strategy="not minority", random_state=42)
#pipeline = Pipeline(steps=[('o', over), ('u', under)])

# Apply the pipeline to resample the dataset
#X_resampled, y_resampled = pipeline.fit_resample(X, y)

#smote = SMOTE(sampling_strategy="not majority", k_neighbors=1, random_state=42)
#X_train_resampled, y_train_resampled = smote.fit_resample(X, y)

oversampler = RandomOverSampler(random_state=42)
X_oversampled, y_oversampled = oversampler.fit_resample(X, y)
#Keep track of indices  for metadata
reshaped_resampled = reshaped_df.iloc[oversampler.sample_indices_].reset_index(drop=True)

X_train, X_test, y_train, y_test, train_meta, test_meta = train_test_split(
    X_oversampled,
    y_oversampled,
    reshaped_resampled,  # Use resampled metadata
    test_size=0.24
)



# **Random Forest Training&#8595;**

In [9]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)


# y_pred = rf_classifier.predict(X)
# print(classification_report(y_test, y_pred))


# **Filtering By Subject**


In [10]:
def filter_predictions_by_subject(y_pred_probs, test_subjects, misconceptions_by_subject):
    """
    Filter predictions by subject to prioritize misconceptions linked to each subject.
    If a subject is not found, all misconceptions are eligible.

    Args:
        y_pred_probs: Predicted probabilities for each class (misconceptions).
        test_subjects: List of SubjectName values for the test set.
        misconceptions_by_subject: Dictionary mapping subjects to MisconceptionIds.

    Returns:
        Filtered and normalized probabilities.
    """
    filtered_probs = []
    for probs, subject in zip(y_pred_probs, test_subjects):
        # Use all misconceptions if the subject is not in the mapping
        subject_misconceptions = misconceptions_by_subject.get(subject, set(rf_classifier.classes_))
        # Zero out probabilities for misconceptions not linked to the subject
        filtered_prob = [probs[j] if j in subject_misconceptions else 0 for j in range(len(probs))]
        # Normalize probabilities
        filtered_prob = np.array(filtered_prob) / np.sum(filtered_prob) if np.sum(filtered_prob) > 0 else probs
        filtered_probs.append(filtered_prob)
    return np.array(filtered_probs)

# **Testing&#8595;**

In [11]:
def map_at_25(y_true, y_pred_probs, top_k=25):

    map_25 = 0.0
    for true_label, pred_prob in zip(y_true, y_pred_probs):
        # Get top_k predictions
        top_preds = np.argsort(pred_prob)[::-1][:top_k]

        if not true_label:
            continue

        score = 0.0
        hits = 0
        for i, pred in enumerate(top_preds, start=1):
            if pred == true_label:
                hits += 1
                score += hits / i  # Precision at i

        # Average Precision at 25
        map_25 += score / min(1, top_k)

    return map_25 / len(y_true)

def ndcg_at_25(y_true, y_pred_probs, k=25):
    ndcg = 0.0
    for true_label, pred_prob in zip(y_true, y_pred_probs):
        top_preds = np.argsort(pred_prob)[::-1][:k]
        if not true_label:
            continue

        dcg = 0.0
        for i, pred in enumerate(top_preds, start=1):
            if pred == true_label:
                dcg += 1 / np.log2(i + 1)  # Discounted gain

        ideal_dcg = 1 / np.log2(1 + 1)  # Ideal DCG when correct at rank 1
        ndcg += dcg / ideal_dcg

    return ndcg / len(y_true)

def precision_at_25(y_true, y_pred_probs, k=25):
    precision = 0.0
    for true_label, pred_prob in zip(y_true, y_pred_probs):
        top_preds = np.argsort(pred_prob)[::-1][:k]
        if not true_label:
            continue

        correct = 1 if true_label in top_preds else 0
        precision += correct / k

    return precision / len(y_true)

def recall_at_25(y_true, y_pred_probs, k=25):
    recall = 0.0
    for true_label, pred_prob in zip(y_true, y_pred_probs):
        top_preds = np.argsort(pred_prob)[::-1][:k]
        if not true_label:
            continue

        correct = 1 if true_label in top_preds else 0
        recall += correct

    return recall / len(y_true)

def f1_at_25(y_true, y_pred_probs, k=25):
    precision = precision_at_25(y_true, y_pred_probs, k)
    recall = recall_at_25(y_true, y_pred_probs, k)
    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)


In [12]:
y_val_pred_probs = rf_classifier.predict_proba(X_test)

# Get the subject names for test data
test_subjects = test_meta['SubjectName'].values
# Apply subject-based filtering
filtered_y_pred_probs = filter_predictions_by_subject(
    y_pred_probs=y_val_pred_probs,
    test_subjects=test_subjects,
    misconceptions_by_subject=subject_to_misconceptions
)

y_val_true = list(y_test)

#Compare evaluation metrics for unfiltered and filtered predictions
map25_score_unfiltered = map_at_25(y_val_true, y_val_pred_probs)
ndcg_score_unfiltered = ndcg_at_25(y_val_true, y_val_pred_probs, k=25)
precision_score_unfiltered = precision_at_25(y_val_true, y_val_pred_probs, k=25)
recall_score_unfiltered = recall_at_25(y_val_true, y_val_pred_probs, k=25)
f1_score_unfiltered = f1_at_25(y_val_true, y_val_pred_probs, k=25)

map25_score_filtered = map_at_25(y_val_true, filtered_y_pred_probs)
ndcg_score_filtered = ndcg_at_25(y_val_true, filtered_y_pred_probs, k=25)
precision_score_filtered = precision_at_25(y_val_true, filtered_y_pred_probs, k=25)
recall_score_filtered = recall_at_25(y_val_true, filtered_y_pred_probs, k=25)
f1_score_filtered = f1_at_25(y_val_true, filtered_y_pred_probs, k=25)

# Print Unfiltered Scores
print("Unfiltered Scores:")
print(f"MAP@25 Score: {map25_score_unfiltered}")
print(f"NDCG@25: {ndcg_score_unfiltered}")
print(f"Precision@25: {precision_score_unfiltered}")
print(f"Recall@25: {recall_score_unfiltered}")
print(f"F1@25: {f1_score_unfiltered}")

# Print Filtered Scores
print("\nFiltered Scores:")
print(f"MAP@25 Score (Filtered): {map25_score_filtered}")
print(f"NDCG@25 (Filtered): {ndcg_score_filtered}")
print(f"Precision@25 (Filtered): {precision_score_filtered}")
print(f"Recall@25 (Filtered): {recall_score_filtered}")
print(f"F1@25 (Filtered): {f1_score_filtered}")


Unfiltered Scores:
MAP@25 Score: 0.0029191385781178257
NDCG@25: 0.004476889603796487
Precision@25: 0.0004233211468154699
Recall@25: 0.010583028670386761
F1@25: 0.0008140791284912882

Filtered Scores:
MAP@25 Score (Filtered): 0.0034608888552923865
NDCG@25 (Filtered): 0.004886357465005327
Precision@25 (Filtered): 0.0004233211468154699
Recall@25 (Filtered): 0.010583028670386761
F1@25 (Filtered): 0.0008140791284912882
Unfiltered Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        15
         1.0       1.00      1.00      1.00        11
         2.0       1.00      1.00      1.00        12
         3.0       1.00      1.00      1.00         6
         4.0       0.40      0.44      0.42         9
         6.0       0.80      1.00      0.89        12
         8.0       1.00      1.00      1.00        12
         9.0       1.00      1.00      1.00        12
        11.0       1.00      0.59      0.74        17
        14.0       1.00      1.00      1.00        15
        15.0       0.91      1.00      0.95        10
        16.0       1.00      1.00      1.00        14
        19.0       1.00      1.00      1.00        11
        20.0       1.00      1.00      1.00        10
        21.0       1.00      1.00      1.00        20
        22.0       0.83      1.00      0.91        15
        23.0       0.35      1.00      0.52         7
        26.0       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
y_pred_probs = rf_classifier.predict_proba(X_test)

# print predictions
for idx, (true_label, pred_prob) in enumerate(zip(y_test, y_pred_probs)):
    # Get top 25 predictions and probabilities
    top_preds = np.argsort(pred_prob)[::-1][:25]
    top_probs = pred_prob[top_preds]

    # Check if true is within top 25
    in_top_25 = true_label in top_preds

    print(f"Example {idx + 1}")
    print(f"True Label: {true_label}")
    print("Top 25 Predictions (Misconception ID: Probability):")
    for pred, prob in zip(top_preds, top_probs):
        print(f"ID {pred}: {prob:.4f}")
    print(f"True Label in Top 25: {in_top_25}\n")

    # Number of questions to print
    if idx == 10:
        break

Example 1
True Label: 130.0
Top 25 Predictions (Misconception ID: Probability):
ID 86: 0.8024
ID 869: 0.1976
ID 1603: 0.0000
ID 536: 0.0000
ID 528: 0.0000
ID 529: 0.0000
ID 530: 0.0000
ID 531: 0.0000
ID 532: 0.0000
ID 533: 0.0000
ID 534: 0.0000
ID 535: 0.0000
ID 537: 0.0000
ID 526: 0.0000
ID 538: 0.0000
ID 539: 0.0000
ID 540: 0.0000
ID 541: 0.0000
ID 542: 0.0000
ID 543: 0.0000
ID 544: 0.0000
ID 545: 0.0000
ID 546: 0.0000
ID 547: 0.0000
ID 527: 0.0000
True Label in Top 25: False

Example 2
True Label: 223.0
Top 25 Predictions (Misconception ID: Probability):
ID 140: 1.0000
ID 1603: 0.0000
ID 539: 0.0000
ID 529: 0.0000
ID 530: 0.0000
ID 531: 0.0000
ID 532: 0.0000
ID 533: 0.0000
ID 534: 0.0000
ID 535: 0.0000
ID 536: 0.0000
ID 537: 0.0000
ID 538: 0.0000
ID 540: 0.0000
ID 502: 0.0000
ID 541: 0.0000
ID 542: 0.0000
ID 543: 0.0000
ID 544: 0.0000
ID 545: 0.0000
ID 546: 0.0000
ID 547: 0.0000
ID 548: 0.0000
ID 549: 0.0000
ID 528: 0.0000
True Label in Top 25: False

Example 3
True Label: 1920.0
To