In [4]:
from sklearn import svm
from sklearn.metrics import accuracy_score
import numpy as np
import torch
from transformers import BertTokenizer, BertModel

In [5]:
# Define the Bloom's Taxonomy categories
class BloomCategory:
    REMEMBERING = "REMEMBERING"
    UNDERSTANDING = "UNDERSTANDING"
    APPLYING = "APPLYING"
    ANALYZING = "ANALYZING"
    EVALUATING = "EVALUATING"
    CREATING = "CREATING"

# Define category values (used later for quality scoring)
category_values = {
    BloomCategory.REMEMBERING: 1,
    BloomCategory.UNDERSTANDING: 2,
    BloomCategory.APPLYING: 3,
    BloomCategory.ANALYZING: 4,
    BloomCategory.EVALUATING: 5,
    BloomCategory.CREATING: 6
}



train_x = [
    # REMEMBERING (Keywords: list, define, recall, state, identify)
    "Define the term 'photosynthesis'", 
    "List the stages of mitosis", 
    "Identify the capital city of Japan", 
    "Recall the name of the 16th U.S. President", 
    "State the three laws of motion", 
    "What is the boiling point of water?",
    "Identify the chemical formula for water", 
    "Define the theory of relativity",
    "State the first law of thermodynamics", 
    "Recall the main characters in 'To Kill a Mockingbird'", 
    "List the names of all planets in the solar system", 
    "Identify the tallest mountain in the world",

    # UNDERSTANDING (Keywords: summarize, explain, interpret, classify, describe)
    "Summarize the main events of World War II", 
    "Explain the process of cellular respiration", 
    "Describe the character development in the novel", 
    "Interpret the meaning of the poem", 
    "Classify the different types of rocks", 
    "Explain how photosynthesis works in plants",
    "Summarize the causes of the Great Depression", 
    "Explain the significance of the Magna Carta", 
    "Describe how seasons change on Earth", 
    "Classify the different types of government systems", 
    "Interpret the message of Martin Luther King Jr.'s speech", 
    "Explain how vaccines work in the human body",

    # APPLYING (Keywords: solve, use, demonstrate, apply, implement)
    "Solve this quadratic equation", 
    "Use the law of supply and demand to explain market prices", 
    "Apply Ohm's law to calculate the resistance in this circuit", 
    "Demonstrate how to tie a knot", 
    "Implement the formula to calculate compound interest", 
    "Solve a real-world problem using algebraic expressions",
    "Apply the Pythagorean theorem to find the length of the hypotenuse", 
    "Demonstrate the steps for creating a simple website", 
    "Use the periodic table to predict the reactivity of an element", 
    "Apply the principles of aerodynamics to explain how planes fly", 
    "Solve for x in the equation: 2x + 3 = 7", 
    "Use a map to determine the shortest driving route between two cities",

    # ANALYZING (Keywords: compare, contrast, examine, differentiate, analyze)
    "Compare the leadership styles of two presidents", 
    "Analyze the impact of climate change on polar bears", 
    "Examine the plot structure of the novel", 
    "Differentiate between renewable and non-renewable energy sources", 
    "Analyze the similarities and differences between socialism and capitalism", 
    "Examine the author's use of symbolism in the text",
    "Compare and contrast the properties of metals and non-metals", 
    "Analyze the reasons behind the stock market crash of 1929", 
    "Differentiate between similes and metaphors in the poem", 
    "Examine the causes and effects of the American Civil War", 
    "Analyze the impact of deforestation on biodiversity", 
    "Compare the benefits and drawbacks of solar and wind energy",

    # EVALUATING (Keywords: assess, critique, judge, argue, justify)
    "Assess the effectiveness of the new policy", 
    "Critique the author's argument on climate change", 
    "Judge the fairness of the new tax law", 
    "Argue whether electric cars are more sustainable", 
    "Justify the need for stricter environmental regulations", 
    "Evaluate the success of the company’s marketing campaign",
    "Assess the effectiveness of renewable energy sources compared to fossil fuels", 
    "Critique the use of technology in modern education", 
    "Judge the ethical implications of genetic modification", 
    "Argue for or against the use of nuclear energy", 
    "Justify the use of military intervention in international conflicts", 
    "Evaluate the impact of social media on youth culture",

    # CREATING (Keywords: design, create, compose, develop, construct)
    "Design a new logo for the company", 
    "Create a short film based on the prompt", 
    "Compose a piece of music inspired by nature", 
    "Develop a mobile app to track fitness goals", 
    "Construct a model of a sustainable city", 
    "Write a story about a hero overcoming adversity",
    "Design an experiment to test the effects of different fertilizers on plant growth", 
    "Create a storyboard for a short film about climate change", 
    "Invent a new product that could solve everyday household problems", 
    "Compose a poem about the beauty of nature", 
    "Develop a business plan for a startup company", 
    "Write a song expressing gratitude"
]

train_y = [
    # REMEMBERING
    BloomCategory.REMEMBERING, BloomCategory.REMEMBERING, BloomCategory.REMEMBERING,
    BloomCategory.REMEMBERING, BloomCategory.REMEMBERING, BloomCategory.REMEMBERING,
    BloomCategory.REMEMBERING, BloomCategory.REMEMBERING, BloomCategory.REMEMBERING,
    BloomCategory.REMEMBERING, BloomCategory.REMEMBERING, BloomCategory.REMEMBERING,

    # UNDERSTANDING
    BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING,
    BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING,
    BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING,
    BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING,

    # APPLYING
    BloomCategory.APPLYING, BloomCategory.APPLYING, BloomCategory.APPLYING,
    BloomCategory.APPLYING, BloomCategory.APPLYING, BloomCategory.APPLYING,
    BloomCategory.APPLYING, BloomCategory.APPLYING, BloomCategory.APPLYING,
    BloomCategory.APPLYING, BloomCategory.APPLYING, BloomCategory.APPLYING,

    # ANALYZING
    BloomCategory.ANALYZING, BloomCategory.ANALYZING, BloomCategory.ANALYZING,
    BloomCategory.ANALYZING, BloomCategory.ANALYZING, BloomCategory.ANALYZING,
    BloomCategory.ANALYZING, BloomCategory.ANALYZING, BloomCategory.ANALYZING,
    BloomCategory.ANALYZING, BloomCategory.ANALYZING, BloomCategory.ANALYZING,

    # EVALUATING
    BloomCategory.EVALUATING, BloomCategory.EVALUATING, BloomCategory.EVALUATING,
    BloomCategory.EVALUATING, BloomCategory.EVALUATING, BloomCategory.EVALUATING,
    BloomCategory.EVALUATING, BloomCategory.EVALUATING, BloomCategory.EVALUATING,
    BloomCategory.EVALUATING, BloomCategory.EVALUATING, BloomCategory.EVALUATING,

    # CREATING
    BloomCategory.CREATING, BloomCategory.CREATING, BloomCategory.CREATING,
    BloomCategory.CREATING, BloomCategory.CREATING, BloomCategory.CREATING,
    BloomCategory.CREATING, BloomCategory.CREATING, BloomCategory.CREATING,
    BloomCategory.CREATING, BloomCategory.CREATING, BloomCategory.CREATING
]


# New test data (completely separate from training data)
test_x = [
    # REMEMBERING
    "Name the first president of the United States.", 
    "What is the atomic number of oxygen?", 
    "Identify the longest river in the world.",
    "List the colors of the rainbow.",

    # UNDERSTANDING
    "Explain the significance of the Magna Carta.", 
    "Summarize the water cycle in your own words.", 
    "Interpret the message of the speech made by Martin Luther King Jr.", 
    "Classify the types of triangles based on their angles.",

    # APPLYING
    "Solve for x in the equation: 2x + 3 = 7.", 
    "Apply the Pythagorean theorem to find the length of the hypotenuse.", 
    "Use the periodic table to predict the reactivity of elements.", 
    "Apply the law of gravity to explain why objects fall to the ground.",

    # ANALYZING
    "Analyze the reasons behind the stock market crash of 1929.", 
    "Compare and contrast plant cells and animal cells.", 
    "Differentiate between similes and metaphors in the poem.", 
    "Examine the character motivations in Shakespeare’s 'Hamlet'.",

    # EVALUATING
    "Evaluate the success of the Apollo 11 mission.", 
    "Critique the author's argument on social inequality.", 
    "Judge the ethical implications of genetic modification.", 
    "Assess the effectiveness of renewable energy sources compared to fossil fuels.",

    # CREATING
    "Design an experiment to test the effects of different fertilizers on plant growth.", 
    "Create a storyboard for a short film about climate change.", 
    "Invent a new product that could solve everyday household problems.", 
    "Write a song expressing gratitude."
]

test_y = [
    # REMEMBERING
    BloomCategory.REMEMBERING, BloomCategory.REMEMBERING, 
    BloomCategory.REMEMBERING, BloomCategory.REMEMBERING,

    # UNDERSTANDING
    BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING, 
    BloomCategory.UNDERSTANDING, BloomCategory.UNDERSTANDING,

    # APPLYING
    BloomCategory.APPLYING, BloomCategory.APPLYING, 
    BloomCategory.APPLYING, BloomCategory.APPLYING,

    # ANALYZING
    BloomCategory.ANALYZING, BloomCategory.ANALYZING, 
    BloomCategory.ANALYZING, BloomCategory.ANALYZING,

    # EVALUATING
    BloomCategory.EVALUATING, BloomCategory.EVALUATING, 
    BloomCategory.EVALUATING, BloomCategory.EVALUATING,

    # CREATING
    BloomCategory.CREATING, BloomCategory.CREATING, 
    BloomCategory.CREATING, BloomCategory.CREATING
]


In [6]:
# Load BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Function to get BERT embeddings
def get_bert_embeddings(texts):
    embeddings = []
    for text in texts:
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        # Use the mean of the last hidden layer's token embeddings
        embedding = outputs.last_hidden_state.mean(dim=1).numpy().flatten()
        embeddings.append(embedding)
    return np.array(embeddings)





In [7]:
# Convert training texts to BERT embeddings
train_x_vectors = get_bert_embeddings(train_x)

# Convert test texts to BERT embeddings
test_x_vectors = get_bert_embeddings(test_x)



In [8]:
# Initialize and train SVM classifier on the entire training set
clf_svm = svm.SVC(kernel='linear')
clf_svm.fit(train_x_vectors, train_y)



In [9]:
# Make predictions on the test set
test_predictions = clf_svm.predict(test_x_vectors)

In [10]:
# Calculate and print the accuracy of the model
accuracy = accuracy_score(test_y, test_predictions)
print(f"Model Accuracy on Test Data: {accuracy * 100:.2f}%")

Model Accuracy on Test Data: 95.83%


In [11]:
# Optionally: Inspect test predictions
for question, prediction, actual in zip(test_x, test_predictions, test_y):
    print(f"Question: '{question}' | Predicted: {prediction}, Actual: {actual}")

Question: 'Name the first president of the United States.' | Predicted: REMEMBERING, Actual: REMEMBERING
Question: 'What is the atomic number of oxygen?' | Predicted: REMEMBERING, Actual: REMEMBERING
Question: 'Identify the longest river in the world.' | Predicted: REMEMBERING, Actual: REMEMBERING
Question: 'List the colors of the rainbow.' | Predicted: REMEMBERING, Actual: REMEMBERING
Question: 'Explain the significance of the Magna Carta.' | Predicted: UNDERSTANDING, Actual: UNDERSTANDING
Question: 'Summarize the water cycle in your own words.' | Predicted: UNDERSTANDING, Actual: UNDERSTANDING
Question: 'Interpret the message of the speech made by Martin Luther King Jr.' | Predicted: UNDERSTANDING, Actual: UNDERSTANDING
Question: 'Classify the types of triangles based on their angles.' | Predicted: APPLYING, Actual: UNDERSTANDING
Question: 'Solve for x in the equation: 2x + 3 = 7.' | Predicted: APPLYING, Actual: APPLYING
Question: 'Apply the Pythagorean theorem to find the length of 