In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import multilabel_confusion_matrix, f1_score, accuracy_score
import numpy as np

# Create a synthetic dataset
data = {
    'text': [
        "Define the law of conservation of energy.",
        "Summarize the main plot of 'Hamlet'.",
        "Explain the process of cellular respiration in your own words.",
        "Apply the Pythagorean theorem to calculate the length of the hypotenuse.",
        "Use the company's style guide to write a marketing email.",
        "Compare and contrast the economic policies of Keynes and Hayek.",
        "Analyze the causes of the French Revolution.",
        "Critique the author's use of foreshadowing in the novel.",
        "Assess the effectiveness of the new government program.",
        "Design a new architectural plan for a sustainable house.",
        "Formulate a novel hypothesis to explain the observed phenomenon.",
        "Compose a symphony that blends classical and modern elements."
    ],
    # Labels represent [Remember, Understand, Apply, Analyze, Evaluate, Create]
    'labels': [
        [1.0, 0.2, 0.0, 0.0, 0.0, 0.0],  # Remembering
        [0.8, 0.5, 0.1, 0.0, 0.0, 0.0],  # Understanding
        [0.2, 1.0, 0.3, 0.1, 0.0, 0.0],  # Understanding
        [0.1, 0.4, 1.0, 0.2, 0.0, 0.0],  # Applying
        [0.0, 0.2, 1.0, 0.4, 0.3, 0.0],  # Applying
        [0.1, 0.4, 0.2, 1.0, 0.5, 0.0],  # Analyzing
        [0.2, 0.5, 0.3, 1.0, 0.4, 0.1],  # Analyzing
        [0.1, 0.3, 0.1, 0.8, 1.0, 0.2],  # Evaluating
        [0.0, 0.2, 0.4, 0.7, 1.0, 0.3],  # Evaluating
        [0.0, 0.1, 0.3, 0.5, 0.6, 1.0],  # Creating
        [0.0, 0.1, 0.2, 0.6, 0.7, 1.0],  # Creating
        [0.0, 0.0, 0.1, 0.4, 0.5, 1.0],  # Creating
    ]
}

df = pd.DataFrame(data)

# Define the labels for our model
labels = ['Remember', 'Understand', 'Apply', 'Analyze', 'Evaluate', 'Create']

In [17]:
df

Unnamed: 0,text,labels
0,Define the law of conservation of energy.,"[1.0, 0.2, 0.0, 0.0, 0.0, 0.0]"
1,Summarize the main plot of 'Hamlet'.,"[0.8, 0.5, 0.1, 0.0, 0.0, 0.0]"
2,Explain the process of cellular respiration in...,"[0.2, 1.0, 0.3, 0.1, 0.0, 0.0]"
3,Apply the Pythagorean theorem to calculate the...,"[0.1, 0.4, 1.0, 0.2, 0.0, 0.0]"
4,Use the company's style guide to write a marke...,"[0.0, 0.2, 1.0, 0.4, 0.3, 0.0]"
5,Compare and contrast the economic policies of ...,"[0.1, 0.4, 0.2, 1.0, 0.5, 0.0]"
6,Analyze the causes of the French Revolution.,"[0.2, 0.5, 0.3, 1.0, 0.4, 0.1]"
7,Critique the author's use of foreshadowing in ...,"[0.1, 0.3, 0.1, 0.8, 1.0, 0.2]"
8,Assess the effectiveness of the new government...,"[0.0, 0.2, 0.4, 0.7, 1.0, 0.3]"
9,Design a new architectural plan for a sustaina...,"[0.0, 0.1, 0.3, 0.5, 0.6, 1.0]"


In [18]:
# Load the dataset (assuming 'train.csv' exists with text and labels)
train_df = pd.read_csv('train.csv')

In [19]:
for col in train_df.columns[1:]:
  # Divide the column by 10
  train_df[col] = train_df[col] / 10

In [20]:
train_df[-2:]

Unnamed: 0,question,creation_complexity,evaluation_complexity,analysis_complexity,synthesis_complexity,applying_complexity,hypothesis_complexity
18,How can a small nation balance economic growth...,0.7,0.6,0.8,0.7,0.6,0.5
19,If you were to design an AI system to mediate ...,0.8,0.7,0.8,0.7,0.6,0.4


In [21]:
label_cols = ['creation_complexity', 'evaluation_complexity', 'analysis_complexity', 'synthesis_complexity', 'applying_complexity', 'hypothesis_complexity']
train_df['labels'] = train_df[label_cols].values.tolist()

In [22]:
train_df[-2:]

Unnamed: 0,question,creation_complexity,evaluation_complexity,analysis_complexity,synthesis_complexity,applying_complexity,hypothesis_complexity,labels
18,How can a small nation balance economic growth...,0.7,0.6,0.8,0.7,0.6,0.5,"[0.7, 0.6, 0.8, 0.7, 0.6, 0.5]"
19,If you were to design an AI system to mediate ...,0.8,0.7,0.8,0.7,0.6,0.4,"[0.8, 0.7, 0.8, 0.7, 0.6, 0.4]"


In [23]:
# Initialize tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Create a custom dataset class
class BloomsTaxonomyDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = torch.tensor(self.labels[idx], dtype=torch.float)

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': label
        }

# Create the dataset object
train_dataset = BloomsTaxonomyDataset(
    texts=train_df['question'].tolist(),
    labels=train_df['labels'].tolist(),
    tokenizer=tokenizer
)

In [24]:
# Load the model and configure it for multi-label classification
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=len(labels),
    problem_type="multi_label_classification" # Crucial for multi-label setup
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=15, # Increase epochs for small datasets
    per_device_train_batch_size=4,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_strategy="epoch",      # Not used here, but good practice
    report_to="none",
)

In [26]:
# Define a function to compute metrics
def compute_metrics(p):
    # The model outputs logits, so we apply sigmoid first
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(logits))

    # Use a threshold to convert probabilities to binary predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= 0.5)] = 1

    y_true = p.label_ids

    # Calculate metrics
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)

    return {
        'f1_micro': f1_micro_average,
        'accuracy': accuracy
    }

In [27]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # You would add an eval_dataset here for proper evaluation
    # eval_dataset=eval_dataset,
    # compute_metrics=compute_metrics, # Metrics function for evaluation
)



In [28]:
# Train the model
trainer.train()

Step,Training Loss
10,0.682
20,0.6433
30,0.6066
40,0.5895
50,0.5843
60,0.5814
70,0.5797


TrainOutput(global_step=75, training_loss=0.6075065072377522, metrics={'train_runtime': 214.0848, 'train_samples_per_second': 1.401, 'train_steps_per_second': 0.35, 'total_flos': 9935763609600.0, 'train_loss': 0.6075065072377522, 'epoch': 15.0})

In [29]:
# Create a function for prediction
def evaluate_text(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Get model predictions (logits)
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities (0-1 scores) using sigmoid
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(outputs.logits.squeeze())

    # Create a dictionary of scores
    scores = {label: prob.item() for label, prob in zip(labels, probs)}
    return scores


In [30]:
# --- Example Usage ---
new_text = "Critique the new environmental policy based on its economic and social impact."
scores = evaluate_text(new_text)

print(f"Evaluation for text: '{new_text}'")
for label, score in scores.items():
    print(f"- {label}: {score:.4f}")

Evaluation for text: 'Critique the new environmental policy based on its economic and social impact.'
- Remember: 0.5621
- Understand: 0.6594
- Apply: 0.7294
- Analyze: 0.6500
- Evaluate: 0.5870
- Create: 0.4441


In [31]:
new_text_2 = "List the primary colors."
scores_2 = evaluate_text(new_text_2)

print(f"\nEvaluation for text: '{new_text_2}'")
for label, score in scores_2.items():
    print(f"- {label}: {score:.4f}")


Evaluation for text: 'List the primary colors.'
- Remember: 0.2507
- Understand: 0.2894
- Apply: 0.3705
- Analyze: 0.2973
- Evaluate: 0.2988
- Create: 0.1881


In [32]:
# prompt: Save trained model to a disk

# Specify the directory where you want to save the model
output_dir = "./saved_model"

# Save the model and tokenizer
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"Model saved to {output_dir}")

Model saved to ./saved_model


In [33]:
# prompt: Load saved model into memory and evaluate text

# Specify the path to the saved model directory
saved_model_path = "./saved_model"

# Load the saved model and tokenizer
loaded_model = DistilBertForSequenceClassification.from_pretrained(saved_model_path)
loaded_tokenizer = DistilBertTokenizer.from_pretrained(saved_model_path)

# Create a function for prediction using the loaded model
def evaluate_text_loaded(text):
    # Tokenize the input text using the loaded tokenizer
    inputs = loaded_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move inputs to the same device as the model (if using GPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loaded_model.to(device)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Get model predictions (logits)
    with torch.no_grad():
        outputs = loaded_model(**inputs)

    # Convert logits to probabilities (0-1 scores) using sigmoid
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(outputs.logits.squeeze())

    # Create a dictionary of scores
    scores = {label: prob.item() for label, prob in zip(labels, probs)}
    return scores



In [34]:
# --- Example Usage with Loaded Model ---
new_text_3 = "Describe the key characteristics of Impressionism."
scores_3 = evaluate_text_loaded(new_text_3)

print(f"\nEvaluation for text: '{new_text_3}' (using loaded model)")
for label, score in scores_3.items():
    print(f"- {label}: {score:.4f}")

new_text_4 = "Develop a marketing strategy for a new eco-friendly product."
scores_4 = evaluate_text_loaded(new_text_4)

print(f"\nEvaluation for text: '{new_text_4}' (using loaded model)")
for label, score in scores_4.items():
    print(f"- {label}: {score:.4f}")


Evaluation for text: 'Describe the key characteristics of Impressionism.' (using loaded model)
- Remember: 0.4471
- Understand: 0.5556
- Apply: 0.5769
- Analyze: 0.5658
- Evaluate: 0.4913
- Create: 0.3610

Evaluation for text: 'Develop a marketing strategy for a new eco-friendly product.' (using loaded model)
- Remember: 0.6238
- Understand: 0.6396
- Apply: 0.6878
- Analyze: 0.6785
- Evaluate: 0.5681
- Create: 0.4287
