In [1]:
pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
Note: you may need to restart the kernel to use updated packages.


In [18]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

import evaluate
import numpy as np
from transformers import DataCollatorWithPadding

In [19]:
from datasets import load_dataset

dataset_dict = load_dataset("hebashakeel/Bert-classification-wellness")

In [20]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 1118
    })
    validation: Dataset({
        features: ['text', 'labels'],
        num_rows: 239
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 241
    })
})

In [21]:
from transformers import RobertaConfig, RobertaModel, RobertaForSequenceClassification
from transformers import RobertaTokenizer

# Initializing a RoBERTa configuration
configuration = RobertaConfig()

# Initializing a model (with random weights) from the configuration
# model = RobertaModel(configuration)

# Define dropout rates
config = RobertaConfig.from_pretrained(
    'roberta-base',
    hidden_dropout_prob=0.5,         # Dropout in fully connected layers
    attention_probs_dropout_prob=0.5, # Dropout in attention probabilities
    num_labels=4,
    id2label=id2label, 
    label2id=label2id,
)

id2label = {0: "PA", 1: "IVA", 2: "SA", 3: "SEA"}
label2id = {"PA": 0, "IVA": 1, "SA": 2, "SEA": 3}

model = RobertaForSequenceClassification.from_pretrained("roberta-base", config = config)

# Accessing the model configuration
configuration = model.config

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
# print layers
for name, param in model.named_parameters():
   print(name, param.requires_grad)

roberta.embeddings.word_embeddings.weight True
roberta.embeddings.position_embeddings.weight True
roberta.embeddings.token_type_embeddings.weight True
roberta.embeddings.LayerNorm.weight True
roberta.embeddings.LayerNorm.bias True
roberta.encoder.layer.0.attention.self.query.weight True
roberta.encoder.layer.0.attention.self.query.bias True
roberta.encoder.layer.0.attention.self.key.weight True
roberta.encoder.layer.0.attention.self.key.bias True
roberta.encoder.layer.0.attention.self.value.weight True
roberta.encoder.layer.0.attention.self.value.bias True
roberta.encoder.layer.0.attention.output.dense.weight True
roberta.encoder.layer.0.attention.output.dense.bias True
roberta.encoder.layer.0.attention.output.LayerNorm.weight True
roberta.encoder.layer.0.attention.output.LayerNorm.bias True
roberta.encoder.layer.0.intermediate.dense.weight True
roberta.encoder.layer.0.intermediate.dense.bias True
roberta.encoder.layer.0.output.dense.weight True
roberta.encoder.layer.0.output.dense.bia

In [24]:
# define text preprocessing
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True)

In [25]:
# tokenize all datasets
tokenized_data = dataset_dict.map(preprocess_function, batched=True)

In [27]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [28]:
from scipy.special import softmax

def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    # Apply softmax to get probabilities
    probabilities = softmax(predictions, axis=1)
    
    # Compute AUC for each class in a one-vs-rest fashion
    aucs = []
    for class_idx in range(4): 
        binary_labels = (labels == class_idx).astype(int)
        auc = auc_score.compute(prediction_scores=probabilities[:, class_idx], references=binary_labels)['roc_auc']
        aucs.append(auc)
    avg_auc = np.round(np.mean(aucs), 3)
    
    # Predict most probable class
    predicted_classes = np.argmax(predictions, axis=1)
    
    # Compute accuracy
    acc = np.round(accuracy.compute(predictions=predicted_classes, references=labels)['accuracy'], 3)
    
    # Compute class-wise precision and recall
    class_precision = {}
    class_recall = {}
    for class_idx in range(4):
        tp = np.sum((predicted_classes == class_idx) & (labels == class_idx))
        fp = np.sum((predicted_classes == class_idx) & (labels != class_idx))
        fn = np.sum((predicted_classes != class_idx) & (labels == class_idx))
        
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        
        class_precision[f"Precision_Class_{class_idx}"] = np.round(precision, 3)
        class_recall[f"Recall_Class_{class_idx}"] = np.round(recall, 3)
    
    return {
        "Accuracy": acc, 
        "AUC": avg_auc,
        **class_precision,
        **class_recall
    }


In [29]:
# hyperparameters
lr = 3e-5 
batch_size = 8 
num_epochs = 10 

training_args = TrainingArguments(
    output_dir="roberta-wellness-classifier",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_strategy="epoch",  # Change to "steps" if troubleshooting
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)


In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
) 

trainer.train()

Epoch,Training Loss,Validation Loss


NameError: name 'auc_score' is not defined

In [None]:
# apply model to validation dataset
predictions = trainer.predict(tokenized_data["validation"])

# Extract the logits and labels from the predictions object
logits = predictions.predictions
labels = predictions.label_ids

# Use your compute_metrics function
metrics = compute_metrics((logits, labels))
print(metrics)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Step 1: Make predictions
predictions = trainer.predict(tokenized_data["test"])

# Convert predictions to class labels
predicted_classes = np.argmax(predictions.predictions, axis=1)
true_labels = predictions.label_ids

# Step 2: Generate the confusion matrix
conf_matrix = confusion_matrix(true_labels, predicted_classes)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=range(4), yticklabels=range(4))
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()

# Step 3: Print classification report
class_report = classification_report(true_labels, predicted_classes, target_names=[f"Class {i}" for i in range(4)])
print("Classification Report:\n", class_report)


In [None]:
predictions = trainer.predict(tokenized_data["validation"])

predicted_classes = np.argmax(predictions.predictions, axis=1)
true_labels = predictions.label_ids

conf_matrix = confusion_matrix(true_labels, predicted_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=range(4), yticklabels=range(4))
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()

class_report = classification_report(true_labels, predicted_classes, target_names=[f"Class {i}" for i in range(4)])
print("Classification Report:\n", class_report)


In [None]:
from sklearn.metrics import precision_recall_curve
import numpy as np

# Assuming predictions and true_labels are defined as in previous code
for i in range(4):
    binary_labels = (true_labels == i).astype(int)
    probabilities = predictions.predictions[:, i]

    precision, recall, _ = precision_recall_curve(binary_labels, probabilities)
    plt.plot(recall, precision, label=f"Class {i}")

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for Each Class")
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import roc_curve, auc

for i in range(4):
    binary_labels = (true_labels == i).astype(int)
    probabilities = predictions.predictions[:, i]

    fpr, tpr, _ = roc_curve(binary_labels, probabilities)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"Class {i} (AUC = {roc_auc:.2f})")

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Each Class")
plt.legend(loc="lower right")
plt.show()

In [None]:
from huggingface_hub import login
login()

In [None]:
# push model to hub
trainer.push_to_hub()