In [65]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
from datasets import Dataset
from transformers import BertTokenizer
import torch

# Load the sequence-classification model from its local directory, requesting
# a 16-label classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    './nepali_news_mbert_fine_tuned_model',
    num_labels=16,
)

# NOTE(review): the tokenizer is loaded by the name
# 'bert-base-multilingual-uncased' rather than from the model directory
# above — confirm it matches the tokenizer used for fine-tuning.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')

# Report the label count recorded in the loaded model's configuration.
print("Number of labels in model configuration:", model.config.num_labels)


Number of labels in model configuration: 16


In [71]:
import pandas as pd

# Load the evaluation split from CSV into a pandas DataFrame.
# NOTE(review): the variables are named "test" but the file read here is
# validation.csv — confirm which split is intended.
test_df = pd.read_csv('3_split_data/validation.csv')

# Wrap the DataFrame in a `Dataset` so it can be tokenized with `.map()`.
test_dataset = Dataset.from_pandas(test_df)

# Tokenize the data
def tokenize_function(examples):
    """Tokenize the ``content`` field of ``examples`` with the module-level tokenizer.

    The tokenizer is called with ``padding="max_length"`` and
    ``truncation=True``; whatever it returns is passed straight back to the
    caller.

    Args:
        examples: Mapping-like object exposing a ``'content'`` entry
            (presumably a batch of texts, since the caller maps with
            ``batched=True`` — confirm).

    Returns:
        The tokenizer's output for ``examples['content']``.
    """
    texts = examples['content']
    encoded = tokenizer(texts, truncation=True, padding="max_length")
    return encoded

# Apply the tokenizer in batches. This rebinds `test_dataset` to the mapped
# result, so re-running this statement maps an already-tokenized dataset again.
test_dataset = test_dataset.map(tokenize_function, batched=True)
# Show the dataset summary (features and row count) after tokenization.
print(test_dataset)

from transformers import Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import torch
import numpy as np

def compute_metrics(pred):
    """Compute multiclass evaluation metrics from raw model scores.

    Per-class metrics and the confusion matrix are always reported for the
    full label set ``0 .. n_classes - 1`` (taken from the width of the score
    matrix), so position ``i`` in every per-class array refers to class id
    ``i`` even when some class never occurs in the evaluated split.

    Args:
        pred: Object that unpacks into ``(predictions, labels)``.
            ``predictions`` must be a 2-D array-like of per-class scores
            (one row per example, one column per class); ``labels`` holds
            the true class ids.
            Assumes class ids are the integers ``0 .. n_classes - 1`` —
            TODO confirm against the label encoding used for training.

    Returns:
        dict with the keys ``accuracy``, ``precision_per_class``,
        ``recall_per_class``, ``f1_per_class``, ``weighted_precision``,
        ``weighted_recall``, ``weighted_f1`` and ``confusion_matrix``.
        The ``*_per_class`` entries are arrays of length ``n_classes`` and
        ``confusion_matrix`` is an ``n_classes x n_classes`` array.
        NOTE(review): these non-scalar entries are returned as arrays;
        confirm that any logging/serialisation of the metrics tolerates that.
    """
    predictions, labels = pred

    # The score matrix has one column per class, so its width fixes the
    # complete label set before the scores are collapsed to class ids.
    class_ids = np.arange(predictions.shape[1])
    predictions = predictions.argmax(axis=1)  # Predicted class per example

    # Calculate accuracy
    accuracy = accuracy_score(labels, predictions)

    # Per-class precision / recall / F1. Passing `labels=` keeps the arrays
    # aligned with class ids; without it, classes absent from both `labels`
    # and `predictions` are dropped and every later index shifts.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, labels=class_ids, average=None, zero_division=0
    )

    # Support-weighted averages across classes.
    weighted_precision, weighted_recall, weighted_f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )

    # Confusion matrix over the full label set (rows and columns ordered by
    # class id).
    conf_matrix = confusion_matrix(labels, predictions, labels=class_ids)

    # Prepare results in a dictionary
    metrics = {
        'accuracy': accuracy,
        'precision_per_class': precision,
        'recall_per_class': recall,
        'f1_per_class': f1,
        'weighted_precision': weighted_precision,
        'weighted_recall': weighted_recall,
        'weighted_f1': weighted_f1,
        'confusion_matrix': conf_matrix
    }

    return metrics

# Evaluation-only configuration: no evaluation is scheduled through the
# arguments (eval_strategy="no"); evaluate() is called explicitly below.
training_args = TrainingArguments(
    eval_strategy="no",
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    output_dir='./results',
)

# Build the Trainer around the already-loaded model and the metrics function.
trainer = Trainer(
    args=training_args,
    compute_metrics=compute_metrics,
    model=model,
)

# Run evaluation on the tokenized dataset and keep the returned metrics.
results = trainer.evaluate(eval_dataset=test_dataset)

# Per-class report: walk the three per-class metric sequences in lockstep,
# using the position in the sequence as the class number.
print("Results per class:")
per_class_columns = (
    results['eval_precision_per_class'],
    results['eval_recall_per_class'],
    results['eval_f1_per_class'],
)
for i, per_class_row in enumerate(zip(*per_class_columns)):
    precision, recall, f1 = per_class_row
    print(f"Class {i}: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}")

# Aggregate report: weighted averages followed by plain accuracy.
print("\nOverall results:")
print(f"Weighted Precision: {results['eval_weighted_precision']:.4f}")
print(f"Weighted Recall: {results['eval_weighted_recall']:.4f}")
print(f"Weighted F1: {results['eval_weighted_f1']:.4f}")
print(f"Accuracy: {results['eval_accuracy']:.4f}")

# Finally, dump the confusion matrix stored under its metrics key.
print("\nConfusion Matrix:")
print(results['eval_confusion_matrix'])

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Dataset({
    features: ['content', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 837
})


Results per class:
Class 0: Precision=0.6400, Recall=0.6957, F1=0.6667
Class 1: Precision=0.7778, Recall=0.7850, F1=0.7814
Class 2: Precision=0.0000, Recall=0.0000, F1=0.0000
Class 3: Precision=0.8000, Recall=0.5714, F1=0.6667
Class 4: Precision=0.0000, Recall=0.0000, F1=0.0000
Class 5: Precision=0.8163, Recall=0.8333, F1=0.8247
Class 6: Precision=0.7738, Recall=0.8125, F1=0.7927
Class 7: Precision=0.7273, Recall=0.8889, F1=0.8000
Class 8: Precision=0.7534, Recall=0.7746, F1=0.7639
Class 9: Precision=0.6400, Recall=0.5926, F1=0.6154
Class 10: Precision=0.8664, Recall=0.9511, F1=0.9068
Class 11: Precision=0.8235, Recall=0.4667, F1=0.5957
Class 12: Precision=0.5000, Recall=0.5714, F1=0.5333
Class 13: Precision=0.0000, Recall=0.0000, F1=0.0000
Class 14: Precision=0.0000, Recall=0.0000, F1=0.0000
Class 15: Precision=0.9259, Recall=0.9091, F1=0.9174

Overall results:
Weighted Precision: 0.7815
Weighted Recall: 0.8029
Weighted F1: 0.7892
Accuracy: 0.8029

Confusion Matrix:
[[ 16   2   0   0 