In [None]:
!pip install -q datasets

In [None]:
from datasets import load_dataset

# Download the Codeforces problems dataset and keep only its 'test' split.
codeforces_splits = load_dataset("mrfire15/CodeforcesProblems")
test_split = codeforces_splits['test']


def _lowercase_description(example):
    """Return the replacement "Problem Description" value for one row, lower-cased."""
    return {"Problem Description": example["Problem Description"].lower()}


# Lower-casing the text is only wanted when evaluating the BERT model.
dataset = test_split.map(_lowercase_description)

In [None]:
# Every feature column except these ones is treated as a tag label.
# NOTE(review): the evaluation cell assembles label vectors in its own hard-coded
# column order — confirm that order matches the order produced here.
non_label_columns = ['Problem ID', 'Problem Description', 'Rating', '__index_level_0__']
labels = [column for column in dataset.features if column not in non_label_columns]

# Forward and reverse lookups between class index and tag name.
id2label = dict(enumerate(labels))
label2id = {name: index for index, name in id2label.items()}
labels

In [None]:
# Show the class-index -> tag-name mapping for a quick visual check.
id2label

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint to evaluate; change this id to score other models.
model_name = "mrfire15/cf-bert-finetuned1"

# Settings for the classification head: multi-label problem type, with one
# output per entry in `labels` and the index <-> name lookups attached.
classifier_config = dict(
    problem_type="multi_label_classification",
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, **classifier_config)

In [None]:
# Display the loaded model object for inspection.
model

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm


# Number of examples the DataLoader groups into each batch fed to the model.
batch_size = 16

class TextDataset(torch.utils.data.Dataset):
    """Map-style dataset that yields ``(text, label_vector)`` pairs.

    Parameters
    ----------
    dataset
        Indexable, sized collection of rows. Each row is indexed by column
        name and must provide "Problem Description" plus every label column.
    label_columns
        Column names, in order, that make up the label vector. Defaults to
        ``DEFAULT_LABEL_COLUMNS``, the ten tags in their original order.
    """

    # Default tag order; position in this tuple is the index in the label vector.
    DEFAULT_LABEL_COLUMNS = (
        'math', 'greedy', 'implementation',
        'dp', 'data structures', 'constructive algorithms',
        'brute force', 'binary search', 'sortings',
        'graphs',
    )

    def __init__(self, dataset, label_columns=None):
        self.dataset = dataset
        if label_columns is None:
            label_columns = self.DEFAULT_LABEL_COLUMNS
        # Freeze the order so every item is encoded identically.
        self.label_columns = tuple(label_columns)

    def __len__(self):
        """Return the number of rows in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(text, labels)`` for row ``idx``; ``labels`` is a 1-D NumPy array."""
        item = self.dataset[idx]
        text = item["Problem Description"]
        label_vector = np.array([item[column] for column in self.label_columns])
        return text, label_vector


text_dataset = TextDataset(dataset)
# shuffle=False keeps batches in dataset row order.
dataloader = DataLoader(text_dataset, batch_size=batch_size, shuffle=False)

# Pick the device and move the model once, up front, instead of on every batch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Explicitly select eval mode so layers such as dropout are disabled while scoring.
model.eval()

true_labels = []
predictions = []

# The loop variable is `batch_labels`, not `labels`, so the notebook-level
# `labels` list of tag names is not overwritten by a tensor.
for texts, batch_labels in tqdm(dataloader):
    true_labels.extend(batch_labels.numpy())

    # Pad to the longest text in the batch and truncate anything over 512 tokens.
    inputs = tokenizer(list(texts), return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {key: val.to(device) for key, val in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
    # .cpu() is a no-op when the model already runs on the CPU.
    logits = outputs.logits.cpu()

    # Independent sigmoid per tag; a tag is predicted when its probability is >= 0.5.
    probabilities = torch.sigmoid(logits).numpy()
    batch_predictions = (probabilities >= 0.5).astype(int)

    predictions.extend(batch_predictions)


# Stack the per-example rows into 2-D arrays (one row per example, one column per tag).
true_labels = np.array(true_labels)
predictions = np.array(predictions)

# Element-wise accuracy: both matrices are unrolled to 1-D first, so every
# (example, tag) cell counts as one decision.
overall_accuracy = accuracy_score(true_labels.ravel(), predictions.ravel())

# Micro-averaged scores over all tags.
overall_recall, overall_f1, overall_precision = (
    score_fn(true_labels, predictions, average="micro")
    for score_fn in (recall_score, f1_score, precision_score)
)

# average=None yields one score per tag column.
per_label_recall, per_label_f1, per_label_precision = (
    score_fn(true_labels, predictions, average=None)
    for score_fn in (recall_score, f1_score, precision_score)
)

# Column index -> tag name used when reporting per-label results.
label_mapping = dict(enumerate([
    'math',
    'greedy',
    'implementation',
    'dp',
    'data structures',
    'constructive algorithms',
    'brute force',
    'binary search',
    'sortings',
    'graphs',
]))


# Summary of the pooled metrics.
print("Overall Metrics:")
print(f"Accuracy: {overall_accuracy:.4f}")
print(f"Recall: {overall_recall:.4f}")
print(f"F1 Score: {overall_f1:.4f}")
print(f"Precision: {overall_precision:.4f}\n")

# One metrics summary and one 2x2 confusion matrix per tag.
print("Per Label Metrics and Confusion Matrices:")
for i, label in label_mapping.items():
    print(f"\n{label}:")
    print(f"  Recall: {per_label_recall[i]:.4f}")
    print(f"  F1 Score: {per_label_f1[i]:.4f}")
    print(f"  Precision: {per_label_precision[i]:.4f}")

    # Pin both classes so the matrix is always 2x2. Without `labels=`, a tag whose
    # truth and predictions contain a single class yields a 1x1 matrix, which does
    # not match the two display labels below.
    # assumes tag values are encoded as 0/1 (or booleans) — TODO confirm
    cm = confusion_matrix(
        true_labels[:, i].astype(int),
        predictions[:, i].astype(int),
        labels=[0, 1],
    )
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[f"Not {label}", label])
    disp.plot(cmap=plt.cm.Blues)
    plt.title(f"Confusion Matrix for {label}")
    plt.show()
