In [1]:
import torch
from adapters import AutoAdapterModel
from transformers import AutoTokenizer

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer and adapter-capable model are both created from the "roberta-base"
# checkpoint; the model is then moved onto the selected device.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoAdapterModel.from_pretrained("roberta-base")
model = model.to(device)

In [2]:
# Load the saved adapter from ./final_adapter, asking for it to be activated on
# load, and keep the value returned by load_adapter as `adapter_name`.
adapter_name = model.load_adapter(
    "./final_adapter",
    set_active=True,
)

# Explicitly select that adapter as the active one on the model.
model.set_active_adapters(adapter_name)

In [3]:
from transformers import TextClassificationPipeline

# Wrap the model and its tokenizer in a text-classification pipeline bound to
# the device chosen in the setup cell.
classifier = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# Smoke test on a single question; as the last expression of the cell, the
# returned prediction is what the notebook displays.
classifier("What are the tax rates in Russia?")

In [4]:
from datasets import load_dataset

# Integer label id -> domain name. The position of each name in the list is
# its label id (0 = "finance", ..., 7 = "stackoverflow").
id2label = dict(enumerate([
    "finance",
    "medicine",
    "leetcode",
    "exam",
    "webgpt",
    "gpt4tools",
    "cot",
    "stackoverflow",
]))

# Alternative 4-domain mapping (disabled):
# id2label = dict(enumerate(["finance", "medicine", "cot", "stackoverflow"]))

# Read the evaluation records from the JSON-lines file as a single split.
eval_file = "./cache/unified_eval.jsonl"
dataset_eval = load_dataset("json", data_files=eval_file, split="train")

# Distinct label ids present in the evaluation set, and their domain names.
labels = dataset_eval.unique("labels")
domains = [id2label[label_id] for label_id in labels]
domains

In [5]:
import pandas as pd
from datasets import tqdm as hf_tqdm

# Per-domain evaluation of the text classifier.
#
# For every label id found in the evaluation set, the matching records are
# classified one by one and the share of correct predictions is recorded.
# The resulting table (one row per domain plus an "Average" row) is printed
# and written to ./evaluation_results.csv.

# One dict per table row; the "Classifier" column holds a display string.
results = []
# Unrounded per-domain accuracies (in percent). Kept separately so the average
# is computed from the exact values rather than re-parsed from the 2-decimal
# strings stored in `results`.
domain_accuracies = []

# Evaluate the classifier on each domain
for label in hf_tqdm(labels):
    domain = id2label[label]
    domain_dataset = dataset_eval.filter(lambda x: x['labels'] == label)
    domain_size = len(domain_dataset)

    correct_classifications = 0
    for record in hf_tqdm(domain_dataset):
        # Inputs are truncated to 512 tokens; the label of the first returned
        # prediction is compared case-insensitively with the domain name.
        prediction = classifier(record["text"], truncation=True, max_length=512)[0]["label"]
        if prediction.lower() == domain:
            correct_classifications += 1

    # `labels` comes from the dataset itself, so every domain has >= 1 record.
    classifier_accuracy = correct_classifications / domain_size * 100
    domain_accuracies.append(classifier_accuracy)

    # Add results to the list
    results.append({
        "Domain": domain.upper(),
        "test size": domain_size,
        "Classifier": f"{classifier_accuracy:.2f}%",
        "Router": "N/A"  # Placeholder as Router results are not provided
    })

# Add the average row (unweighted mean over domains). Skipped when there are no
# domains at all, which would otherwise raise ZeroDivisionError.
if results:
    average_size = sum(result["test size"] for result in results) / len(results)
    average_classifier_accuracy = sum(domain_accuracies) / len(domain_accuracies)

    results.append({
        "Domain": "Average",
        "test size": average_size,
        "Classifier": f"{average_classifier_accuracy:.2f}%",
        "Router": "N/A"  # Placeholder as Router results are not provided
    })

# Convert to DataFrame
results_df = pd.DataFrame(results)

# Display the table
print(results_df)

# Save to CSV
results_df.to_csv("./evaluation_results.csv", index=False)