In [9]:
import duckdb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [2]:
# Shared in-memory DuckDB connection used by the queries in this notebook.
con = duckdb.connect(database=':memory:')

In [3]:
# Read each processed test split (huffpo, recognasumm, uci) into its own
# DataFrame, in that order, through the shared DuckDB connection.
test_huffpo, test_recognasumm, test_uci = (
    con.execute(f"SELECT * FROM '../../data/processed/{split_name}_test.csv' ").fetch_df()
    for split_name in ("huffpo", "recognasumm", "uci")
)

# BERT

In [None]:
def evaluate_bert(dataset, batch_size=64):
    """Evaluate the fine-tuned DistilBERT classifier for one dataset.

    Reads ``../../data/processed/{dataset}_test.csv`` through the module-level
    DuckDB connection ``con``, loads the tokenizer, model and label encoder
    from ``../../models/bert/{dataset}``, predicts a class for every row of the
    ``x`` column, and scores the predictions against the encoded ``y`` column.

    Args:
        dataset: Name of the dataset; used to build the CSV and model paths
            and echoed back in the result.
        batch_size: Number of examples per inference batch (default 64).

    Returns:
        dict with keys ``model`` (always ``"bert"``), ``dataset`` (the name
        passed in), ``accuracy``, and macro-averaged ``precision``,
        ``recall`` and ``f1``.
    """
    data = con.execute(f"SELECT * FROM '../../data/processed/{dataset}_test.csv' ").fetch_df()
    tokenizer = DistilBertTokenizerFast.from_pretrained(f"../../models/bert/{dataset}")
    model = DistilBertForSequenceClassification.from_pretrained(f"../../models/bert/{dataset}")
    # NOTE(review): torch.load unpickles arbitrary Python objects, so only load
    # files from a trusted source. Newer PyTorch releases default to
    # weights_only=True, which may reject a pickled encoder object -- confirm
    # against the installed version.
    label_encoder = torch.load(f"../../models/bert/{dataset}/label_encoder.pt")

    texts = data['x'].tolist()
    # Tokenize the whole test set up front; padding=True pads every sequence
    # to the longest one in the set.
    encodings = tokenizer(texts, truncation=True, padding=True, return_tensors="pt")

    # Wrap the tensors for batched inference. A distinct local name is used so
    # the `dataset` argument (the dataset name) is not overwritten and is still
    # available for the result dict below.
    tensor_dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'])
    dataloader = DataLoader(tensor_dataset, batch_size=batch_size)

    # Run inference with dropout disabled and gradient tracking off.
    model.eval()
    predictions = []

    with torch.no_grad():
        for input_ids, attention_mask in tqdm(dataloader, total=len(dataloader)):
            outputs = model(input_ids, attention_mask=attention_mask)
            # The highest-scoring logit per row is the predicted class index.
            predicted_classes = torch.argmax(outputs.logits, dim=1)
            predictions.extend(predicted_classes.tolist())

    # Add predictions to the (local) DataFrame
    data['y_pred'] = predictions

    # Map the gold labels through the saved encoder so they are comparable
    # with the predicted class indices.
    data["y_encoded"] = label_encoder.transform(data["y"])

    # Calculate metrics (macro average weights every class equally)
    accuracy = accuracy_score(data["y_encoded"], data["y_pred"])
    precision = precision_score(data["y_encoded"], data["y_pred"], average="macro")
    recall = recall_score(data["y_encoded"], data["y_pred"], average="macro")
    f1 = f1_score(data["y_encoded"], data["y_pred"], average="macro")

    return {
        "model": "bert",
        "dataset": dataset,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }



# Distant labeling