In [1]:
import pandas as pd
import torch
from torch.nn.functional import softmax
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification

In [2]:
def predict_class(test_file_path, language):
    
    # Load XLM-RoBERTa tokenizer and model
    model_path = "../fine_tuned_models/fine_tuned_xlm_roberta_model_"+language
    tokenizer = XLMRobertaTokenizer.from_pretrained(model_path)
    model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base')
    
    # Load test data
    test_data = pd.read_csv(test_file_path)
    
    # Prepare inputs for the model
    inputs = tokenizer(test_data['tweet_text'].tolist(), padding=True, truncation=True, return_tensors='pt')
    
    # Make predictions
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Apply softmax to get predicted probabilities
    probabilities = softmax(outputs.logits, dim=1)
    
    # Get predicted class labels based on probability threshold
    threshold = 0.5 
    predicted_labels = ['yes' if prob[1] > threshold else 'no' for prob in probabilities]
    
    # Add predicted labels to the test data
    test_data['predicted_label'] = predicted_labels
    
    # Save results to CSV
    test_data[['tweet_id', 'predicted_label']].to_csv("output_transformer_"+language, index=False)


In [3]:

eng_test_file_path = "../CT24_checkworthy_english_dev-test_preprocessed.csv"
predict_class(test_file_path, "english")

OSError: Can't load tokenizer for 'fine_tuned_xlm_roberta_model'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'fine_tuned_xlm_roberta_model' is the correct path to a directory containing all relevant files for a XLMRobertaTokenizer tokenizer.