In [1]:
from torch import cuda
# Select the compute device: the GPU when PyTorch reports CUDA as available,
# the CPU otherwise.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
from transformers import AutoTokenizer

# Identifier of the checkpoint whose tokenizer is loaded below.
tokenizer_checkpoint = "EliottClavier/bert-finetuned-text-classification-trip"

# Load the tokenizer that belongs to that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

In [3]:
from transformers import AutoModelForSequenceClassification

# Identifier of the checkpoint whose sequence-classification weights are
# loaded below.
model_checkpoint = "EliottClavier/bert-finetuned-text-classification-trip"

# Load the model together with its sequence-classification head.
model_test = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

In [4]:
# Read the id -> label-name mapping stored in the loaded model's configuration;
# it is used later to turn a predicted class index into a readable label.
model_config = model_test.config
id2label = model_config.id2label

In [16]:
import torch
import numpy as np

# Sample sentences to classify. Each entry is a dict with a 1-based "id"
# (assigned in listing order) and the raw "text".
sentences = [
    {"id": position, "text": text}
    for position, text in enumerate(
        [
            "Je voudrais aller à Paris en partant de Nantes.",
            "Je voudrais aller à Paris en partant de Lyon ou Nantes",
            "I would like to go to Paris from Lyon",
            "I like apples",
            "J'aime me balader dans les rues de Paris",
            "ze zareazrreaz rzearzear",
        ],
        start=1,
    )
]

# Classify every entry of `sentences` and print one comma-separated line per
# sentence: "<id>,<text>" when the predicted label is "CORRECT", otherwise
# "<id>,<predicted label>". Each sentence dict also receives a "prediction"
# key holding the predicted label name.

# Run the model on the device selected at the top of the notebook; the inputs
# are moved to the same device below so both always live in the same place.
model_test.to(device)

for sentence in sentences:

    # Tokenize the text and move the resulting tensors next to the model
    inputs = tokenizer(sentence["text"], return_tensors="pt").to(device)

    # Make prediction (inference only, so gradient tracking is disabled)
    with torch.no_grad():
        outputs = model_test(**inputs)

    # A single sentence is passed per call, so row 0 holds its per-label scores.
    # The label with the highest score wins. Sigmoid is monotonic, so taking the
    # arg-max of the raw logits picks the same label as the arg-max of the
    # sigmoid probabilities, without risking ties when large logits saturate.
    predicted_id = int(outputs.logits[0].argmax())

    # Turn the predicted id into the actual label name
    sentence["prediction"] = id2label[predicted_id]

    if sentence["prediction"] == "CORRECT":
        print(f"{sentence['id']},{sentence['text']}")
    else:
        print(f"{sentence['id']},{sentence['prediction']}")


1,Je voudrais aller à Paris en partant de Nantes.
2,NOT_TRIP
3,NOT_FRENCH
4,NOT_FRENCH
5,NOT_TRIP
6,UNKNOWN
