In [46]:
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd

# Load the evaluation dataset (semicolon-separated CSV).
file_path = 'fr_aides.csv'
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` keeps the previous behaviour: malformed rows are
# dropped and a warning is emitted for each of them.
df = pd.read_csv(file_path, sep=';', on_bad_lines='warn')

# Encode string labels to integers, numbered in order of first appearance
# in the 'tag' column.
# NOTE(review): the ids therefore depend on the row order of this CSV;
# presumably the model was trained with the same mapping -- confirm, or
# compare against the label mapping stored with the model.
tag_to_label = {tag: idx for idx, tag in enumerate(df['tag'].unique())}
df['labels'] = df['tag'].map(tag_to_label)

# Save the mapping to a JSON file
with open('tag_to_label.json', 'w') as f:
    json.dump(tag_to_label, f)

# Load the mapping from the JSON file
with open('tag_to_label.json', 'r') as f:
    tag_to_label = json.load(f)

# Inverse mapping (integer id -> tag string), used to decode predictions.
label_to_tag = {v: k for k, v in tag_to_label.items()}

# Load the model and tokenizer
model_name = "MartaTT/model11epochs"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

device = torch.device('cpu')
model.to(device)
# Make inference mode explicit so layers such as dropout are disabled.
model.eval()

# Test if the model is loaded correctly by making a single prediction
test_input = "Accompagner les projets de développement d'hôtellerie indépendante"
inputs = tokenizer(test_input, return_tensors='pt', padding=True, truncation=True)
# Keep the inputs on the same device as the model so that changing `device`
# above does not break the forward pass.
inputs = inputs.to(device)
# No gradients are needed for inference; skipping the autograd graph saves
# memory and time.
with torch.no_grad():
    outputs = model(**inputs)
# Index of the highest-scoring class for the single input row.
predictions = torch.argmax(outputs.logits, dim=1)
test_label = label_to_tag[predictions.item()]
print(f"Test prediction for input '{test_input}': {test_label}")

def predict(text):
    """Return the predicted tag string for a single text.

    The text is tokenized (with truncation), passed through the module-level
    ``model``, and the index of the highest logit is decoded through the
    module-level ``label_to_tag`` mapping.

    Args:
        text: One input string. The result is read with ``.item()``, which
            only works when the batch holds exactly one row, so a list of
            several texts is not supported.

    Returns:
        The tag associated with the highest-scoring class.

    Raises:
        KeyError: If the predicted class index is not in ``label_to_tag``.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
    # Place the inputs on whatever device the model currently lives on.
    inputs = inputs.to(model.device)
    # Inference only: do not build an autograd graph for every call.
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=1)
    return label_to_tag[predictions.item()]

def test_model(texts, true_labels):
    """Evaluate ``predict`` on a collection of texts.

    Each text is classified one at a time with ``predict`` and the predicted
    tags are compared with ``true_labels``.

    Args:
        texts: Iterable of input strings.
        true_labels: Ground-truth tags, aligned with ``texts``.

    Returns:
        A dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'``
        and ``'f1'``; the last three are weighted averages over the labels.
    """
    predicted_labels = [predict(text) for text in texts]
    accuracy = accuracy_score(true_labels, predicted_labels)
    # zero_division=0 scores a label that is never predicted (or never
    # present) as 0, which is what the default "warn" setting does as well,
    # but without emitting an UndefinedMetricWarning on every run.
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels,
        predicted_labels,
        average='weighted',
        zero_division=0,
    )
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Evaluate on every row of the dataset: the 'projets' column supplies the
# inputs and the 'tag' column the ground-truth labels.
test_texts, true_labels = (df[column].tolist() for column in ('projets', 'tag'))

results = test_model(test_texts, true_labels)
print(results)




  df = pd.read_csv(file_path, sep=';', error_bad_lines=False)


Test prediction for input 'Accompagner les projets de développement d'hôtellerie indépendante': Subvention
{'accuracy': 0.983991462113127, 'precision': 0.9799371652218385, 'recall': 0.983991462113127, 'f1': 0.9794458546179237}


  _warn_prf(average, modifier, msg_start, len(result))
