In [None]:
%pip install transformers torch datasets evaluate accelerate timm kagglehub pandas seaborn evaluate

In [None]:
# Imports
import pandas as pd
import numpy as np
import torch
import evaluate
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import Dataset

# Device selection: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Device:', device)

# Directories of the local models (expected to be present in the workspace).
model1_dir, model2_dir = 'humor_detection_model01', 'humor_model_multilingual'

# Load each tokenizer / sequence-classification model pair from its directory.
# from_pretrained is given a folder that should contain config.json and the
# weights (model.safetensors). A failed load is reported and execution continues;
# names that were not assigned stay undefined, which later code detects through
# globals().
try:
    tokenizer1 = AutoTokenizer.from_pretrained(model1_dir)
    model1 = AutoModelForSequenceClassification.from_pretrained(model1_dir).to(device)
except Exception as err:
    print('Erreur lors du chargement du model1:', err)
    print('Vérifiez que le dossier', model1_dir, 'contient config.json et model.safetensors ou les fichiers de poids attendus.')
else:
    print('Loaded model1 from', model1_dir)

try:
    tokenizer2 = AutoTokenizer.from_pretrained(model2_dir)
    model2 = AutoModelForSequenceClassification.from_pretrained(model2_dir).to(device)
except Exception as err:
    print('Erreur lors du chargement du model2:', err)
    print('Vérifiez que le dossier', model2_dir, 'contient config.json et model.safetensors ou les fichiers de poids attendus.')
else:
    print('Loaded model2 from', model2_dir)

# Simple inference helper: returns the predicted class and the class probabilities.
import torch.nn.functional as F
def predict(texts, tokenizer, model, device, return_probs=True, *, batch_size=None):
    """Classify one or more texts and return predicted class indices.

    Args:
        texts: A single string or an iterable of strings. A lone string is
            treated as a one-element batch; any other iterable is copied
            into a list before tokenization.
        tokenizer: Callable invoked as
            ``tokenizer(list_of_str, truncation=True, padding=True,
            return_tensors='pt')`` that returns a mapping of tensors.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with a ``logits`` tensor. ``model.eval()``
            is called before inference and the model is left in that mode.
        device: Device the tokenized tensors are moved to before the forward
            pass.
        return_probs: When true, return ``(preds, probs)``; otherwise return
            only ``preds``.
        batch_size: Optional maximum number of texts per forward pass. The
            default ``None`` runs all texts in a single pass, as before.

    Returns:
        ``preds`` is a NumPy array holding the argmax over the last axis of
        the softmax probabilities; ``probs`` is the NumPy array of those
        probabilities, one row per text. For empty input both arrays have
        length 0, and ``probs`` has ``model.config.num_labels`` columns when
        that attribute exists (0 columns otherwise).

    Raises:
        ValueError: If ``batch_size`` is given and is smaller than 1.
    """
    if isinstance(texts, str):
        texts = [texts]
    else:
        # The tokenizer is handed a plain list, so tuples, generators or
        # other iterables are materialized here.
        texts = list(texts)

    if batch_size is not None and batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    if not texts:
        # Nothing to classify: return empty arrays instead of forwarding an
        # empty batch to the tokenizer and the model.
        num_labels = getattr(getattr(model, 'config', None), 'num_labels', 0) or 0
        probs = np.empty((0, num_labels), dtype=np.float32)
        preds = np.empty((0,), dtype=np.int64)
        return (preds, probs) if return_probs else preds

    step = len(texts) if batch_size is None else batch_size
    model.eval()
    prob_chunks = []
    with torch.no_grad():
        for start in range(0, len(texts), step):
            batch = texts[start:start + step]
            inputs = tokenizer(batch, truncation=True, padding=True, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            logits = model(**inputs).logits
            # Move to CPU per chunk so device memory is released between batches.
            prob_chunks.append(F.softmax(logits, dim=-1).cpu().numpy())

    probs = np.concatenate(prob_chunks, axis=0)
    preds = probs.argmax(axis=-1)
    if return_probs:
        return preds, probs
    return preds

# English and French sample sentences used for a quick check of both models.
examples = [
    "I told my computer I needed a break, and it said no problem — it crashed.",
    "Why don't scientists trust atoms? Because they make up everything!",
    "This is a test sentence.",
    "Ce texte est sérieux et pas drôle.",
    "Pourquoi les plongeurs plongent-ils toujours en arrière et jamais en avant ? Parce que sinon ils tombent dans le bateau."
]


def print_predictions(header, texts, preds, probs):
    """Print a header followed by the prediction for each text.

    Args:
        header: Line printed once before the per-text results.
        texts: Iterable of input texts.
        preds: Iterable of predicted class indices, aligned with ``texts``.
        probs: Iterable of per-class probabilities, aligned with ``texts``.
    """
    print(header)
    for text, pred, prob in zip(texts, preds, probs):
        print(f'Example: {text}')
        print(f'  Predicted class: {pred} | probs: {prob}')


# Each model is only exercised when both its model and tokenizer names exist in
# the global namespace. The prediction arrays are kept as module-level names so
# they remain available after this cell.
if 'model1' in globals() and 'tokenizer1' in globals():
    preds1, probs1 = predict(examples, tokenizer1, model1, device)
    print_predictions('\nResults for model1 (humor_detection_model01):', examples, preds1, probs1)

if 'model2' in globals() and 'tokenizer2' in globals():
    preds2, probs2 = predict(examples, tokenizer2, model2, device)
    print_predictions('\nResults for model2 (humor_model_multilingual):', examples, preds2, probs2)

# Note: the class indices (0/1/...) depend on the model configuration.
# To get textual labels, map the indices through model.config.id2label.
if 'model1' in globals():
    try:
        print('\nModel1 id2label:', model1.config.id2label)
    except AttributeError as err:
        # Report a missing mapping instead of hiding it silently.
        print('Model1 id2label unavailable:', err)
if 'model2' in globals():
    try:
        print('Model2 id2label:', model2.config.id2label)
    except AttributeError as err:
        print('Model2 id2label unavailable:', err)