In [None]:
%pip install transformers torch datasets evaluate accelerate timm kagglehub pandas seaborn evaluate

In [None]:
# Imports
import os
import json
import pandas as pd
import numpy as np
import torch
import evaluate
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import Dataset

# Device detection: use CUDA when torch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Device:', device)

# Paths to the local model directories (expected to be present in the workspace)
model1_dir, model2_dir = 'humor_detection_model01', 'humor_model_multilingual'

def load_tokenizer_with_fallback(model_dir, fallback_name=None):
    """Load a tokenizer from a local directory, optionally falling back to a named one.

    Args:
        model_dir: Directory passed to ``AutoTokenizer.from_pretrained`` with
            ``local_files_only=True``.
        fallback_name: Optional tokenizer name tried with ``local_files_only=False``
            when the local load raises. ``None`` disables the fallback.

    Returns:
        The loaded tokenizer, or ``None`` when every attempt raised. Failures are
        printed rather than propagated.
    """
    # First attempt: local files only.
    try:
        print(f"Trying to load tokenizer from local dir: {model_dir} (local_files_only=True)")
        tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
    except Exception as local_error:
        print('Local tokenizer load failed:', local_error)
    else:
        return tokenizer

    # Second attempt: the named fallback, if the caller supplied one.
    if fallback_name is not None:
        try:
            print('Trying fallback tokenizer from Hugging Face:', fallback_name)
            tokenizer = AutoTokenizer.from_pretrained(fallback_name, local_files_only=False)
        except Exception as fallback_error:
            print('Fallback tokenizer failed:', fallback_error)
        else:
            return tokenizer

    print('No tokenizer available for', model_dir)
    return None

def load_model_with_fallback(model_dir):
    """Load a sequence-classification model from a local directory, or return None.

    Strategy:
      1. ``AutoModelForSequenceClassification.from_pretrained(model_dir,
         local_files_only=True)``.
      2. If that raises and ``model_dir/model.safetensors`` exists, build the model
         from the local config and load the weights with ``safetensors``.

    The model is moved to the module-level ``device``. Failures are printed, not raised.

    Args:
        model_dir: Path of the local model directory.

    Returns:
        The loaded model, or ``None`` when no strategy succeeded.
    """
    try:
        print(f"Trying to load model from local dir: {model_dir} (local_files_only=True)")
        return AutoModelForSequenceClassification.from_pretrained(model_dir, local_files_only=True).to(device)
    except Exception as e:
        print('Local model load failed:', e)

    # If a safetensors file exists, try to load it with safetensors.torch
    safetensors_path = os.path.join(model_dir, 'model.safetensors')
    if os.path.exists(safetensors_path):
        try:
            print('Found model.safetensors — attempting safetensors load')
            # Load the config and build an uninitialised model skeleton from it
            cfg = AutoConfig.from_pretrained(model_dir, local_files_only=True)
            model = AutoModelForSequenceClassification.from_config(cfg).to(device)
            try:
                from safetensors.torch import load_file as safe_load
            except Exception as e2:
                print('safetensors package not installed. Install with: %pip install safetensors')
                raise e2
            # load_file documents `device` as a string such as "cpu" or "cuda:0";
            # pass the string form instead of the torch.device object.
            state = safe_load(safetensors_path, device=str(device))
            # strict=False tolerates key mismatches, so surface them instead of
            # silently reporting success with partially initialised weights.
            load_result = model.load_state_dict(state, strict=False)
            missing_keys = list(getattr(load_result, 'missing_keys', None) or [])
            unexpected_keys = list(getattr(load_result, 'unexpected_keys', None) or [])
            if missing_keys:
                print(f'Warning: {len(missing_keys)} key(s) missing from safetensors file:', missing_keys)
            if unexpected_keys:
                print(f'Warning: {len(unexpected_keys)} unexpected key(s) in safetensors file:', unexpected_keys)
            # from_config leaves the model in training mode, whereas from_pretrained
            # returns it in evaluation mode; switch to eval so both paths agree.
            model.eval()
            print('Loaded weights from safetensors into model object')
            return model
        except Exception as e3:
            print('Failed to load model from safetensors:', e3)
    print('No usable model found in', model_dir)
    return None

# Reasonable fallback tokenizers:
# - model1 appears to have vocab_size 30522 -> distilbert-base-uncased is a good candidate
# - model2 has a larger vocab_size -> use distilbert-base-multilingual-cased as the fallback
fallback_tokenizer1, fallback_tokenizer2 = (
    'distilbert-base-uncased',
    'distilbert-base-multilingual-cased',
)

# Load tokenizers and models using the fallback strategies; a model that could not
# be loaded is left as None and a hint is printed.
tokenizer1 = load_tokenizer_with_fallback(model1_dir, fallback_name=fallback_tokenizer1)
model1 = load_model_with_fallback(model1_dir)
if model1 is None:
    print('model1 not loaded — vérifiez le contenu du dossier ou installez safetensors si nécessaire')

tokenizer2 = load_tokenizer_with_fallback(model2_dir, fallback_name=fallback_tokenizer2)
model2 = load_model_with_fallback(model2_dir)
if model2 is None:
    print('model2 not loaded — vérifiez le contenu du dossier ou installez safetensors si nécessaire')

# Fonction d'inférence simple: renvoie la classe prédite et les probabilités
import torch.nn.functional as F
def predict(texts, tokenizer, model, device, return_probs=True):
    """Run the classifier on one or more texts.

    Args:
        texts: A single string or a sequence of strings; a lone string is wrapped
            into a one-element list.
        tokenizer: Callable tokenizer returning a mapping of tensors
            (called with ``truncation=True, padding=True, return_tensors='pt'``).
        model: Model whose call result exposes ``.logits``; it is put in eval mode.
        device: Device the tokenized tensors are moved to before the forward pass.
        return_probs: When true, also return the softmax probabilities.

    Returns:
        ``(preds, probs)`` as NumPy arrays when ``return_probs`` is true, otherwise
        only ``preds`` (argmax over the last axis of the probabilities).

    Raises:
        ValueError: If ``tokenizer`` or ``model`` is ``None``.
    """
    if tokenizer is None or model is None:
        raise ValueError('tokenizer et model doivent être fournis')

    batch = [texts] if isinstance(texts, str) else texts

    model.eval()
    with torch.no_grad():
        encoded = tokenizer(batch, truncation=True, padding=True, return_tensors='pt')
        on_device = {name: tensor.to(device) for name, tensor in encoded.items()}
        logits = model(**on_device).logits
        probs = F.softmax(logits, dim=-1).cpu().numpy()
    preds = probs.argmax(axis=-1)

    return (preds, probs) if return_probs else preds

# A few test examples (English and French sentences)
examples = [
    "I told my computer I needed a break, and it said no problem — it crashed.",
    "Why don't scientists trust atoms? Because they make up everything!",
    "This is a test sentence.",
    "Ce texte est sérieux et pas drôle.",
    "Pourquoi les plongeurs plongent-ils toujours en arrière et jamais en avant ? Parce que sinon ils tombent dans le bateau."
]


def _print_predictions(header, sentences, preds, probs):
    """Print ``header``, then each sentence with its predicted class and probabilities."""
    print(header)
    for sentence, pred, prob in zip(sentences, preds, probs):
        print(f'Example: {sentence}')
        print(f'  Predicted class: {pred} | probs: {prob}')


def _print_id2label(prefix, model):
    """Print ``model.config.id2label`` after ``prefix``; report a failure instead of hiding it."""
    try:
        print(prefix, model.config.id2label)
    except Exception as e:
        print('Could not read id2label (' + prefix.strip() + '):', e)


# Run the smoke tests for each model whose tokenizer and weights were both loaded.
# preds1/probs1 and preds2/probs2 are only defined when the matching prediction succeeds.
if tokenizer1 is not None and model1 is not None:
    try:
        preds1, probs1 = predict(examples, tokenizer1, model1, device)
        _print_predictions('\nResults for model1 (humor_detection_model01):', examples, preds1, probs1)
    except Exception as e:
        print('Erreur lors de la prédiction avec model1:', e)

if tokenizer2 is not None and model2 is not None:
    try:
        preds2, probs2 = predict(examples, tokenizer2, model2, device)
        _print_predictions('\nResults for model2 (humor_model_multilingual):', examples, preds2, probs2)
    except Exception as e:
        print('Erreur lors de la prédiction avec model2:', e)

# Show the class-index -> label mapping when available
if model1 is not None:
    _print_id2label('\nModel1 id2label:', model1)
if model2 is not None:
    _print_id2label('Model2 id2label:', model2)

In [None]:
# Comparison of the two models: load each with from_pretrained and predict on the examples below
from transformers import AutoModelForSequenceClassification
# Module-level input sentences (French, Korean and Spanish) read by run_simple_pipeline.
texts = [
    "j'ai faim",
    "Quelle mamie fait peur aux voleurs ? Mamie Traillette.",
    "Pourquoi les plongeurs plongent-ils toujours en arrière et jamais en avant ? Parce que sinon ils tombent dans le bateau.",
    "Quel est le comble pour un électricien ? De ne pas être au courant.",
    "Pourquoi les squelettes ne se battent-ils jamais entre eux ? Ils n'ont pas le cran.",
    "소 잃고 외양간 고친다",
    "백지장도 맞들면 낫다",
    "¿Por qué los pájaros no usan Facebook? Porque ya tienen Twitter.",
 ]

def run_simple_pipeline(model_dir, tokenizer):
    """Load the model in ``model_dir`` and print a prediction for every entry of ``texts``.

    Reads the module-level ``texts`` and ``device``. Inputs are padded/truncated to
    128 tokens. Any exception is caught and printed, so the function never raises.

    NOTE(review): class index 1 is printed as 'Humor' and every other index as
    'Not Humor'; this mapping is hard-coded here — confirm it against
    ``model.config.id2label``.

    Args:
        model_dir: Path or name given to ``from_pretrained``.
        tokenizer: Callable tokenizer returning a mapping of tensors.
    """
    print(f"\n--- Running pipeline for {model_dir} ---")
    try:
        classifier = AutoModelForSequenceClassification.from_pretrained(model_dir)
        classifier = classifier.to(device)

        encoded = tokenizer(
            texts,
            return_tensors='pt',
            truncation=True,
            padding='max_length',
            max_length=128,
        )
        batch = {}
        for key, tensor in encoded.items():
            batch[key] = tensor.to(device)

        with torch.no_grad():
            logits = classifier(**batch).logits

        predicted_ids = torch.argmax(logits, dim=-1).tolist()
        for text, class_id in zip(texts, predicted_ids):
            if class_id == 1:
                label = 'Humor'
            else:
                label = 'Not Humor'
            print(f"Text: {text}\nPrediction: {label}\n")
    except Exception as err:
        print('Erreur pipeline pour', model_dir, ':', err)

# Run the pipeline for model1 and model2 (using tokenizer1/tokenizer2 when available);
# a missing tokenizer only prints a notice for that model.
_pipeline_runs = (
    ('humor_detection_model01', tokenizer1,
     'tokenizer1 non disponible — impossible d exécuter pipeline pour model1'),
    ('humor_model_multilingual', tokenizer2,
     'tokenizer2 non disponible — impossible d exécuter pipeline pour model2'),
)
for _run_dir, _run_tokenizer, _missing_notice in _pipeline_runs:
    if _run_tokenizer is None:
        print(_missing_notice)
    else:
        run_simple_pipeline(_run_dir, _run_tokenizer)