In [None]:
%pip install transformers torch datasets evaluate accelerate timm kagglehub pandas seaborn

In [None]:
# Génération + filtrage (mode CPU pour débogage)
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

device = torch.device('cpu')  # Start on CPU to avoid CUDA errors; switch to 'cuda' once the pipeline is stable

# -------- generation (swap in a local language model if you have one) ----------
gen_name = 'gpt2'  # replace with a French generative model if one is available
tok_gen = AutoTokenizer.from_pretrained(gen_name)
# Reuse the end-of-sequence token for padding when the tokenizer defines no pad token.
if tok_gen.pad_token is None:
    tok_gen.pad_token = tok_gen.eos_token
# The generator lives on `device`; generate_jokes moves its inputs to the same device.
model_gen = AutoModelForCausalLM.from_pretrained(gen_name).to(device)

def generate_jokes(prompt, n=8, max_len=80, temp=0.9, top_p=0.95):
    """Sample ``n`` continuations of ``prompt`` from the module-level generator.

    Args:
        prompt: Text the generator is conditioned on.
        n: Number of sequences to sample (``num_return_sequences``).
        max_len: Forwarded as ``max_length``; per the transformers docs this
            bounds prompt tokens plus generated tokens.
        temp: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        A list of ``n`` stripped strings containing only the generated
        continuation (the prompt is removed).
    """
    inputs = tok_gen(prompt, return_tensors='pt').to(device)
    # Remember how many tokens the prompt occupies so it can be removed at the
    # token level. Slicing the decoded string by len(prompt) is fragile: the
    # decoded prompt is not guaranteed to have the same character length as
    # the original (e.g. tokenization clean-up drops spaces before "?" / "!").
    prompt_token_count = inputs['input_ids'].shape[1]
    out = model_gen.generate(
        **inputs,
        do_sample=True,
        temperature=temp,
        top_p=top_p,
        max_length=max_len,
        num_return_sequences=n,
        pad_token_id=tok_gen.eos_token_id,
    )
    return [
        tok_gen.decode(seq[prompt_token_count:], skip_special_tokens=True).strip()
        for seq in out
    ]

# -------- charger vos classifieurs (ils n'ont que config+poids, on utilise fallback tokenizer) ----------
from transformers import AutoTokenizer
import os, json

def load_model_and_tokenizer_classif(path, fallback_tokenizer='distilbert-base-multilingual-cased'):
    """Load a sequence-classification model and a tokenizer for it.

    The tokenizer is loaded from ``path`` first. If that fails (for example
    because the directory only holds config and weights), ``fallback_tokenizer``
    is loaded instead and a warning is emitted, since a tokenizer that does not
    match the model's vocabulary silently degrades the scores.

    Args:
        path: Directory containing the saved classifier.
        fallback_tokenizer: Tokenizer name or path used when ``path`` has no
            loadable tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is moved to the module-level
        ``device``.
    """
    import warnings

    path = os.path.abspath(path)
    model = AutoModelForSequenceClassification.from_pretrained(path).to(device)
    try:
        tok = AutoTokenizer.from_pretrained(path)
    except Exception as exc:
        # Best-effort fallback is kept, but no longer silent.
        warnings.warn(
            f"No usable tokenizer in {path!r} ({exc!r}); "
            f"falling back to {fallback_tokenizer!r}.",
            RuntimeWarning,
            stacklevel=2,
        )
        tok = AutoTokenizer.from_pretrained(fallback_tokenizer)
        # Token ids beyond the model's embedding table cause index errors at
        # inference time, so flag an obviously incompatible fallback early.
        model_vocab = getattr(model.config, 'vocab_size', None)
        if model_vocab is not None and len(tok) > model_vocab:
            warnings.warn(
                f"Fallback tokenizer vocabulary ({len(tok)}) is larger than the "
                f"model's vocab_size ({model_vocab}); token ids may be out of range.",
                RuntimeWarning,
                stacklevel=2,
            )
    return model, tok

# Load the two humor classifiers from local directories (relative to the
# current working directory). Each call returns a (model, tokenizer) pair;
# both pairs are used by the scoring step further down.
m1, t1 = load_model_and_tokenizer_classif('./humor_detection_model01')
m2, t2 = load_model_and_tokenizer_classif('./humor_model_multilingual')

# -------- scoring manuel (sans pipeline) ----------
import torch.nn.functional as F

import re

# Matches labels that name the *absence* of humor, e.g. "not_humor",
# "non-humour", "no humor", "nonhumor".
_NEGATED_HUMOR_RE = re.compile(r'(?:^|[^a-z])(?:not|non|no)[\s_\-]*humou?r')


def _model_device(model):
    """Return the device the model lives on, or None if it cannot be determined."""
    dev = getattr(model, 'device', None)
    if dev is None:
        first_param = next(model.parameters(), None)
        dev = None if first_param is None else first_param.device
    return dev


def _humor_probability(probs_dict):
    """Pick the humor probability out of a ``{label: probability}`` mapping."""
    negated_prob = None
    for label, prob in probs_dict.items():
        lowered = str(label).lower()
        if 'humor' not in lowered and 'humour' not in lowered:
            continue
        if _NEGATED_HUMOR_RE.search(lowered):
            # "not_humor" contains "humor" but is the opposite class.
            if negated_prob is None:
                negated_prob = prob
            continue
        return prob
    if negated_prob is not None:
        # Only a negated label exists: humor is its complement.
        return 1.0 - negated_prob
    # No humor-related label: use LABEL_1, else the largest probability
    # as a last resort.
    return probs_dict.get('LABEL_1', max(probs_dict.values()))


def score_with_model(texts, model, tokenizer):
    """Score each text with a sequence classifier and return humor probabilities.

    Each text is tokenized and classified on its own; the logits are turned
    into probabilities with a softmax over the last dimension. The humor
    probability is the one whose label mentions "humor"/"humour" without being
    negated; if only a negated label exists its complement is used; otherwise
    ``LABEL_1`` is used, and failing that the largest probability.

    Args:
        texts: Iterable of strings to score.
        model: Classifier whose output exposes ``.logits`` and whose
            ``config`` may carry an ``id2label`` mapping.
        tokenizer: Callable producing a mapping of tensors for one text.

    Returns:
        A list of floats, one per input text, in input order.
    """
    model.eval()
    # Loop-invariant lookups are done once up front.
    target_device = _model_device(model)
    id2label = getattr(model.config, 'id2label', None) or {}

    scores = []
    for text in texts:
        enc = tokenizer(text, truncation=True, padding=True, return_tensors='pt', return_token_type_ids=False)
        if target_device is not None:
            # Inputs follow the model's device rather than a module global.
            enc = {k: v.to(target_device) for k, v in enc.items()}
        else:
            enc = dict(enc.items())
        with torch.no_grad():
            logits = model(**enc).logits
        probs = F.softmax(logits, dim=-1)[0].tolist()
        # Tolerate an incomplete id2label map (and string keys) by falling
        # back to the generic LABEL_<i> name.
        probs_dict = {
            id2label.get(i, id2label.get(str(i), f'LABEL_{i}')): float(p)
            for i, p in enumerate(probs)
        }
        scores.append(float(_humor_probability(probs_dict)))
    return scores

# -------- usage: generate candidates, score with both classifiers, rank ----------
prompt = "Génère une blague courte en français :\n"
candidates = generate_jokes(prompt, n=8)

# Score the same candidates with each classifier, first model first.
scores1, scores2 = (
    score_with_model(candidates, clf, clf_tok)
    for clf, clf_tok in ((m1, t1), (m2, t2))
)

# Plain average of the two humor scores per candidate.
combined = [(first + second) / 2.0 for first, second in zip(scores1, scores2)]

# Indices of the candidates from highest to lowest combined score.
order = np.argsort(combined)[::-1]
for i in order:
    print(f"SCORE {combined[i]}", candidates[i], "-----", sep="\n")