In [1]:
import json
import random
from pathlib import Path

import torch
from rich.console import Console
from rich.live import Live
from rich.markup import escape
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.rule import Rule
from rich.table import Table
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Hugging Face model identifier passed to `from_pretrained`.
MODEL_ID = "Qwen/Qwen3-0.6B"
# JSONL dataset location, relative to the notebook's working directory.
DATA_PATH = Path("..") / "data" / "TOSDR" / "tosdr_markdowns_en.jsonl"
# Single seed for every RNG the notebook relies on.
SEED = 42

# Seed both RNGs used below (`random.choice` for picking documents and the
# torch sampler behind `do_sample=True`) so that a fresh
# "Restart Kernel -> Run All" draws the same documents and samples.
random.seed(SEED)
torch.manual_seed(SEED)

# Shared Rich console used for all notebook output.
console = Console()

In [3]:
def load_model():
    """Load the tokenizer and causal language model identified by ``MODEL_ID``.

    Returns:
        tuple: ``(model, tokenizer)`` -- note the order, model first.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        # `torch_dtype` is deprecated in favour of `dtype` (see the warning
        # this notebook printed when loading). "auto" lets Transformers pick
        # the dtype; the original note expects this to be BF16.
        dtype="auto",
        device_map="auto",
    )
    return model, tokenizer

In [4]:
def get_summary(model, tokenizer, text, max_chars=2000, max_new_tokens=512):
    """Ask the model to label the key clauses of a Terms of Service document.

    Args:
        model: Object exposing ``generate(**inputs, ...)`` and a ``device``
            attribute (here, the causal LM returned by ``load_model``).
        tokenizer: Matching tokenizer; must provide ``apply_chat_template``,
            be callable on a list of strings, and provide ``decode``.
        text: Raw document text.
        max_chars: Number of leading characters of ``text`` placed in the
            prompt. ``None`` sends the whole document.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        str: The decoded completion only (prompt tokens removed), stripped of
        surrounding whitespace.
    """
    # The "/no_think" soft switch is kept in the prompt in addition to
    # `enable_thinking=False` below, so the model does not drift into
    # thinking mode.
    prompt = (
        "Analyze these Terms of Service and list key clauses with "
        "[GOOD], [NEUTRAL], [BAD], or [BLOCKER]. /no_think\n\n"
        f"Document:\n{text[:max_chars]}"
    )
    messages = [{"role": "user", "content": prompt}]

    # Render the chat prompt as a string with thinking mode disabled.
    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)

    # Sampling settings chosen for non-thinking mode (per the original
    # author's reading of the Qwen3 documentation).
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.8,
        do_sample=True,
    )

    # `generate` returns prompt + completion; drop the prompt tokens before
    # decoding so only the newly generated text is returned.
    prompt_len = len(inputs["input_ids"][0])
    completion_ids = outputs[0][prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

In [5]:
model, tokenizer = load_model()

# Read the JSONL dataset: one JSON document per line.
# NOTE(review): the filter is a plain substring match on the raw line, not a
# check of a parsed field -- any line containing "success" anywhere is kept.
# Presumably it targets a status field; confirm against the dataset schema.
with open(DATA_PATH, "r", encoding="utf-8") as f:
    samples = [json.loads(line) for line in f if "success" in line]

# Fail here with a clear message rather than later with an IndexError from
# `random.choice` on an empty list.
if not samples:
    raise ValueError(f"No usable records found in {DATA_PATH}")

`torch_dtype` is deprecated! Use `dtype` instead!


In [None]:
# Run the model on three randomly chosen documents and display, for each one,
# the raw markdown followed by the model's clause report.
#
# Rich parses "[...]" inside plain strings as console markup, so every piece
# of dynamic text (service name, ToS markdown, model output) is escaped before
# printing. Otherwise bracketed text such as "[terms]" is silently swallowed
# and a stray "[/x]" raises MarkupError.
for _ in range(3):
    item = random.choice(samples)
    service_name = escape(item['service_name'])
    console.print(f"\n[bold green]Test Qwen - Service: {service_name}[/bold green]")

    # Show the original markdown of the selected document.
    console.print(
        Panel(
            escape(item['markdown']),
            title=f'Markdown brut - {service_name}',
            border_style="green",
        )
    )

    with console.status("[bold magenta]Génération en cours..."):
        result = get_summary(model, tokenizer, item['markdown'])

    # One-column table for a "ToS;dr"-style rendering of the report.
    table = Table(title=f"Rapport de clauses : {service_name}", border_style="cyan")
    table.add_column("Analyse du modèle", style="white")

    # One table row per non-blank line of model output.
    for line in result.splitlines():
        stripped = line.strip()
        if stripped:
            table.add_row(escape(stripped))

    console.print(table)

Output()