In [1]:
%%writefile app.py
import pandas as pd
from datasets import load_dataset
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# -------- Dataset loading --------
# The split expression "train[:1000]" requests only the first 1000 rows of the
# train split; the result is then wrapped in a pandas DataFrame.
dataset = load_dataset("SetFit/amazon_reviews_multi_fr", split="train[:1000]")
df = pd.DataFrame(dataset)

# -------- Nettoyage --------
def clean_text(text):
    """Normalize a review for text processing.

    The input is coerced to ``str``, lower-cased, stripped of URLs, reduced to
    French letters, digits and whitespace, and finally whitespace-collapsed.

    Args:
        text: Raw review text. Any value is accepted because it is passed
            through ``str()`` first (so ``None`` becomes ``"none"``).

    Returns:
        The cleaned, lower-case string with single spaces between tokens and
        no leading or trailing whitespace.
    """
    text = str(text).lower()
    # Drop links before the character filter so no URL fragments survive.
    text = re.sub(r"http\S+|www\S+", "", text)
    # Keep the full French alphabet: besides the common accents this includes
    # the diaeresis forms (ü, ÿ) and the ligatures (œ, æ); without them a word
    # such as "cœur" would be silently truncated to "cur".
    text = re.sub(r"[^a-zàâæçéèêëîïôœùûüÿ0-9\s]", "", text)
    # Collapse the gaps left by the removals above.
    text = re.sub(r"\s+", " ", text).strip()
    return text

# Store the clean_text() result for every value of the "text" column in a new
# "texte_clean" column.
df["texte_clean"] = df["text"].apply(clean_text)

# -------- Attribution sentiment --------
def label_sentiment(label):
    """Map a numeric review label to a coarse sentiment name.

    NOTE(review): this assumes the ``label`` column of
    ``SetFit/amazon_reviews_multi_fr`` is the star rating minus one (0-4), as
    described on the dataset card -- confirm against the loaded data. Under
    that scheme the previous mapping (1 -> positive, 2 -> negative) tagged
    2-star reviews as positive and 3-star reviews as negative.

    Args:
        label: Integer class label of a review.

    Returns:
        ``"negative"`` for labels 0-1, ``"positive"`` for labels 3-4 and
        ``"neutral"`` for label 2 or any unrecognized value.
    """
    if label in (0, 1):
        return "negative"
    if label in (3, 4):
        return "positive"
    # Label 2 (middle rating) and anything unexpected fall back to neutral,
    # which keeps the original function's default branch.
    return "neutral"

# Derive a "sentiment" column by passing every value of the "label" column
# through label_sentiment().
df["sentiment"] = df["label"].apply(label_sentiment)

# -------- Mistral model --------
model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# use_fast=False requests the slow (pure-Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Weights are loaded in half precision; device_map="auto" leaves device
# placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Text-generation pipeline used by generer_reponse(); max_new_tokens=120 caps
# the number of tokens generated per call.
gen_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=120
)

def generer_reponse(texte, sentiment="negative"):
    """Generate a polite, professional reply to a customer review.

    Args:
        texte: The customer's review text, inserted verbatim in the prompt.
        sentiment: Sentiment label inserted in the prompt; defaults to
            ``"negative"``.

    Returns:
        The text produced by the model for this prompt, without the prompt
        itself and without surrounding whitespace.
    """
    prompt = f"Le client a écrit : {texte}\nSentiment : {sentiment}\nRédige une réponse polie et professionnelle."
    # By default the text-generation pipeline returns prompt + completion in
    # "generated_text"; return_full_text=False keeps only the completion so
    # the caller receives the reply rather than an echo of the instructions.
    sorties = gen_pipe(prompt, return_full_text=False)
    return sorties[0]["generated_text"].strip()

# Print the first rows of the DataFrame, including the "texte_clean" and
# "sentiment" columns added above.
print(df.head())


Writing app.py
