<a href="https://colab.research.google.com/github/Naomie25/Hackaton-Fashion-Description-Generator/blob/Version1-27%2F07/Version1_27_07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install schedule

Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Installing collected packages: schedule
Successfully installed schedule-1.2.2


In [4]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [13]:
import torch
from transformers import (
    pipeline,
    set_seed,
    BartTokenizer,
    BartForConditionalGeneration
)
import difflib
import re
import schedule
import time
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
import numpy as np

# ============================
# 1. Configuration Générale
# ============================
# Everything in this script runs on CPU.
device = torch.device("cpu")
print("✅ Device set to use", device)

# Text-generation pipeline backed by DistilGPT2; device=-1 keeps the pipeline
# on CPU. Used by generate_descriptions().
generator = pipeline('text-generation', model='distilgpt2', device=-1)
# Fixed seed so sampled generations are repeatable between runs.
set_seed(42)

# BART tokenizer/model pair used by summarize_text().
bart_model_name = "facebook/bart-base"
bart_tokenizer = BartTokenizer.from_pretrained(bart_model_name)
bart_model = BartForConditionalGeneration.from_pretrained(bart_model_name).to(device)

# Vocabulary rewarded by score_description(): each keyword found in a
# description adds to its score.
fashion_keywords = [
    "elegant", "stylish", "refined", "modern", "vintage", "casual",
    "minimalist", "chic", "versatile", "comfort", "premium", "crafted",
    "tailored", "cut", "fit", "fabric", "soft", "bold", "timeless"
]

# ============================
# 2. Génération de descriptions
# ============================
def generate_descriptions(keyword, num_variants=5):
    """Generate, score and clean candidate descriptions for a fashion item.

    Args:
        keyword: Item name inserted into the prompt (e.g. "denim jacket").
        num_variants: Number of sequences requested from the generator.

    Returns:
        A list of ``(description, score)`` tuples that survived
        ``clean_descriptions``; it may hold fewer than ``num_variants``
        entries, or be empty.
    """
    marker = "*Description:*"
    prompt = f"*Item:* {keyword}\n{marker}"

    outputs = generator(
        prompt,
        max_new_tokens=120,
        num_return_sequences=num_variants,
        # Sampling is requested explicitly: temperature/top_p only apply when
        # sampling, and several distinct sequences cannot come from greedy
        # decoding. `early_stopping` was dropped because it is a beam-search
        # flag that generation reports as invalid here.
        do_sample=True,
        temperature=0.75,
        top_p=0.9,
        no_repeat_ngram_size=2,
    )

    results = []
    for output in outputs:
        gen_text = output["generated_text"]
        # Keep what follows the first marker. If the marker is missing, fall
        # back to the whole text instead of slicing at a bogus offset.
        _, found, tail = gen_text.partition(marker)
        description_text = (tail if found else gen_text).strip()
        score = score_description(description_text, prompt)
        results.append((description_text, score))

    return clean_descriptions(results)

# ============================
# 3. Résumé qualité (BART)
# ============================
def summarize_text(text):
    """Run *text* through the BART model and return the decoded output.

    The input is truncated to 512 tokens; decoding uses 4 beams and stops at
    30 tokens, so the returned string is always short.

    NOTE(review): the checkpoint is the base BART model; confirm it is
    actually suited to summarisation before relying on the output as a
    quality signal.
    """
    encoded = bart_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)
    generated_ids = bart_model.generate(
        encoded["input_ids"],
        num_beams=4,
        max_length=30,
        early_stopping=True,
    )
    return bart_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# ============================
# 4. Filtrage Éthique
# ============================
def ethical_filter(text):
    """Return True when *text* contains no blacklisted term.

    A term matches when a word *starts* with it (case-insensitive), so
    inflected forms such as "hateful" are still rejected while unrelated
    words that merely contain the letters ("whatever", "chateau") are not.

    Args:
        text: The description to check.

    Returns:
        True if the text is acceptable, False if a blacklisted term is found.
    """
    blacklist = ["hate", "violence", "racism", "sexism", "terrorism"]
    # Anchor each term at a word boundary; plain substring search flagged
    # harmless words like "whatever".
    pattern = r"\b(?:" + "|".join(re.escape(term) for term in blacklist) + r")"
    return re.search(pattern, text.lower()) is None

# ============================
# 5. Score et Nettoyage
# ============================
def has_repetitions(text, max_repeat=3):
    """Tell whether a word is immediately repeated too many times.

    The check is case-insensitive and looks for a word followed by at least
    ``max_repeat`` further copies of itself, each separated by exactly one
    space (so ``max_repeat + 1`` consecutive occurrences in total).
    """
    repeated_word = re.compile(rf'\b(\w+)( \1){{{max_repeat},}}\b')
    return bool(repeated_word.search(text.lower()))

def clean_descriptions(descriptions):
    """Drop low-quality candidates from ``(description, score)`` pairs.

    A pair is kept only when its description has at least 8
    whitespace-separated words and ``has_repetitions`` does not flag it.
    Input order is preserved.
    """
    return [
        (text, rating)
        for text, rating in descriptions
        if len(text.split()) >= 8 and not has_repetitions(text)
    ]

def score_description(desc, prompt, keywords=None):
    """Score a generated description; higher is better.

    The score is the sum of three parts:
      * length: word count capped at 50, scaled to the range 0..1;
      * keywords: 0.5 for each keyword that appears as a word in *desc*;
      * originality: 1 minus the character-level similarity between *desc*
        and *prompt*, so text that merely echoes the prompt scores lower.

    Args:
        desc: The generated description.
        prompt: The prompt the description was generated from.
        keywords: Words to reward. Defaults to the module-level
            ``fashion_keywords`` list.

    Returns:
        The combined score as a number.
    """
    if keywords is None:
        keywords = fashion_keywords

    lowered = desc.lower()
    # Tokenise on word characters so trailing punctuation ("elegant,",
    # "fit.") does not hide a keyword; a set makes each lookup O(1).
    tokens = set(re.findall(r"\w+", lowered))
    keyword_bonus = sum(1 for word in keywords if word in tokens)

    length_score = min(len(lowered.split()), 50) / 50
    similarity = difflib.SequenceMatcher(None, lowered, prompt.lower()).ratio()
    # ratio() is always within [0, 1], so this is never negative.
    originality = 1 - similarity
    return length_score + 0.5 * keyword_bonus + originality

# ============================
# 6. Pipeline principal
# ============================
def run_pipeline(keyword, num_variants=5):
    """Generate, filter, summarise and print descriptions for *keyword*.

    Args:
        keyword: Item name passed to ``generate_descriptions``.
        num_variants: Number of candidate descriptions to request.

    Returns:
        A list of ``(description, summary, score)`` tuples for every
        description that passed the ethical filter.
    """
    print(f"\n--- Génération pour: {keyword} ---")
    descriptions = generate_descriptions(keyword, num_variants)

    final_results = []
    for desc, score in descriptions:
        # Filter before summarising so no model inference is spent on text
        # that is going to be rejected anyway.
        if not ethical_filter(desc):
            print("❌ Rejeté (filtre éthique):", desc)
            continue
        final_results.append((desc, summarize_text(desc), score))

    for i, (desc, summary, score) in enumerate(final_results, 1):
        print(f"\n✅ Description {i} [Score: {score:.2f}]:\n{desc}")
        print(f"📝 Résumé qualité:\n{summary}")

    return final_results

# ============================
# 7. Documentation pipeline
# ============================
def document_pipeline():
    """Print a short, user-facing overview of the pipeline stages."""
    overview = """
📌 Pipeline IA - Générateur de descriptions mode
Étapes :
1. Prompt → DistilGPT2 → Génération brute
2. Résumé avec BART → Vérifie la qualité
3. Filtrage éthique simple
4. Score = longueur + mots-clés + originalité
Utilisation : run_pipeline("mot-clé")
"""
    print(overview)

# ============================
# 8. Automatisation (schedule)
# ============================
def scheduled_run(keyword="black leather boots"):
    """Job entry point for the scheduler: run the pipeline for one keyword.

    Args:
        keyword: Item to generate descriptions for. The default keeps the
            previous hard-coded behaviour, so the function can still be
            registered without arguments.
    """
    print("\n📅 Tâche planifiée lancée...")
    run_pipeline(keyword)

# ============================
# 9. Évaluation du système
# ============================
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge
import numpy as np

def evaluate_system(keywords, generated_descriptions, references):
    """Evaluate already-generated descriptions against reference texts.

    The three inputs are consumed in lockstep: entry *i* of each describes
    one evaluation. For every usable pair a smoothed sentence-level BLEU and
    a ROUGE-L F-score are computed, and their means are printed at the end.

    Args:
        keywords: Labels printed for each evaluation.
        generated_descriptions: Generated texts; ``None`` or blank entries
            are skipped.
        references: Reference texts; a list entry is joined with spaces
            into a single string.

    Returns:
        None. Results are printed.
    """
    smoothie = SmoothingFunction().method4
    rouge = Rouge()
    bleu_scores = []
    rouge_scores = []

    keywords = list(keywords)
    generated_descriptions = list(generated_descriptions)
    references = list(references)

    # zip() stops at the shortest input; report it instead of silently
    # dropping the extra entries.
    lengths = (len(keywords), len(generated_descriptions), len(references))
    if len(set(lengths)) > 1:
        print(
            f"⚠️ Longueurs différentes {lengths} : "
            f"seules les {min(lengths)} premières entrées sont évaluées."
        )

    for keyword, gen, ref in zip(keywords, generated_descriptions, references):
        print(f"\n🔍 Évaluation pour : {keyword}")

        # Blank text is as unusable as None: ROUGE raises on an empty
        # hypothesis.
        if gen is None or not gen.strip():
            print("⚠️ Aucune description générée.")
            continue

        # A reference given as a list is flattened to a single string.
        if isinstance(ref, list):
            ref = " ".join(ref)

        # Same reasoning as above, for the reference side.
        if not ref or not ref.strip():
            print("⚠️ Référence vide.")
            continue

        print("✅ Description générée :", gen)
        print("🎯 Référence :", ref)

        bleu = sentence_bleu([ref.split()], gen.split(), smoothing_function=smoothie)
        rouge_l = rouge.get_scores(gen, ref)[0]["rouge-l"]["f"]

        bleu_scores.append(bleu)
        rouge_scores.append(rouge_l)

    if bleu_scores and rouge_scores:
        print("\n📊 Évaluation :")
        print(f"BLEU Score moyen : {np.mean(bleu_scores):.4f}")
        print(f"ROUGE-L moyen : {np.mean(rouge_scores):.4f}")
    else:
        print("⚠️ Aucune description valide pour l’évaluation.")



# ============================
# 10. Exemple d’utilisation
# ============================
if __name__ == "__main__":
    keyword = "denim jacket"

    # run_pipeline returns a list of (desc, summary, score) tuples.
    generated_results = run_pipeline(keyword, num_variants=5)

    document_pipeline()

    # Keep only the description text (first element of each tuple).
    generated_descriptions = [desc for desc, _, _ in generated_results]

    # evaluate_system walks its three inputs in lockstep, so the single
    # keyword and reference are repeated once per generated description;
    # otherwise only the first description would be evaluated.
    reference = "A versatile denim jacket with a classic cut and modern fit."
    count = len(generated_descriptions)
    evaluate_system(
        keywords=[keyword] * count,
        generated_descriptions=generated_descriptions,
        references=[reference] * count,
    )

    # NOTE: no job is registered with `schedule` in this file, so the loop
    # below would do nothing until one is added.
    # print("\n⏳ Starting schedule loop. Press Ctrl+C to exit.")
    # while True:
    #     schedule.run_pending()
    #     time.sleep(30)  # check every 30 seconds



✅ Device set to use cpu


Device set to use cpu
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--- Génération pour: denim jacket ---

✅ Description 1 [Score: 2.88]:
The denim jackets are hand made from cotton and cotton. The jacket is made with cotton fabric, which is woven to fit snugly into the back of the jacket.
Item Description:Item :* leather jacketItemType:&& fabricItemName:+* fabric ItemType :& cottonItemDescription:-* clothingItemSize:%& woolItemColor:0:5%* garmentItemWidth:2:1%+%%

- All items are made in the same color. All item types are produced in a different color, and will be rolled together.

Item type
📝 Résumé qualité:
The denim jackets are hand made from cotton and cotton. The jacket is made with cotton fabric, which is woven to fit snugly into

✅ Description 2 [Score: 1.92]:
The first garment is a denim sweatshirt with a very lightweight material with the top section of the jacket. The garment features the color of a wool shirt, a pair of leather pants, and a large gold band.
It features a full-length black band with two gold bands. It features leather socks