In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the locally saved summarization model and its tokenizer.
bart_dir = "my_bart_summary"
gen_tokenizer = AutoTokenizer.from_pretrained(bart_dir)
gen_model = AutoModelForSeq2SeqLM.from_pretrained(bart_dir)
# Kept as the last expression so the cell still displays the module tree.
gen_model.eval()




BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
    

In [4]:
import pandas as pd
# Review table that generate_summary later filters by its "cluster" column.
reviews_csv = '../claster/done.csv'
df = pd.read_csv(reviews_csv)

In [9]:
import pandas as pd
import numpy as np
import torch
import joblib
import gradio as gr
from transformers import AutoTokenizer, AutoModel, AutoTokenizer, AutoModelForSeq2SeqLM

# ========== Loading ==========
# Data: the `df` DataFrame is read from CSV in the previous cell.

# RoBERTa encoder + tokenizer; get_cluster mean-pools its last hidden state.
# nn.Module.eval() returns the module itself, so it can be chained on the load.
cls_tokenizer = AutoTokenizer.from_pretrained("../claster/my_roberta")
cls_model = AutoModel.from_pretrained("../claster/my_roberta").eval()

# Fitted KMeans model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a trusted source.
kmeans_model = joblib.load("../claster/kmeans_model.pkl")

# Locally saved BART generator + tokenizer.
gen_tokenizer = AutoTokenizer.from_pretrained("my_bart_summary")
gen_model = AutoModelForSeq2SeqLM.from_pretrained("my_bart_summary").eval()

# Human-readable label for each KMeans cluster id (list position == cluster id).
cluster_names = dict(enumerate([
    "Entry-Level and Kids Fire Tablets",
    "Batteries, Laptop Gear, and Basic Accessories",
    "Streaming Devices and E-Readers",
    "Advanced E-Readers and Smart Assistants",
    "Echo Speakers and Smart Home Hubs",
]))

# ========== دوال ==========
def get_cluster(text):
    """Return the human-readable cluster name predicted for ``text``.

    The text is tokenized (truncated to 64 tokens), passed through
    ``cls_model``, and the last hidden state is mean-pooled over the positions
    marked by the attention mask. The pooled vector is handed to
    ``kmeans_model.predict`` and the resulting id is looked up in
    ``cluster_names``.

    Args:
        text: Input passed straight to ``cls_tokenizer``.

    Returns:
        The ``cluster_names`` entry for the first predicted cluster id.

    Raises:
        KeyError: If the predicted id has no entry in ``cluster_names``.
    """
    encoded = cls_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64)
    with torch.no_grad():
        hidden = cls_model(**encoded).last_hidden_state

    # Broadcast the mask across the hidden dimension so masked-out positions
    # contribute nothing to the sum.
    weights = encoded['attention_mask'].unsqueeze(-1).expand(hidden.size()).float()
    masked_total = (hidden * weights).sum(1)
    # Clamp guards against division by zero when the mask is all zeros.
    token_counts = weights.sum(1).clamp(min=1e-9)
    pooled = (masked_total / token_counts).cpu().numpy()

    return cluster_names[kmeans_model.predict(pooled)[0]]

def _sample_review_texts(reviews, product, limit=3):
    """Return up to ``limit`` randomly chosen review texts for ``product``.

    Missing texts are dropped and the rest coerced to ``str`` so the result can
    always be joined. The sample size is capped by the number of texts that
    actually exist for this product, so ``Series.sample`` never over-draws.
    """
    texts = reviews.loc[reviews["name"] == product, "reviews.text"].dropna().astype(str)
    if texts.empty:
        return []
    return texts.sample(min(limit, len(texts))).tolist()


def generate_summary(user_text):
    """Build a recommendation prompt for the text's cluster and run BART on it.

    Steps:
      1. ``get_cluster`` maps ``user_text`` to a cluster name.
      2. Rows of the module-level ``df`` whose ``"cluster"`` equals that name
         are selected (assumes ``df["cluster"]`` stores the human-readable
         names from ``cluster_names`` -- TODO confirm against the CSV).
      3. Up to three products with the most 5-star reviews are listed, each
         with up to three sampled complaints (rating <= 2 and not recommended).
      4. The product with the most "not recommended" reviews is reported as
         the worst, with up to three sampled review texts as reasons.
      5. The prompt is tokenized (truncated to 1024 tokens) and decoded from
         ``gen_model.generate`` using 4 beams and ``max_length=300``.

    Clusters with fewer than three (or zero) top-rated products produce a
    shorter list instead of failing. Sampling is unseeded, so the chosen
    review texts can differ between calls.

    Args:
        user_text: Product description or review entered by the user.

    Returns:
        The decoded generation as a string.
    """
    category = get_cluster(user_text)
    cluster_reviews = df[df["cluster"] == category]

    # Top products: most frequent names among 5-star reviews (may be < 3).
    top_rated = cluster_reviews[cluster_reviews["reviews.rating"] == 5]
    top_3 = top_rated["name"].value_counts().head(3).index.tolist()
    differences = "\n".join(f"- {rank}. {name}" for rank, name in enumerate(top_3, start=1))

    # Complaints: low-rated, not-recommended reviews of each top product.
    not_recommended = cluster_reviews[cluster_reviews["reviews.doRecommend"] == False]
    negative = not_recommended[not_recommended["reviews.rating"] <= 2]
    complaints = {prod: " | ".join(_sample_review_texts(negative, prod)) for prod in top_3}
    # One line per available product rather than three hard-coded indices.
    complaint_lines = "\n".join(f"- {prod}: {complaints[prod]}" for prod in top_3)

    # Worst product: the name with the most "not recommended" reviews.
    worst_counts = not_recommended["name"].value_counts()
    if worst_counts.empty:
        worst_product = "غير متوفر"
        worst_reasons = []
    else:
        worst_product = worst_counts.idxmax()
        worst_reasons = _sample_review_texts(not_recommended, worst_product)
    if not worst_reasons:
        worst_reasons = ["لا توجد أسباب واضحة."]
    reasons_text = " | ".join(worst_reasons)

    # Build the prompt.
    prompt = f"""
📦 Cluster: {category}

✅ Top 3 Products:
{differences}

🔍 Key Differences:
Explain how these products differ in features, design, or value.

⚠️ Top Complaints:
{complaint_lines}

🚫 Worst Product:
{worst_product}
Reasons to avoid:
{reasons_text}
"""

    # Generate with BART.
    inputs = gen_tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=1024)
    summary_ids = gen_model.generate(inputs["input_ids"], max_length=300, num_beams=4, early_stopping=True)
    output = gen_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return output

# ========== Gradio واجهة ==========
# Build the single-textbox UI first, then start the local server. The launch
# call stays as the last expression so the cell output is unchanged.
demo = gr.Interface(
    fn=generate_summary,
    inputs=gr.Textbox(label="📝 اكتب وصف المنتج أو مراجعة"),
    outputs="text",
    title="📊 توليد مقال توصية للمنتجات",
    description="يحدد النموذج الكلاستر المناسب، ثم يولد مقالاً يحتوي على أفضل 3 منتجات، أهم الشكاوى، وأسوأ منتج ولماذا يجب تجنبه.",
)
demo.launch()




* Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.






In [None]:
Amazon Echo Show Alexa-enabled Bluetooth Speaker with 7" Screen                                                                     845
