In [None]:
from flask import Flask, render_template, request
import torch
import joblib
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, AutoModelForSeq2SeqLM

# === Setup ===
app = Flask(__name__)

# === Load the classification model ===
# Tokenizer and sequence-classification head are loaded from the same local
# checkpoint directory; eval() switches the model to inference mode.
cls_tokenizer = AutoTokenizer.from_pretrained("../classification/transforms_fine_tuning2")
cls_model = AutoModelForSequenceClassification.from_pretrained("../classification/transforms_fine_tuning2")
cls_model.eval()

# Maps the predicted class index to its label (looked up in classify_review).
label_map = {0: "Negative", 1: "Positive", 2: "Neutral"}

# === Load the summarization models ===
# NOTE(review): joblib.load unpickles the file - only load trusted artifacts.
kmeans_model = joblib.load("../claster/kmeans_model.pkl")
gen_tokenizer = AutoTokenizer.from_pretrained("../summary/my_bart_summary")
gen_model = AutoModelForSeq2SeqLM.from_pretrained("../summary/my_bart_summary")
gen_model.eval()
# Same checkpoint directory as cls_model, loaded through AutoModel;
# get_cluster reads last_hidden_state from it to build text embeddings.
cls_model2 = AutoModel.from_pretrained("../classification/transforms_fine_tuning2")
cls_model2.eval()

# Maps the cluster index predicted by kmeans_model to a display name
# (looked up in get_cluster).
cluster_names = {
    0: "Entry-Level and Kids Fire Tablets",
    1: "Batteries, Laptop Gear, and Basic Accessories",
    2: "Streaming Devices and E-Readers",
    3: "Advanced E-Readers and Smart Assistants",
    4: "Echo Speakers and Smart Home Hubs"
}

# Load the review data. summarize_review reads the columns "cluster", "name",
# "reviews.rating", "reviews.doRecommend" and "reviews.text" from this frame.
reviews_df = pd.read_csv("../claster/done.csv")

# === Functions ===
def classify_review(text):
    """Return the sentiment label predicted for ``text``.

    The text is tokenized with ``cls_tokenizer`` (truncated to at most 64
    tokens), passed through ``cls_model`` without gradient tracking, and
    the index of the most probable class is translated via ``label_map``.
    """
    encoded = cls_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=64,
    )
    with torch.no_grad():
        logits = cls_model(**encoded).logits
        class_probs = torch.softmax(logits, dim=1)
    # .item() requires exactly one prediction, i.e. a single input text.
    predicted_index = torch.argmax(class_probs, dim=1).item()
    return label_map[predicted_index]

def get_cluster(text):
    """Return the display name of the cluster that ``text`` falls into.

    The text is embedded by averaging the last hidden state of
    ``cls_model2`` over the positions marked in the attention mask; the
    embedding is then assigned a cluster index by ``kmeans_model`` and that
    index is translated via ``cluster_names``.
    """
    encoded = cls_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=64,
    )
    with torch.no_grad():
        hidden_states = cls_model2(**encoded).last_hidden_state

    # Broadcast the attention mask over the hidden dimension so masked-out
    # positions contribute nothing to the sum.
    mask = encoded['attention_mask'].unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    # Clamp guards against division by zero when the mask is all zeros.
    mask_total = mask.sum(dim=1).clamp(min=1e-9)
    embedding = (masked_total / mask_total).cpu().numpy()

    cluster_id = kmeans_model.predict(embedding)[0]
    return cluster_names[cluster_id]

def _sample_texts(texts, k=3):
    """Return up to ``k`` randomly sampled non-null entries of ``texts`` as strings."""
    usable = texts.dropna().astype(str)
    if usable.empty:
        return []
    # Size the sample by the series actually being sampled; asking for more
    # rows than exist makes Series.sample raise ValueError.
    return usable.sample(min(k, len(usable))).tolist()


def summarize_review(user_text):
    """Generate a summary for the product cluster that ``user_text`` maps to.

    The cluster is determined with ``get_cluster``; from the matching rows
    of ``reviews_df`` the function collects the (up to) three most
    frequently 5-star-rated products, a few negative review texts for each
    of them, and the most frequently not-recommended product with a few of
    its review texts. These are formatted into a prompt that ``gen_model``
    turns into the returned text.

    Args:
        user_text: Free text used only to choose the cluster.

    Returns:
        The decoded output of ``gen_model`` for the assembled prompt.

    Note:
        Review texts are sampled randomly, so the prompt (and therefore the
        result) can differ between calls with the same input.
    """
    category = get_cluster(user_text)
    # NOTE(review): assumes the "cluster" column stores the cluster *names*
    # returned by get_cluster (not numeric ids) - confirm against done.csv.
    cluster_df = reviews_df[reviews_df["cluster"] == category]

    # Products that most often received a 5-star rating (may be fewer than 3).
    top_rated = cluster_df[cluster_df["reviews.rating"] == 5]
    top_3 = top_rated["name"].value_counts().head(3).index.tolist()
    differences = "\n".join(
        f"- {rank}. {name}" for rank, name in enumerate(top_3, start=1)
    )

    # Low-rated, not-recommended reviews serve as complaints per top product.
    negative = cluster_df[
        (cluster_df["reviews.rating"] <= 2) & (cluster_df["reviews.doRecommend"] == False)
    ]
    complaints = {
        prod: " | ".join(_sample_texts(negative[negative["name"] == prod]["reviews.text"]))
        for prod in top_3
    }
    # Iterate over whatever top products exist instead of indexing [0..2],
    # which raised IndexError for clusters with fewer than three of them.
    complaint_lines = "\n".join(f"- {prod}: {complaints[prod]}" for prod in top_3)

    no_reasons = ["لا توجد أسباب واضحة."]
    worst_df = cluster_df[cluster_df["reviews.doRecommend"] == False]
    if not worst_df.empty:
        worst_product = worst_df["name"].value_counts().idxmax()
        worst_texts = worst_df[worst_df["name"] == worst_product]["reviews.text"]
        worst_reasons = _sample_texts(worst_texts) or no_reasons
    else:
        worst_product = "غير متوفر"
        worst_reasons = no_reasons
    reasons_text = " | ".join(worst_reasons)

    prompt = f"""
📦 Cluster: {category}

✅ Top 3 Products:
{differences}

🔍 Key Differences:
Explain how these products differ in features, design, or value.

⚠️ Top Complaints:
{complaint_lines}

🚫 Worst Product:
{worst_product}
Reasons to avoid:
{reasons_text}
"""

    inputs = gen_tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=1024)
    summary_ids = gen_model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so generation does not have to infer it.
        attention_mask=inputs.get("attention_mask"),
        max_length=300,
        num_beams=4,
        early_stopping=True,
    )
    return gen_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# === Routes ===
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the main page and, on POST, process the submitted text.

    A GET request renders ``index.html`` with an empty result. A POST reads
    the ``mode`` and ``user_text`` form fields and runs ``classify_review``
    or ``summarize_review`` accordingly; any other mode leaves the result
    empty.
    """
    if request.method != 'POST':
        return render_template("index.html", result="")

    form = request.form
    mode = form['mode']
    text = form['user_text']

    handlers = {
        'classification': classify_review,
        'summarization': summarize_review,
    }
    handler = handlers.get(mode)
    result = handler(text) if handler is not None else ""
    return render_template("index.html", result=result)

# === Run the application ===
if __name__ == "__main__":
    # Start the Flask development server on port 5001 with debug mode on
    # and the auto-reloader switched off.
    app.run(
        port=5001,
        debug=True,
        use_reloader=False,
    )




 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5001
Press CTRL+C to quit
127.0.0.1 - - [14/Apr/2025 09:37:50] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [14/Apr/2025 09:39:04] "POST / HTTP/1.1" 500 -
Traceback (most recent call last):
  File "/Users/a7mad/Desktop/git_lab/ironhack/ironhack/lib/python3.10/site-packages/flask/app.py", line 1536, in __call__
    return self.wsgi_app(environ, start_response)
  File "/Users/a7mad/Desktop/git_lab/ironhack/ironhack/lib/python3.10/site-packages/flask/app.py", line 1514, in wsgi_app
    response = self.handle_exception(e)
  File "/Users/a7mad/Desktop/git_lab/ironhack/ironhack/lib/python3.10/site-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/a7mad/Desktop/git_lab/ironhack/ironhack/lib/python3.10/site-packages/flask/app.py", line 919, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/a7mad/Desktop/git_lab/ironhack/ironhack/lib/python3.10/site-packages/flask/ap