In [None]:
!pip install openai wordcloud pandas matplotlib seaborn ipywidgets -q

import json
import os
import re
from abc import ABC, abstractmethod
from getpass import getpass

import ipywidgets as widgets
import matplotlib.pyplot as plt
import openai
import pandas as pd
import requests
import seaborn as sns
from IPython.display import display
from wordcloud import WordCloud

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m66.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
class LLMProvider(ABC):
    """Abstract interface for a backend that analyses a single product review."""

    @abstractmethod
    def get_analysis(self, product_name, rating, review_text):
        """Analyse one review of ``product_name``.

        Concrete providers must override this method; the base
        implementation does nothing and returns ``None``.
        """
        ...


class OpenAIProvider(LLMProvider):
    """Review analyser backed by the OpenAI chat completions endpoint.

    Each call sends the system prompt in ``INSTRUCTIONS`` plus one user
    message describing the review, and requests a reply constrained by
    ``SCHEMA`` (strict JSON-schema response format).
    """

    # System prompt sent with every request.
    INSTRUCTIONS = """You are a CX Analyst. Analyze the product review and extract key insights.
Prioritize technical issues in review text over numerical rating.
Even if the rating is high, extract any mentioned problems into the 'complaints' array.
Identify specific 'advantages' (positive features) and 'complaints' (technical or service issues).
Your analysis must be objective and prioritize the text content over the numerical rating.
Review may be in Azerbaijani, Russian, or English — analyze regardless of language."""

    # JSON schema the reply is requested to follow.
    SCHEMA = {
        "type": "object",
        "properties": {
            "final_score": {"type": "integer"},
            "sentiment": {"type": "string", "enum": ["Positive", "Neutral", "Negative"]},
            "complaints": {"type": "array", "items": {"type": "string"}},
            "advantages": {"type": "array", "items": {"type": "string"}}
        },
        "required": ["final_score", "sentiment", "complaints", "advantages"],
        "additionalProperties": False
    }

    def __init__(self, api_key, model="gpt-4o-mini"):
        """Create the OpenAI client.

        Args:
            api_key: API key passed to ``openai.OpenAI``.
            model: Name of the chat model used by ``get_analysis``.
        """
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def get_analysis(self, product_name, rating, review_text):
        """Analyse one review and return the parsed JSON reply as a dict.

        Args:
            product_name: Product the review is about.
            rating: Rating given by the reviewer.
            review_text: Text of the review.

        Returns:
            The reply decoded with ``json.loads``; the schema requests the
            keys ``final_score``, ``sentiment``, ``complaints`` and
            ``advantages``.

        Raises:
            ValueError: If the model refuses to answer or returns no content
                (``json.JSONDecodeError``, a ``ValueError`` subclass, is
                raised when the content is not valid JSON).
        """
        resp = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.INSTRUCTIONS},
                {"role": "user", "content": f"Product: {product_name}, Rating: {rating}, Review: {review_text}"}
            ],
            response_format={
                "type": "json_schema",
                "json_schema": {"name": "cx_schema", "schema": self.SCHEMA, "strict": True}
            }
        )
        message = resp.choices[0].message

        # With structured outputs a refusal arrives in `refusal` and `content`
        # is empty; fail with a clear message instead of json.loads(None).
        refusal = getattr(message, "refusal", None)
        if refusal:
            raise ValueError(f"Model refused to analyse the review: {refusal}")
        if not message.content:
            raise ValueError("Model returned an empty response")

        return json.loads(message.content)

In [None]:
# Secrets and endpoints come from the environment so that no credential is
# stored in the notebook. If OPENAI_API_KEY is unset, the key is requested
# interactively without being echoed.
MY_TOKEN = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Base URL of the scraper API; override with SCRAPER_BASE_URL when the tunnel
# address changes. A trailing slash is removed so path joins stay clean.
BASE_URL = os.environ.get(
    "SCRAPER_BASE_URL", "https://4a93-37-26-62-220.ngrok-free.app"
).rstrip("/")
HEADERS = {"ngrok-skip-browser-warning": "true"}

provider = OpenAIProvider(MY_TOKEN)

In [None]:
def clean_reviews(reviews: list, min_words: int = 3) -> pd.DataFrame:
    """Turn raw review records into a cleaned, de-duplicated DataFrame.

    Each record is expected to provide the keys ``content``, ``author``,
    ``reviewDate`` and ``rating``.

    Steps:
      1. Keep only rows whose ``content`` is a string.
      2. Strip emoji and collapse whitespace in ``content``.
      3. Drop rows whose cleaned content is empty or shorter than
         ``min_words`` words.
      4. Drop duplicate (``content``, ``author``) pairs.
      5. Format ``reviewDate`` as ``YYYY-MM-DD`` and cast ``rating`` to int.

    The length filter and de-duplication run on the *cleaned* text, so a
    review that only looks long or unique because of emoji or extra
    whitespace is handled correctly.

    Args:
        reviews: List of review records (dicts).
        min_words: Minimum number of whitespace-separated words a cleaned
            review must contain to be kept.

    Returns:
        The cleaned frame with a fresh 0..n-1 index. An empty input yields
        an empty frame.
    """
    df = pd.DataFrame(reviews)
    if df.empty:
        return df

    # Compiled once per call rather than once per review.
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"
        u"\U0001F300-\U0001F5FF"
        u"\U0001F680-\U0001F9FF"
        u"\U00002700-\U000027BF"
        "]+", flags=re.UNICODE)

    def clean_text(text):
        text = emoji_pattern.sub("", text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    # Missing or non-string content cannot be analysed.
    is_text = df["content"].map(lambda value: isinstance(value, str))
    # .copy() so the column assignments below never write into a view.
    df = df[is_text].copy()
    if df.empty:
        return df.reset_index(drop=True)

    df["content"] = df["content"].apply(clean_text)

    word_counts = df["content"].str.split().str.len()
    df = df[(df["content"] != "") & (word_counts >= min_words)]

    df = df.drop_duplicates(subset=["content", "author"]).copy()

    df["reviewDate"] = pd.to_datetime(df["reviewDate"]).dt.strftime("%Y-%m-%d")
    df["rating"] = df["rating"].astype(int)

    return df.reset_index(drop=True)


In [None]:
def fetch_products(timeout=30):
    """Fetch the minimal product listing from the scraper API.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on a dead endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    r = requests.get(
        f"{BASE_URL}/api/scrapers/product/all-minimal",
        headers=HEADERS,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()

def fetch_reviews(product_id, timeout=30):
    """Fetch the reviews of one product from the scraper API.

    Args:
        product_id: Identifier placed in the request path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on a dead endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    r = requests.get(
        f"{BASE_URL}/api/scrapers/product-reviews/{product_id}",
        headers=HEADERS,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()

In [None]:
# Load the product list once and index product ids by display name.
products = fetch_products()
product_map = dict((p["name"], p["id"]) for p in products)

# Controls: an output area for results, the trigger button, and the product picker.
output = widgets.Output()
btn = widgets.Button(button_style="primary", description="Analiz et")
dropdown = widgets.Dropdown(
    description="Məhsul seç:",
    options=list(product_map),
    layout=widgets.Layout(width="600px"),
    style={"description_width": "initial"},
)


def _flatten_lists(cells):
    """Merge every list found in ``cells`` into one flat list, skipping non-list entries."""
    return [item for cell in cells if isinstance(cell, list) for item in cell]


def _draw_top_items(ax, items, palette, title, empty_title, empty_text, **title_style):
    """Draw a bar chart of the eight most frequent ``items`` on ``ax``.

    When ``items`` is empty, ``empty_text`` is written in the middle of the
    axes instead and ``empty_title`` is used as the title. ``title_style``
    is forwarded to ``ax.set_title`` in both cases.
    """
    if items:
        counts = pd.Series(items).value_counts().head(8)
        sns.barplot(x=counts.values, y=counts.index, ax=ax, palette=palette)
        ax.set_title(title, **title_style)
        ax.set_xlabel("Say")
    else:
        ax.text(0.5, 0.5, empty_text, ha='center', va='center')
        ax.set_title(empty_title, **title_style)


def _draw_dashboard(final_df, product_name):
    """Render the four-panel summary figure for one product.

    Panels: sentiment pie chart, most frequent advantages, most frequent
    complaints, and a word cloud built from the complaints of reviews whose
    sentiment is "Negative". ``final_df`` must contain the columns
    ``sentiment``, ``advantages`` and ``complaints``.
    """
    fig = plt.figure(figsize=(22, 16))
    gs = fig.add_gridspec(2, 3, hspace=0.4, wspace=0.4)

    ax_pie = fig.add_subplot(gs[0, 0])
    ax_adv = fig.add_subplot(gs[0, 1])
    ax_comp = fig.add_subplot(gs[0, 2])
    ax_cloud = fig.add_subplot(gs[1, :])

    fig.suptitle(f"{product_name}", fontsize=13, fontweight="bold", wrap=True)

    # Panel 1: share of each sentiment label.
    sentiment_counts = final_df["sentiment"].value_counts()
    colors_map = {"Positive": "#4CAF50", "Neutral": "#FFC107", "Negative": "#F44336"}
    pie_colors = [colors_map.get(x, "#999999") for x in sentiment_counts.index]
    ax_pie.pie(
        sentiment_counts,
        labels=sentiment_counts.index,
        autopct='%1.1f%%',
        startangle=140,
        colors=pie_colors,
        wedgeprops={'edgecolor': 'white'}
    )
    ax_pie.set_title("Rəy Balansı", fontsize=14, fontweight="bold")

    # Panel 2: most frequent advantages.
    _draw_top_items(
        ax_adv,
        _flatten_lists(final_df["advantages"]),
        "Greens_r",
        title="Müsbət Rəylər",
        empty_title="Müsbət Rəylər",
        empty_text="Müsbət rəy tapılmadı",
        color="green",
        fontweight="bold",
    )

    # Panel 3: most frequent complaints.
    _draw_top_items(
        ax_comp,
        _flatten_lists(final_df["complaints"]),
        "Reds_r",
        title="Ən Çox Rast Gəlinən Problemlər",
        empty_title="Problemlər",
        empty_text="Ciddi şikayət tapılmadı",
        fontsize=14,
        fontweight="bold",
    )

    # Panel 4: word cloud of complaints taken from negative reviews only.
    negative_df = final_df[final_df["sentiment"] == "Negative"]
    wc_text = " ".join(_flatten_lists(negative_df["complaints"]))

    if wc_text.strip():
        wordcloud = WordCloud(
            width=1600,
            height=500,
            background_color='white',
            colormap='Reds',
            max_words=50
        ).generate(wc_text)
        ax_cloud.imshow(wordcloud, interpolation='bilinear')
    else:
        ax_cloud.text(0.5, 0.5, " Mənfi rəy tapılmadığı üçün WordCloud yaradılmadı.",
                      ha='center', va='center', fontsize=13)
    ax_cloud.axis("off")
    ax_cloud.set_title("Mənfi Məqamların Söz Buludu", fontsize=14, fontweight="bold")

    plt.show()


def on_click(b):
    """Button handler: fetch, clean and analyse the selected product's reviews.

    All progress messages, the preview table and the summary figure are
    written into the ``output`` widget. Reviews whose analysis raises are
    reported and skipped; if none can be analysed, the handler stops with a
    message instead of trying to plot an empty frame.

    Args:
        b: The clicked button (unused).
    """
    with output:
        output.clear_output()

        selected_name = dropdown.value
        if selected_name is None:
            # The dropdown has no value when the product list is empty.
            print(" Məhsul seçilməyib.")
            return
        selected_id = product_map[selected_name]
        print(f" {selected_name} ({selected_id}) məhsulu seçildi")

        raw_reviews = fetch_reviews(selected_id)
        print(f" {len(raw_reviews)} review tapıldı")

        if not raw_reviews:
            print(" Bu məhsul üçün review tapılmadı.")
            return

        clean_df = clean_reviews(raw_reviews)

        if clean_df.empty:
            print(" Təmizləmədən sonra review qalmadı.")
            return

        results = []
        for _, row in clean_df.iterrows():
            # Best effort: one failed review must not abort the whole run.
            try:
                analysis = provider.get_analysis(
                    product_name=selected_name,
                    rating=row["rating"],
                    review_text=row["content"]
                )
                results.append({
                    "product_name": selected_name,
                    "author": row["author"],
                    "date": row["reviewDate"],
                    "rating": row["rating"],
                    "content": row["content"],
                    **analysis
                })
            except Exception as e:
                print(f" Xəta: {e}")
                continue

        if not results:
            # Every analysis failed; an empty frame has no columns, so the
            # column selection and plots below would raise KeyError.
            print(" Heç bir review analiz edilə bilmədi.")
            return

        final_df = pd.DataFrame(results)
        print(" Analiz tamamlandı!\n")

        display(final_df[["author", "date", "rating", "sentiment", "content"]].head(10))

        _draw_dashboard(final_df, selected_name)


# Attach the click handler, then render the picker, the button and the output area.
btn.on_click(on_click)
display(dropdown, btn, output)

Dropdown(description='Məhsul seç:', layout=Layout(width='600px'), options=('Ağıllı dinamik Yandex stansiya Lit…

Button(button_style='primary', description='Analiz et', style=ButtonStyle())

Output()