In [1]:
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Apply matplotlib's 'ggplot' style sheet to any matplotlib-based figures.
plt.style.use('ggplot')
# Load the review data from an Excel workbook located in the current working
# directory. Later cells rely on the 'asin', 'overall' and 'reviewTime' columns.
df = pd.read_excel('merge.xlsx')
# Bare last expression: rendered as the cell's output.
df

Unnamed: 0,title,asin,overall,reviewTime
0,Slime Time Fall Fest [With CDROM and Collector...,0764443682,5,"11 26, 2012"
1,XCC Qi promise new spider snake preparing men'...,1291691480,5,"08 2, 2016"
2,Magical Things I Really Do Do Too!,1940280001,5,"07 10, 2013"
3,"Ashes to Ashes, Oranges to Oranges",1940735033,4,"07 13, 2017"
4,"Ashes to Ashes, Oranges to Oranges",1940735033,5,"05 31, 2014"
...,...,...,...,...
884801,Edith Windsor Women's Deep V-neck Beaded Sequi...,B01HJHTH5U,5,"02 21, 2017"
884802,Edith Windsor Women's Deep V-neck Beaded Sequi...,B01HJHTH5U,5,"11 25, 2016"
884803,Edith Windsor Women's Deep V-neck Beaded Sequi...,B01HJHTH5U,3,"11 10, 2016"
884804,Aeropostale Women's Sun & Waves Crop Cami L Gr...,B01HJFNU7S,5,"09 16, 2017"


In [3]:
# Parse the review date strings (e.g. "11 26, 2012": month day, year) and
# derive a calendar-month key, both as a pandas Period and as plain text.
review_dates = pd.to_datetime(df['reviewTime'], format='%m %d, %Y')
monthly_period = review_dates.dt.to_period('M')

df['reviewTime'] = review_dates
df['year_month'] = monthly_period
df['year_month_str'] = monthly_period.astype(str)

# Number of (non-null) ratings per month and product, flattened back into
# ordinary columns: 'year_month_str', 'asin', 'count'.
df_grouped = (
    df.groupby(['year_month_str', 'asin'])['overall']
    .count()
    .reset_index(name='count')
)

In [4]:
# value_counts() orders ASINs from most to fewest review rows, so the first
# ten index entries are the ten most-reviewed products.
asin_review_counts = df['asin'].value_counts()
top_products = asin_review_counts.index[:10].tolist()
top_products

['B000V0IBDM',
 'B000KPIHQ4',
 'B00I0VHS10',
 'B00RLSCLJM',
 'B000PHANNM',
 'B000YFSR5G',
 'B00201ER88',
 'B00GXE331K',
 'B000P0X15G',
 'B00XT15P8E']

In [5]:
# Re-display df to inspect the converted 'reviewTime' column and the derived
# 'year_month' / 'year_month_str' columns added by the date-parsing cell.
df

Unnamed: 0,title,asin,overall,reviewTime,year_month,year_month_str
0,Slime Time Fall Fest [With CDROM and Collector...,0764443682,5,2012-11-26,2012-11,2012-11
1,XCC Qi promise new spider snake preparing men'...,1291691480,5,2016-08-02,2016-08,2016-08
2,Magical Things I Really Do Do Too!,1940280001,5,2013-07-10,2013-07,2013-07
3,"Ashes to Ashes, Oranges to Oranges",1940735033,4,2017-07-13,2017-07,2017-07
4,"Ashes to Ashes, Oranges to Oranges",1940735033,5,2014-05-31,2014-05,2014-05
...,...,...,...,...,...,...
884801,Edith Windsor Women's Deep V-neck Beaded Sequi...,B01HJHTH5U,5,2017-02-21,2017-02,2017-02
884802,Edith Windsor Women's Deep V-neck Beaded Sequi...,B01HJHTH5U,5,2016-11-25,2016-11,2016-11
884803,Edith Windsor Women's Deep V-neck Beaded Sequi...,B01HJHTH5U,3,2016-11-10,2016-11,2016-11
884804,Aeropostale Women's Sun & Waves Crop Cami L Gr...,B01HJFNU7S,5,2017-09-16,2017-09,2017-09


In [6]:
# Display the per-month, per-product rating counts
# (columns: 'year_month_str', 'asin', 'count').
df_grouped

Unnamed: 0,year_month_str,asin,count
0,2002-11,B000072XQQ,2
1,2002-11,B000073249,2
2,2002-11,B00007325C,2
3,2002-12,B0000731RO,2
4,2002-12,B00007CWBQ,2
...,...,...,...
515758,2018-10,B01GGGSD6E,1
515759,2018-10,B01GHPMJYQ,1
515760,2018-10,B01GRGM0XA,1
515761,2018-10,B01H410W0A,1


In [7]:
# Interactive bar chart of monthly review counts for the top products.
# One bar trace is added per product; a dropdown switches between showing
# every product ("All") and a single product.
fig = go.Figure()

for product in top_products:
    df_filtered = df_grouped[df_grouped['asin'] == product]
    # Traces keep their default visibility (shown) so that the initial view
    # agrees with the pre-selected "All" dropdown entry and the initial title.
    fig.add_trace(go.Bar(x=df_filtered['year_month_str'], y=df_filtered['count'], name=product))

n_traces = len(fig.data)

# "All" reveals every trace. (Previously only the first trace was flagged
# visible, so "All" actually displayed a single product.)
# Titles are updated through the nested "title.text" key, which addresses the
# title text explicitly rather than relying on a bare string being accepted
# for the whole `title` attribute.
buttons = [dict(label="All",
                method="update",
                args=[{"visible": [True] * n_traces},
                      {"title.text": "Rating Distribution Over Time for All Top Products"}])]

# One button per product: show only that product's trace.
for i, product in enumerate(top_products):
    visibility = [i == j for j in range(n_traces)]
    button = dict(label=product,
                  method="update",
                  args=[{"visible": visibility},
                        {"title.text": f"Rating Distribution Over Time for {product}"}])
    buttons.append(button)


fig.update_layout(
    updatemenus=[dict(active=0,  # index 0 == "All", matching the initial view
                      buttons=buttons,
                      x=0.15,
                      xanchor="left",
                      y=1.15,
                      yanchor="top")],
    xaxis_title="Year Month",
    yaxis_title="Count",
    title="Rating Distribution Over Time for All Top Products"
)

# Fix the displayed x-axis window to 2007-01 .. 2019-12.
fig.update_xaxes(range=["2007-01", "2019-12"])
fig.show()


In [8]:
# Dash app: choose a product and a star rating, then view how many reviews
# with that rating the product received in each month.

# The callback filters on the star rating, so it needs counts broken down by
# 'overall'. The df_grouped table built in the aggregation cell is keyed by
# month and product only (no 'overall' column), which made the callback raise
# KeyError on a top-to-bottom run. Build a dedicated per-rating table here,
# limited to the products offered in the dropdown.
rating_counts = (
    df[df['asin'].isin(top_products)]
    .groupby(['year_month_str', 'asin', 'overall'])
    .size()
    .reset_index(name='count')
)

app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id='product-dropdown',
        options=[{'label': i, 'value': i} for i in top_products],
        value=top_products[0],
        clearable=False  # a cleared selection would pass None to the callback
    ),
    dcc.Dropdown(
        id='rating-dropdown',
        options=[{'label': i, 'value': i} for i in range(1, 6)],
        value=1,
        clearable=False
    ),
    dcc.Graph(id='rating-graph')
])

@app.callback(
    Output('rating-graph', 'figure'),
    [Input('product-dropdown', 'value'),
     Input('rating-dropdown', 'value')]
)
def update_graph(selected_product, selected_rating):
    """Build the bar chart for the selected product and star rating.

    Args:
        selected_product: ASIN chosen in the product dropdown.
        selected_rating: Star rating (1-5) chosen in the rating dropdown.

    Returns:
        A plotly Figure with one bar per month showing the number of reviews
        of ``selected_product`` that have rating ``selected_rating``. The
        figure has no bars when no rows match the selection.
    """
    filtered_df = rating_counts[
        (rating_counts['asin'] == selected_product)
        & (rating_counts['overall'] == selected_rating)
    ]
    fig = go.Figure(data=[
        go.Bar(
            x=filtered_df['year_month_str'],
            y=filtered_df['count'],
            text=filtered_df['count'],
            textposition='auto',
        )
    ])
    fig.update_layout(title_text=f'Rating {selected_rating} Distribution Over Time for {selected_product}')
    return fig

if __name__ == '__main__':
    # NOTE(review): recent Dash releases provide app.run(...) as the
    # replacement for run_server; confirm against the installed Dash version.
    app.run_server(debug=True)


In [9]:
# Standalone Plotly figure (no Dash server required): one bar trace per
# (product, rating) pair, toggled by two dropdown menus.
#
# NOTE: the two menus are independent of each other. Every button sets the
# visibility of *all* traces, so picking a product shows all five ratings of
# that product, and picking a rating shows that rating for all top products;
# the selections do not combine. Each button therefore also rewrites the
# figure title so the title always states what is currently displayed.
#
# plotly.graph_objects (go) and pandas (pd) come from the notebook's import
# cell; they are not re-imported here.

# Assumes df is the reviews DataFrame with the 'asin', 'overall' and
# 'year_month_str' columns prepared by the earlier cells.

# Top 10 products by number of ratings.
top_products = df['asin'].value_counts().head(10).index.tolist()

# Number of ratings per month, product and rating value.
df_grouped = df.groupby(['year_month_str', 'asin', 'overall']).size().reset_index(name='count')

# Start from an empty figure.
fig = go.Figure()

# Maps (product, rating) -> index of the matching trace in fig.data.
trace_positions = {}

# Add one bar trace for every rating value of every top product.
for i, product in enumerate(top_products):
    for rating in range(1, 6):  # rating values 1 through 5
        df_filtered = df_grouped[(df_grouped['asin'] == product) & (df_grouped['overall'] == rating)]
        # Initially only the first product's rating-1 trace is shown, which
        # matches the first entry of each dropdown.
        trace = go.Bar(x=df_filtered['year_month_str'], y=df_filtered['count'], name=f"{product} Rating {rating}", visible=(i == 0 and rating == 1))
        fig.add_trace(trace)
        trace_positions[(product, rating)] = len(fig.data) - 1

# Buttons of the product dropdown: show every rating trace of one product.
product_buttons = []
for product in top_products:
    # Everything hidden by default ...
    visible_state = [False] * len(fig.data)
    # ... then switch on the five traces that belong to this product.
    for rating in range(1, 6):
        visible_state[trace_positions[(product, rating)]] = True
    product_buttons.append(dict(label=product,
                                method="update",
                                args=[{"visible": visible_state},
                                      {"title.text": f"Rating Distribution Over Time for {product}"}]))

# Buttons of the rating dropdown: show one rating value across all products.
rating_buttons = []
for rating in range(1, 6):
    # Everything hidden by default ...
    visible_state = [False] * len(fig.data)
    # ... then switch on this rating's trace for every product.
    for product in top_products:
        visible_state[trace_positions[(product, rating)]] = True
    rating_buttons.append(dict(label=f"Rating {rating}",
                               method="update",
                               args=[{"visible": visible_state},
                                     {"title.text": f"Rating {rating} Distribution Over Time for All Top Products"}]))

# Attach both dropdown menus and the axis labels.
fig.update_layout(
    updatemenus=[
        dict(
            buttons=product_buttons,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top",
            bgcolor="LightSteelBlue"
        ),
        dict(
            buttons=rating_buttons,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.3,
            xanchor="left",
            y=1.1,
            yanchor="top",
            bgcolor="LightSteelBlue"
        )
    ],
    xaxis_title="Year Month",
    yaxis_title="Count"
)

# Initial title describes the single trace that is visible at start-up
# (first product, rating 1). Falls back to the generic title when there are
# no products, so an empty dataset does not raise IndexError here.
if top_products:
    initial_title = f"Rating 1 Distribution Over Time for {top_products[0]}"
else:
    initial_title = "Rating Distribution Over Time for All Top Products"
fig.update_layout(title_text=initial_title)



fig.show()
