In [117]:
import pandas as pd

# Reporting years and the feedback categories each sentiment list is matched against.
years = [2011, 2012, 2013]
categories = [
    'Product Quality', 'Shipping', 'Customer Service', 'Price', 'User Interface',
    'Accessibility', 'Returns Policy', 'Product Variety', 'Payment Options',
    'Loyalty Programs',
]

# One mapping per year (same order as `years`): sentiment -> list of counts.
data = [
    dict(
        Positive=[100, 80, 90, 70, 60, 40, 55, 95, 85, 75, 60],
        Neutral=[30, 20, 25, 15, 35, 45, 20, 25, 30, 40, 30],
        Negative=[10, 25, 15, 30, 20, 10, 35, 5, 15, 10, 10],
    ),
    dict(
        Positive=[60, 40, 55, 95, 85, 75, 60, 100, 80, 90, 70],
        Neutral=[30, 20, 25, 30, 40, 30, 15, 35, 45, 20, 25],
        Negative=[35, 5, 15, 10, 10, 10, 25, 15, 30, 20, 10],
    ),
    dict(
        Positive=[100, 80, 75, 60, 90, 70, 60, 40, 55, 95, 85],
        Neutral=[15, 35, 45, 20, 25, 30, 40, 30, 30, 20, 25],
        Negative=[20, 10, 35, 5, 10, 25, 15, 30, 15, 10, 10],
    ),
]

# Flatten to one record per (year, sentiment, category).
# NOTE: zip() stops at the shorter input. Each count list has 11 values but
# there are only 10 categories, so the last value of every list is not used.
result = [
    {
        'Year': year,
        'Year_str': str(year),
        'Sentiment': sentiment,
        'Category': category,
        'Count': count,
    }
    for year, year_data in zip(years, data)
    for sentiment, sentiment_values in year_data.items()
    for category, count in zip(categories, sentiment_values)
]

# Long-format frame built in a single call from the collected records.
df = pd.DataFrame(result)

# Show how positive "Product Quality" feedback evolves across the years.
is_positive_quality = df['Category'].eq('Product Quality') & df['Sentiment'].eq('Positive')
df.loc[is_positive_quality, ['Year_str', 'Category', 'Count']]

Unnamed: 0,Year_str,Category,Count
0,2011,Product Quality,100
30,2012,Product Quality,60
60,2013,Product Quality,100


In [1]:
import pandas as pd

import pickle
import pandas as pd

# Allow up to 400 characters per cell so review texts are not cut off in displays.
pd.set_option('display.max_colwidth', 400)

product_asin = 'B018Y229OU'

# Load the labelled reviews for this product.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only open pickle files that come from a trusted source.
reviews_path = f'Reviews_{product_asin}_label_predef.pkl'
with open(reviews_path, 'rb') as pickle_file:
    df = pickle.load(pickle_file)
df

Unnamed: 0,product,year,year_str,review,sentiment,label_num,label
0,B018Y229OU,2016,2016,"Good basic tablet for checking email , web browsing , and reading ebooks.",positive,2,games and
1,B018Y229OU,2016,2016,I would strongly recommend buying it.,positive,5,definitely recommend
2,B018Y229OU,2016,2016,It's a great reader but also good for surfing the web.,positive,2,games and
3,B018Y229OU,2016,2016,"Easy to set up and use, plus the price was reasonable.",positive,4,very easy
4,B018Y229OU,2016,2016,I got this for my niece.,positive,1,bought this
...,...,...,...,...,...,...,...
449,B018Y229OU,2017,2017,Only thing is the charger doesnt stay in the port and battery doesnt last long,negative,6,only downside
450,B018Y229OU,2017,2017,"Love my watch bumper, but the tablet is so slow wouldn't recommend",negative,7,wouldn recommend
451,B018Y229OU,2017,2017,Too many ads.,negative,2,lock screen
452,B018Y229OU,2017,2017,The camera quality is horrible.,negative,5,the camera


In [2]:
# Count reviews per (year, sentiment, label) combination, then list the
# newest year / last sentiment / highest label number first.
summary_keys = ['year', 'year_str', 'sentiment', 'label_num', 'label']
review_counts = df.groupby(summary_keys)['review'].count()
df_summary = (
    review_counts
    .reset_index()
    .rename(columns={'review': 'count'})
    .sort_values(['year', 'sentiment', 'label_num'], ascending=False)
)
df_summary

Unnamed: 0,year,year_str,sentiment,label_num,label,count
127,2017,2017,positive,25,reasonably priced,28
126,2017,2017,positive,17,quad core,1
125,2017,2017,positive,15,starter tablet,26
124,2017,2017,positive,14,expectations and,8
123,2017,2017,positive,13,parental controls,34
...,...,...,...,...,...,...
4,2015,2015,negative,5,the camera,9
3,2015,2015,negative,4,figure out,4
2,2015,2015,negative,3,especially for,4
1,2015,2015,negative,2,lock screen,11


In [3]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
import plotly.express as px
import json

# Convert the dataset to a format suitable for treemaps
def prepare_data_for_treemap(year, sentiment, top_n=10):
    """Return the most frequent review themes for one year and sentiment.

    Reads the module-level ``df_summary`` frame.

    Args:
        year: Value matched exactly against ``df_summary['year']``.
        sentiment: Value matched exactly against ``df_summary['sentiment']``
            (case-sensitive, e.g. ``'positive'``).
        top_n: Maximum number of themes to return. Defaults to 10.

    Returns:
        A DataFrame with the columns ``sentiment``, ``label`` and ``count``,
        sorted by ``count`` in descending order and limited to ``top_n`` rows.
        The frame is empty when no row matches.
    """
    matches = (df_summary['year'] == year) & (df_summary['sentiment'] == sentiment)
    return (
        df_summary.loc[matches, ['sentiment', 'label', 'count']]
        .sort_values(['count'], ascending=False)
        .head(top_n)
    )

def prepare_data_for_barchart(year):
    """Compute the percentage share of each sentiment for one year.

    Reads the module-level ``df_summary`` frame and sums its ``count`` column
    per sentiment for the rows whose ``year`` equals ``year``.

    Args:
        year: Value matched exactly against ``df_summary['year']``.

    Returns:
        A three-row DataFrame (negative, neutral, positive — in that order)
        with the columns ``sentiments``, ``percentage`` (float, 0-100) and
        ``percentage_text`` (rounded percentage followed by ``'%'``).
        When the year has no reviews at all, every percentage is ``0.0``
        instead of failing with a division by zero.
    """
    in_year = df_summary[df_summary['year'] == year]

    # Plain Python ints keep the arithmetic and round() formatting predictable.
    counts = {
        sentiment: int(in_year.loc[in_year['sentiment'] == sentiment, 'count'].sum())
        for sentiment in ('negative', 'neutral', 'positive')
    }
    total = sum(counts.values())

    rows = []
    for sentiment, count in counts.items():
        # A year without any review has total == 0; report 0% rather than raise.
        score = (count / total) * 100 if total else 0.0
        rows.append([sentiment, score, str(round(score)) + '%'])

    return pd.DataFrame(rows, columns=['sentiments', 'percentage', 'percentage_text'])

# Preparing data
# positive_df = prepare_data_for_treemap(max(unique_years), 'Positive')
# neutral_df = prepare_data_for_treemap(max(unique_years), 'Neutral')
# negative_df = prepare_data_for_treemap(max(unique_years), 'Negative')

# Initialize the Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Years offered by the slider: unique, NaN-free, sorted, and converted to plain
# Python ints so that min/max/value/marks all agree and serialise as ordinary
# JSON numbers / keys.
slider_years = sorted({int(year) for year in df['year'].dropna().unique()})

# Define the layout of the app:
#   row 1 - three stacked treemaps (positive / neutral / negative) next to a
#           narrow stacked bar with the overall sentiment split
#   row 2 - year slider driving all four figures
#   modal - filled by a callback when a treemap tile is clicked
app.layout = dbc.Container(
    [
        html.H1(children='FEEDVIZ: Visualising Customer Feedback', className="feedviz_title"),

        dbc.Row(
            [
                dbc.Col(
                    dbc.Stack(
                        [
                            html.Div(children=dcc.Graph(id='positive-treemap')),
                            html.Div(children=dcc.Graph(id='neutral-treemap')),
                            html.Div(children=dcc.Graph(id='negative-treemap')),
                        ],
                        className="treemap-container",
                    ),
                    style={'height': '70vh'},
                    width={'size': 11},
                ),
                dbc.Col(
                    html.Div(children=[
                        dcc.Graph(id='sentiments-bar', className="bargraph-container"),
                    ]),
                    style={'height': '70vh'},
                    width=1,
                ),
            ],
            className="p-0",
        ),

        html.Br(),

        dbc.Row(
            dbc.Col(
                html.Div(children=[
                    dcc.Slider(
                        id='year-slider',
                        min=min(slider_years),
                        max=max(slider_years),
                        step=1,
                        marks={year: str(year) for year in slider_years},
                        value=max(slider_years),  # Start on the most recent year
                    ),
                ]),
                width={'size': 12},
            ),
        ),

        html.Div([
            dbc.Modal(
                [
                    dbc.ModalHeader(dbc.ModalTitle(id="modal-header")),
                    dbc.ModalBody(id="modal-body"),
                ],
                id="modal",
                size="lg",
                is_open=False,
            ),
        ]),
    ],
    className="g-0",
    fluid=True,
    style={'background-color': 'rgba(236, 227, 191, 0.414)','margin-left':'50px','padding-left':'50px','padding-right':'50px','padding-bottom':'50px','width':'1200px'},
)

def _build_treemap(theme_df, color_scale, top_margin=25):
    """Build one sentiment treemap (sentiment -> label tiles sized/coloured by count)."""
    figure = px.treemap(theme_df, path=['sentiment', 'label'], values='count', color='count',
                        color_continuous_scale=color_scale, height=200)
    figure.update_layout(
        margin=dict(t=top_margin, l=0, r=0, b=0),
        coloraxis_showscale=False,
        # Transparent backgrounds so the container colour shows through.
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
    )
    figure.update_traces(marker=dict(cornerradius=5),
                         hovertemplate='Theme: %{label}<br>Reviews: %{value}<extra></extra>')
    return figure


# Define callback to update treemaps based on slider value
@app.callback(
    [Output('sentiments-bar', 'figure'),
     Output('positive-treemap', 'figure'),
     Output('neutral-treemap', 'figure'),
     Output('negative-treemap', 'figure')],
    [Input('year-slider', 'value')]
)
def update_treemaps(selected_year):
    """Rebuild the sentiment bar and the three treemaps for the selected year.

    Args:
        selected_year: Current value of the ``year-slider`` component.

    Returns:
        ``(bar_fig, positive_figure, neutral_figure, negative_figure)`` in the
        same order as the callback's ``Output`` list.
    """
    # The treemaps must stay interactive: the modal callback reacts to their clickData.
    positive_figure = _build_treemap(
        prepare_data_for_treemap(selected_year, 'positive'), 'blugrn', top_margin=30)
    neutral_figure = _build_treemap(
        prepare_data_for_treemap(selected_year, 'neutral'), 'turbid')
    negative_figure = _build_treemap(
        prepare_data_for_treemap(selected_year, 'negative'), 'reds')

    # Single stacked bar showing the share of each sentiment.
    bar_df = prepare_data_for_barchart(selected_year)
    bar_fig = px.bar(bar_df, x=px.Constant(''), y="percentage", color="sentiments", text="percentage_text",
                     color_discrete_map={'negative': '#f34c37','neutral':'#94653a','positive':'#3d8f7d'}, height=600)
    bar_fig.update_layout(
        xaxis=dict(showgrid=False, visible=False),
        yaxis=dict(showgrid=False, visible=False),
        margin=dict(t=0, l=0, r=0, b=0),
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        hovermode=False,
    )
    bar_fig.update_traces(showlegend=False, textposition='inside', insidetextanchor='middle', textfont_size=16)

    return bar_fig, positive_figure, neutral_figure, negative_figure

@app.callback(
    [Output('modal', 'is_open'),
     Output('modal-header', 'children'),
     Output('modal-body', 'children')],
    [Input('positive-treemap', 'clickData'),
     Input('neutral-treemap', 'clickData'),
     Input('negative-treemap', 'clickData'),
     Input('modal', 'is_open')],
    [State('year-slider', 'value')]
)
def update_modal(positive_clickData, neutral_clickData, negative_clickData, is_open, selected_year):
    """Open the detail modal for a clicked treemap theme.

    The triggering component is read from ``dash.callback_context``; the
    three ``*_clickData`` arguments are only declared so that clicks fire the
    callback.

    Returns:
        ``(is_open, modal_title, modal_content)``:
        * trigger is not a treemap (initial call, modal toggled): the incoming
          ``is_open`` is passed through with empty title and content;
        * treemap click without data, or on a tile whose ``percentEntry`` is 1:
          the modal is closed with empty title and content;
        * otherwise: the modal is opened with the selected year's reviews for
          the clicked theme plus a per-year bar chart of that theme.
    """
    # treemap component id -> (bar colour, sentiment value, CSS class for review text)
    treemap_styles = {
        'positive-treemap': ('#3d8f7d', 'positive', 'review_pos'),
        'neutral-treemap': ('#94653a', 'neutral', 'review_neu'),
        'negative-treemap': ('#f34c37', 'negative', 'review_neg'),
    }

    ctx = dash.callback_context
    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else 'No clicks yet'

    # Not a treemap click: leave the modal state as it is.
    if trigger_id not in treemap_styles:
        return is_open, "", ""

    click_data = ctx.triggered[0]['value']
    # NOTE(review): percentEntry == 1 presumably identifies the top-level
    # sentiment tile rather than a theme — confirm against plotly clickData.
    if not click_data or click_data['points'][0]['percentEntry'] == 1:
        return False, "", ""

    fig_color, fig_sentiment, fig_class = treemap_styles[trigger_id]
    label = click_data['points'][0]['label']

    # Bar chart of how often this theme occurs in each year.
    theme_rows = (df_summary['label'] == label) & (df_summary['sentiment'] == fig_sentiment)
    fig = px.bar(
        df_summary.loc[theme_rows, ['year_str', 'label', 'count']],
        x='year_str',
        y='count',
        color='label',
        barmode='group',
        height=300,
    )
    # bargap=0 removes the spacing between bars; grid lines are hidden.
    fig.update_layout(bargap=0, xaxis_title='year', yaxis_title='Reviews Count', margin=dict(t=0, l=0, r=0, b=0),
                      xaxis=dict(showgrid=False),
                      yaxis=dict(showgrid=False))
    fig.update_traces(hovertemplate='Year: %{x}<br>Reviews: %{y}<extra></extra>', showlegend=False, marker_color=fig_color)

    # Review texts for the clicked theme, restricted to the year on the slider.
    review_rows = (df['label'] == label) & (df['sentiment'] == fig_sentiment) & (df['year'] == selected_year)
    reviews = df.loc[review_rows, 'review']

    modal_title = f"Theme: {label}"
    modal_content = [
        # Scrollable list of quoted reviews
        html.Div(
            [html.P(f'"{review}"', className=fig_class) for review in reviews],
            style={'overflowY': 'scroll', 'height': '300px'},
        ),
        # Heading for the chart below
        html.Div(
            [html.H4('Theme Progression Over the Years')],
            className='progression_heading',
        ),
        html.Div(
            [dcc.Graph(figure=fig)],
        ),
    ]

    # The return order matches the callback's Output list.
    return True, modal_title, modal_content


# Run the app
# Starts the Dash server only when this code runs as the main module.
# NOTE(review): host='0.0.0.0' makes the server listen on all network
# interfaces (port 8000) — confirm that exposing it beyond localhost is intended.
if __name__ == '__main__':
    app.run_server(debug=False, host='0.0.0.0', port = 8000)


In [26]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Sample sales data: one row per (region, customer, product category, week).
sales_columns = ['NEW REGION', 'CUSTOMER', 'PRODUCT CATEGORY', 'WEEK', 'SALES']
sales_rows = [
    ('North', 'A', 'X', 1, 100),
    ('North', 'A', 'X', 2, 200),
    ('North', 'A', 'Y', 1, 150),
    ('North', 'A', 'Y', 2, 250),
    ('South', 'B', 'X', 1, 300),
    ('South', 'B', 'X', 2, 100),
    ('South', 'B', 'Y', 1, 50),
    ('South', 'B', 'Y', 2, 250),
]
df = pd.DataFrame(sales_rows, columns=sales_columns)

# Initialize the Dash app
app = dash.Dash(__name__)

# Define dropdown options
regions = df['NEW REGION'].unique()
customers = df['CUSTOMER'].unique()
categories = df['PRODUCT CATEGORY'].unique()


def _multi_dropdown(component_id, choices, placeholder):
    """Build a multi-select dropdown that starts with the first choice selected.

    The initial ``value`` is a one-element list because the dropdown is
    created with ``multi=True``.
    """
    return dcc.Dropdown(
        id=component_id,
        options=[{'label': choice, 'value': choice} for choice in choices],
        value=[choices[0]],
        multi=True,
        placeholder=placeholder,
    )


# Define app layout: three filter dropdowns, the sales line plot, and a text
# element that a callback fills with the filtered rows.
app.layout = html.Div([
    html.Div([
        _multi_dropdown('region-dropdown', regions, "Select Region"),
        _multi_dropdown('customer-dropdown', customers, "Select Customer"),
        _multi_dropdown('category-dropdown', categories, "Select Category"),
    ], style={'width': '30%', 'display': 'inline-block'}),

    dcc.Graph(id='sales-line-plot'),
    html.P(id='text', className="text"),
])

def _as_selection_list(selection):
    """Normalise a dropdown value to a list: None -> [], str -> [str], else list(selection)."""
    if selection is None:
        return []
    if isinstance(selection, str):
        # list('North') would split the string into characters, so wrap it instead.
        return [selection]
    return list(selection)


# Define callback to update the plot
@app.callback(
    [Output('sales-line-plot', 'figure'),
     Output('text', 'children')],
    [Input('region-dropdown', 'value'),
     Input('customer-dropdown', 'value'),
     Input('category-dropdown', 'value')]
)
def update_figure(selected_regions, selected_customers, selected_categories):
    """Redraw the sales line plot for the current dropdown selections.

    Args:
        selected_regions: Value of ``region-dropdown`` — a string, a list of
            strings, or ``None``.
        selected_customers: Value of ``customer-dropdown`` (same shapes).
        selected_categories: Value of ``category-dropdown`` (same shapes).

    Returns:
        ``(figure, text)``: a line figure of SALES per WEEK for the rows that
        match all three selections, and the string form of those rows.
        An empty or ``None`` selection matches no rows.
    """
    selected_regions = _as_selection_list(selected_regions)
    selected_customers = _as_selection_list(selected_customers)
    selected_categories = _as_selection_list(selected_categories)

    filtered_df = df[
        (df['NEW REGION'].isin(selected_regions)) &
        (df['CUSTOMER'].isin(selected_customers)) &
        (df['PRODUCT CATEGORY'].isin(selected_categories))
    ]
    fig = px.line(filtered_df, x='WEEK', y='SALES', color='CUSTOMER', line_group='PRODUCT CATEGORY', labels={'SALES': 'Sales'})
    return fig, str(filtered_df)

# Run the app
# Starts the Dash server (debug=True) only when this code runs as the main module.
# NOTE(review): this cell rebinds the names `app` and `df`, so it replaces the
# objects created by the earlier FEEDVIZ cell in the same kernel.
if __name__ == '__main__':
    app.run_server(debug=True)

In [22]:
# Minimal reproduction of the dropdown filter with scalar (string) selections.
df = pd.DataFrame({
    'NEW REGION': ['North', 'North', 'North', 'North'],
    'CUSTOMER': ['A', 'A', 'A', 'A'],
    'PRODUCT CATEGORY': ['X', 'X', 'Y', 'Y'],
    'WEEK': [1, 2, 1, 2],
    'SALES': [100, 200, 150, 250]
})

selected_regions='North'
selected_customers='A'
selected_categories='X'

# Wrap each scalar selection in a one-element list before calling isin().
# list('North') yields the characters ['N', 'o', 'r', 't', 'h'], none of which
# equals 'North', so filtering with list(...) would match no rows.
filtered_df = df[
    (df['NEW REGION'].isin([selected_regions])) &
    (df['CUSTOMER'].isin([selected_customers])) &
    (df['PRODUCT CATEGORY'].isin([selected_categories]))
]

# Kept on purpose to show the pitfall: list() on a str splits it into characters.
list(selected_regions)



['N', 'o', 'r', 't', 'h']