# In [6]:
import dash
from dash import dcc, html
import plotly.express as px
import pandas as pd
import ast

# Step 1: Data Exploration and Cleaning
data = pd.read_csv('flipkart_com-ecommerce_sample.csv')


def _parse_category_tree(raw):
    """Return the flat list of category levels encoded in one tree cell.

    The cell is parsed as a Python literal (normally a list of strings) and
    every entry is split on '>>', so a single-element list holding a whole
    path still yields one item per level.
    NOTE(review): assumes levels are joined by '>>' -- confirm against the CSV.

    Args:
        raw: Value of one `product_category_tree` cell.

    Returns:
        A list of stripped, non-empty category names; empty when the cell
        is not text (e.g. a missing value).
    """
    if not isinstance(raw, str):
        # Missing cells arrive as NaN; literal_eval would raise on them.
        return []
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        # Not a valid literal: fall back to treating the raw text as one path.
        parsed = raw
    paths = parsed if isinstance(parsed, (list, tuple)) else [parsed]
    return [
        level.strip()
        for path in paths
        for level in str(path).split('>>')
        if level.strip()
    ]


# Convert the product_category_tree to a flat list of category levels
data['product_categories'] = data['product_category_tree'].apply(_parse_category_tree)

# Get the count of category levels in each row
data['category_count'] = data['product_categories'].apply(len)

# Both columns are plotted as numbers (histogram / log-scaled axis), so coerce
# them first: any non-numeric placeholder text becomes NaN and is dropped below
# instead of being plotted as a category.
# NOTE(review): both headers must exist in the CSV or this raises KeyError --
# confirm that 'product_price' is the right column name for this file.
numeric_columns = ['product_rating', 'product_price']
data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Drop rows with missing values in 'product_rating' and 'product_price' columns
data = data.dropna(subset=numeric_columns)

# Step 2: Create the Dashboard Layout
app = dash.Dash(__name__)

# Compute each tally once and reuse it for both axes of its chart, rather than
# re-running value_counts() separately for x and y.
category_count_tally = data['category_count'].value_counts()
top_brand_tally = data['brand'].value_counts().nlargest(10)

app.layout = html.Div([
    # Dash style dictionaries use camelCased CSS property names.
    html.H1("Flipkart E-commerce Sample Dataset Dashboard", style={'textAlign': 'center'}),

    # Visualization 1: Product Categories Count
    # One bar per distinct `category_count` value; height = number of rows.
    dcc.Graph(
        id='category-count-bar',
        figure={
            'data': [
                {
                    'x': category_count_tally.index,
                    'y': category_count_tally.values,
                    'type': 'bar',
                },
            ],
            'layout': {
                'title': 'Number of Product Categories',
                'xaxis': {'title': 'Number of Categories'},
                'yaxis': {'title': 'Count'},
            },
        },
    ),

    # Visualization 2: Product Ratings Distribution
    dcc.Graph(
        id='product-rating-histogram',
        figure={
            'data': [
                {'x': data['product_rating'], 'type': 'histogram'},
            ],
            'layout': {
                'title': 'Distribution of Product Ratings',
                'xaxis': {'title': 'Product Rating'},
                'yaxis': {'title': 'Count'},
            },
        },
    ),

    # Visualization 3: Brand-wise Product Count
    # Only the ten most frequent brands are shown.
    dcc.Graph(
        id='top-brands-bar',
        figure={
            'data': [
                {
                    'x': top_brand_tally.index,
                    'y': top_brand_tally.values,
                    'type': 'bar',
                },
            ],
            'layout': {
                'title': 'Top 10 Brands by Product Count',
                'xaxis': {'title': 'Brand'},
                'yaxis': {'title': 'Count'},
            },
        },
    ),

    # Visualization 4: Rating vs. Price (Interactive Scatter Plot using Plotly)
    # Price is drawn on a log axis; marker size follows `category_count`.
    dcc.Graph(
        id='rating-vs-price-scatter',
        figure=px.scatter(
            data,
            x='product_rating',
            y='product_price',
            color='brand',
            hover_name='product_name',
            size='category_count',
            log_y=True,
            title='Rating vs. Price by Brand',
        ),
    ),
])

if __name__ == '__main__':
    # Start the development server (debug mode) only when executed directly.
    # `run` supersedes the legacy `run_server` launcher, which newer Dash
    # releases no longer accept; fall back to it only when `run` is absent.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
