In [1]:
!pip install dash jupyter-dash

Collecting dash
  Downloading dash-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting jupyter-dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl.metadata (3.6 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting ansi2html (from jupyter-dash)
  Downloading ansi2html-1.9.2-py3-none-any.whl.metadata (3.7 kB)
Collecting jedi>=0.16 (from ipython->jupyter-dash)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading dash-3.0.4-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Downloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from dash import dcc, html
from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

# Load and process the main dataset
df = pd.read_csv('/content/avocado_cleaned.csv')
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['weekday_name'] = df['date'].dt.day_name()
geo_to_state = {
    'Albany': 'NY',
    'Roanoke': 'VA',
    'Richmond/Norfolk': 'VA',
    'Raleigh/Greensboro': 'NC',
    'Portland': 'OR',
    'Plains': 'KS',
    'Pittsburgh': 'PA',
    'Phoenix/Tucson': 'AZ',
    'Philadelphia': 'PA',
    'Orlando': 'FL',
    'Northern New England': 'VT',
    'Northeast': 'NY',
    'New York': 'NY',
    'Sacramento': 'CA',
    'San Diego': 'CA',
    'West Tex/New Mexico': 'TX',
    'West': 'CA',
    'Total U.S.': 'US',
    'Tampa': 'FL',
    'Syracuse': 'NY',
    'St. Louis': 'MO',
    'New Orleans/Mobile': 'LA',
    'Spokane': 'WA',
    'Southeast': 'GA',
    'South Central': 'TX',
    'South Carolina': 'SC',
    'Seattle': 'WA',
    'San Francisco': 'CA',
    'Nashville': 'TN',
    'Denver': 'CO',
    'Dallas/Ft. Worth': 'TX',
    'Columbus': 'OH',
    'Cincinnati/Dayton': 'OH',
    'Chicago': 'IL',
    'Charlotte': 'NC',
    'California': 'CA',
    'Buffalo/Rochester': 'NY',
    'Boston': 'MA',
    'Boise': 'ID',
    'Baltimore/Washington': 'MD',
    'Atlanta': 'GA',
    'Detroit': 'MI',
    'Midsouth': 'TN',
    'Miami/Ft. Lauderdale': 'FL',
    'Louisville': 'KY',
    'Los Angeles': 'CA',
    'Las Vegas': 'NV',
    'Jacksonville': 'FL',
    'Indianapolis': 'IN',
    'Grand Rapids': 'MI',
    'Houston': 'TX',
    'Hartford/Springfield': 'CT',
    'Harrisburg/Scranton': 'PA',
    'Great Lakes': 'IL'
}
df['state_code'] = df['geography'].map(geo_to_state)
df_map = df[df['state_code'].notna()]

df = df.set_index('date')

# Resample the data to monthly frequency and sum the 'total_volume'
monthly_sales = df['total_volume'].resample('M').sum()
decomposition = seasonal_decompose(monthly_sales, model='additive', period=12)

# Prepare additional datasets
df_plu = df.groupby('date')[['4046', '4225', '4770']].sum()
df = df.reset_index()
monthly = df.copy()
monthly['year_month'] = df['date'].dt.to_period('M').astype(str)
df = df.set_index('date')
monthly = monthly.groupby('year_month')['total_volume'].sum().reset_index()

geography_sales = df.groupby('geography')['total_volume'].sum().sort_values(ascending=False)
df = df.reset_index()
monthly_trends = df.copy()

monthly_trends['month'] = df['date'].dt.strftime('%B')
df = df.set_index('date')
monthly_trends = monthly_trends.groupby('month').agg({'total_volume': 'sum', 'average_price': 'mean'}).reset_index()
df = df.reset_index()
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
monthly_trends['month'] = pd.Categorical(monthly_trends['month'], categories=month_order, ordered=True)
monthly_trends = monthly_trends.sort_values('month')

# App initialization
app = JupyterDash(__name__)

# Layout with original + extra graphs
app.layout = html.Div([
    html.H1("Avocado Sales Dashboard", style={'text-align': 'center'}),

    # Filter and Summary Row
    html.Div([
        html.Div([
            html.H3("Overall Metrics", style={'text-align': 'center'}),
            html.P(f"Total Volume Sold: {df['total_volume'].sum():,.2f}"),
            html.P(f"Average Price: ${df['average_price'].mean():.2f}"),
            html.P(f"Number of Geographies: {df['geography'].nunique()}"),
        ], style={'width': '30%', 'display': 'inline-block'}),

        html.Div([
            html.H3("Filter by Geography"),
            dcc.Dropdown(
                id='geography-dropdown',
                options=[{'label': i, 'value': i} for i in df['geography'].unique()],
                multi=True,
                placeholder="Select a Geography"
            ),
        ], style={'width': '30%', 'display': 'inline-block'}),

        html.Div([
            html.H3("Filter by Type"),
            dcc.Dropdown(
                id='type-dropdown',
                options=[{'label': i, 'value': i} for i in df['type'].unique()],
                multi=True,
                placeholder="Select Type"
            ),
        ], style={'width': '30%', 'display': 'inline-block'}),
    ], style={'display': 'flex', 'justify-content': 'space-between', 'margin-bottom': '40px'}),
    html.Div([
    html.H3("Select Geography from Map"),
    dcc.Graph(id='geo-map')
    ], style={'width': '100%', 'margin-bottom': '40px'}),
    # Main Graphs in Rows of 2
    html.Div([
        html.Div([dcc.Graph(id='bag-size-sales-bar')], style={'width': '48%'}),
        html.Div([dcc.Graph(id='total-volume-over-time')], style={'width': '48%'}),
        html.Div([dcc.Graph(id='average-price-over-time')], style={'width': '48%'}),

    ], style={'display': 'flex', 'justify-content': 'space-between'}),

    html.Div([
        html.Div([dcc.Graph(id='price-volume-pie')], style={'width': '48%'}),
        html.Div([dcc.Graph(id='geography-sales-bar')], style={'width': '48%'}),

    ], style={'display': 'flex', 'justify-content': 'space-between'}),

    html.Div([
        html.Div([dcc.Graph(id='price-volume-scatter')], style={'width': '48%'}),
        html.Div([dcc.Graph(id='monthly-trends-line')], style={'width': '48%'}),
    ], style={'display': 'flex', 'justify-content': 'space-between'}),

    # Extra Insights Title
    html.H2("Extra Insights", style={'textAlign': 'center', 'margin-top': '50px'}),

    html.Div([
        html.Div([
            dcc.Graph(
                id='plu-volume',
                figure=px.line(df, x='date', y='total_volume', color='type',
                               title='Sales Volume: Conventional vs Organic',
                               labels={'total_volume': 'Total Volume'})
        )], style={'width': '48%'}),

        html.Div([
            dcc.Graph(
                id='monthly-total-volume',
                figure=px.line(monthly, x='year_month', y='total_volume',
                               title='Monthly Total Sales Volume',
                               labels={'year_month': 'Month', 'total_volume': 'Total Volume'},
                               markers=True)
        )], style={'width': '48%'}),
    ], style={'display': 'flex', 'justify-content': 'space-between'}),

    html.Div([
        html.Div([
            dcc.Graph(
                id='volume-by-type',
                figure=px.line(df_plu, x=df_plu.index, y=['4046', '4225', '4770'],
                               title='Volume Sold by PLU Code',
                               labels={'value': 'Volume Sold'}, markers=True)
        )], style={'width': '48%'}),

        html.Div([
            dcc.Graph(
                id='price-distribution',
                figure=px.histogram(df, x='average_price', nbins=30,
                                    title='Distribution of Avocado Prices',
                                    labels={'average_price': 'Average Price'})
        )], style={'width': '48%'}),
    ], style={'display': 'flex', 'justify-content': 'space-between'}),

    html.Div([
        dcc.Graph(
            id='monthly-trends',
            figure=px.line(monthly_trends, x='month', y=['total_volume', 'average_price'],
                           title='Monthly Trends in Avocado Sales (Seasonality)',
                           labels={'value': 'Value'}, markers=True)
        ),
    ], style={'width': '100%', 'margin-top': '40px'}),
    html.Div([
        html.Div([dcc.Graph(id='monthly-trends-line1')], style={'width': '48%'}),
    ], style={'display': 'flex', 'justify-content': 'space-between'})
])

# Callbacks (keep same as before)
@app.callback(
    [Output('total-volume-over-time', 'figure'),
     Output('average-price-over-time', 'figure'),
     Output('geography-sales-bar', 'figure'),
     Output('bag-size-sales-bar', 'figure'),
     Output('price-volume-scatter', 'figure'),
     Output('monthly-trends-line', 'figure'),
     Output('monthly-trends-line1', 'figure'),
     Output('price-volume-pie', 'figure'),
     Output('geo-map', 'figure')],
    [Input('geography-dropdown', 'value'),
     Input('type-dropdown', 'value'),
     Input('geo-map', 'clickData')]
)
def update_graph(selected_geographies, selected_types,clickData):
    filtered_df = df.copy()
    if selected_geographies:
        filtered_df = filtered_df[filtered_df['geography'].isin(selected_geographies)]
    if selected_types:
        filtered_df = filtered_df[filtered_df['type'].isin(selected_types)]
    # Volume over time
    volume_fig = px.line(filtered_df.groupby('date')['total_volume'].sum().reset_index(),
                         x='date', y='total_volume', title='Total Volume Sold Over Time')

    # Price over time
    price_fig = px.line(filtered_df.groupby('date')['average_price'].mean().reset_index(),
                        x='date', y='average_price', title='Average Price Over Time')

    # Geography bar
    geo_data = filtered_df.groupby('geography')['total_volume'].sum().sort_values(ascending=False).head(20).reset_index()
    geo_fig = px.bar(geo_data, x='total_volume', y='geography', title='Top 20 Geographies',orientation = 'h' )
    # Geography pie
    geo_fig1 = px.pie(geo_data, values='total_volume', names = 'geography', title='Pie of Top 20 Geographies' )

    # Bag size
    bag_data = filtered_df.groupby('year')[['small_bags', 'large_bags', 'xlarge_bags']].sum().reset_index()
    bag_fig = px.bar(bag_data, x='year', y=['small_bags', 'large_bags', 'xlarge_bags'],
                     title='Bag Size Sales by Year')

    # Scatter
    scatter_fig = px.scatter(filtered_df, x='average_price', y='total_volume', color='type',
                             title='Price vs. Volume Sold', opacity=0.6)

    # Monthly trends
    month_data = filtered_df.groupby(filtered_df['date'].dt.to_period('M')).agg({
        'total_volume': 'sum', 'average_price': 'mean'
    }).reset_index()
    month_data['date'] = month_data['date'].astype(str)
    line_fig = go.Figure()
    line_fig.add_trace(go.Scatter(x=month_data['date'], y=month_data['total_volume'],
                                  mode='lines+markers', name='Total Volume'))
    line_fig.add_trace(go.Scatter(x=month_data['date'], y=month_data['average_price'],
                                  mode='lines+markers', name='Average Price', yaxis='y2'))
    line_fig.update_layout(
        title='Monthly Trends in Sales',
        yaxis=dict(title='Total Volume'),
        yaxis2=dict(title='Average Price', overlaying='y', side='right')
    )
        # Create subplots for decomposition components
    line1_fig = make_subplots(rows=2, cols=1, subplot_titles=('Trend', 'Seasonal'))

    # Add traces for trend
    line1_fig.add_trace(go.Scatter(x=monthly_sales.index, y=decomposition.trend, mode='lines', name='Trend'), row=1, col=1)
    # Add traces for seasonal component
    line1_fig.add_trace(go.Scatter(x=monthly_sales.index, y=decomposition.seasonal, mode='lines', name='Seasonal'), row=2, col=1)
    geo_summary = df.groupby('geography')['total_volume'].sum().reset_index()
    geo_map = px.scatter_geo(geo_summary,
                                locations='geography',
                                locationmode='USA-states',  # if US states
                                color='total_volume',
                                scope='usa',
                                title='Avocado Volume by Geography')
    fig = px.choropleth(
    df_map,
    locations='state_code',
    locationmode='USA-states',
    color='total_volume',
    scope='usa',
    title='Avocado Volume by Geography',
    color_continuous_scale='plasma',
    hover_name='geography'  # <--
    )
    return volume_fig, price_fig, geo_fig, bag_fig, scatter_fig, line_fig, line1_fig, geo_fig1, fig

# Run the app
if __name__ == '__main__':
    app.run(debug=True)


  monthly_sales = df['total_volume'].resample('M').sum()

JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



<IPython.core.display.Javascript object>