In [79]:
import dash
from dash import dcc, html
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Wine dataset that the dashboard charts are built from.
wine_df = pd.read_csv(filepath_or_buffer='preprocessed_wine_data.csv')

# <link> attributes for the project's own stylesheet; the list is passed to
# the Dash constructor as its external stylesheets.
_local_stylesheet = dict(href="assets/style.css", rel="stylesheet")
external_stylesheets = [_local_stylesheet]


In [80]:
# Review dataset; the wine-reviews chart reads its 'talks_about' column.
review_df = pd.read_csv(filepath_or_buffer='updated_wine_reviews.csv')

In [81]:
# Build the Dash application and set the title shown in the browser tab.
app = dash.Dash(
    name=__name__,
    external_stylesheets=external_stylesheets,
)
app.title = "The Story of Wine: From Grapes to Glass!"

In [82]:
# Values found in the Country column that must not be offered in the dropdown
# (presumably region names that leaked into that column -- TODO confirm).
excluded_countries = ['Langhe', 'Valtellina', 'Valpolicella Ripasso Classico', 'Paso Robles']

# One dropdown entry per distinct country. Missing values are dropped first:
# ``unique()`` keeps NaN, which would otherwise become an option whose label
# and value are NaN and which can never match any row when selected.
dropdown_options = [
    {'label': country, 'value': country}
    for country in wine_df['Country'].dropna().unique()
    if country not in excluded_countries
]

In [83]:
def _graph_card(graph_id):
    """Wrap an empty ``dcc.Graph`` with the given id in a flexible, margined container."""
    return html.Div(
        dcc.Graph(id=graph_id),
        style={'flex': '1 1 30%', 'margin': '10px'},
    )


def _graph_row(graph_ids):
    """Build a wrapping flex row holding one graph card per id, in the given order."""
    return html.Div(
        style={'display': 'flex', 'flexWrap': 'wrap', 'justifyContent': 'space-between'},
        children=[_graph_card(graph_id) for graph_id in graph_ids],
    )


# Page structure, top to bottom: centred header (title + image), a first row of
# graphs, the country selector, then a second row of graphs.
_header = html.Div(
    style={
        'display': 'flex',
        'alignItems': 'center',
        'justifyContent': 'center',
        'marginBottom': '20px',
    },
    children=[
        html.H1("Wine Statistics Dashboard"),
        html.Img(src='assets/wine.png', style={'width': '100px', 'height': 'auto'}),
    ],
)

_country_selector = dcc.Dropdown(
    id='country-dropdown',
    options=dropdown_options,
    # Start with the first available option selected.
    value=dropdown_options[0]['value'],
    multi=False,
)

app.layout = html.Div([
    _header,
    # Average price by country, flavor profile, wine reviews.
    _graph_row(['avg-price-country', 'flavor-profile', 'wine-reviews']),
    _country_selector,
    # Price distribution, price vs. rating, alcohol vs. rating, food pairings.
    _graph_row(['price-distribution', 'price-vs-rating', 'alcohol-vs-rating', 'food-pairings']),
])

In [84]:
@app.callback(
    dash.dependencies.Output('wine-reviews', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def updated_wine_reviews(selected_country):
    """Build the donut chart of review categories for the selected country.

    Parameters
    ----------
    selected_country : str or None
        Current value of the country dropdown. A falsy value (nothing
        selected) means all reviews are counted.

    Returns
    -------
    plotly.graph_objects.Figure
        Pie chart (with a hole) of ``review_df['talks_about']`` value counts.
    """
    if selected_country:
        # The country lives in wine_df, not review_df, so the mask is built
        # there and then aligned to review_df's index. Indexing review_df with
        # an unaligned boolean Series warns or raises IndexingError when the
        # two frames do not share exactly the same index.
        # NOTE(review): this assumes a review row and a wine row with the same
        # index label describe the same wine -- confirm against the data prep.
        country_mask = (
            (wine_df['Country'] == selected_country)
            .reindex(review_df.index, fill_value=False)
        )
        filtered_data = review_df[country_mask]
    else:
        filtered_data = review_df

    # Rename explicitly so the column names do not depend on the pandas
    # version's default naming for value_counts().reset_index().
    talks_about_counts = filtered_data['talks_about'].value_counts().reset_index()
    talks_about_counts.columns = ['Category', 'Count']

    fig = go.Figure(
        data=[go.Pie(
            labels=talks_about_counts['Category'],
            values=talks_about_counts['Count'],
            hole=0.3
        )]
    )

    fig.update_layout(
        title="Types of Wine Reviews",
        height=600
    )

    return fig

In [85]:
@app.callback(
    dash.dependencies.Output('avg-price-country', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def update_avg_price_country(selected_country):
    """Return a bar chart of mean price per country.

    Only countries offered in the dropdown are included. ``selected_country``
    triggers the callback but does not affect the result.
    """
    offered_countries = [entry['value'] for entry in dropdown_options]
    is_offered = wine_df['Country'].isin(offered_countries)

    mean_price_by_country = (
        wine_df[is_offered]
        .groupby('Country')['Price']
        .mean()
        .reset_index()
    )

    return px.bar(
        mean_price_by_country,
        x='Country',
        y='Price',
        title='Average Price by Country',
    )

In [86]:
@app.callback(
    dash.dependencies.Output('flavor-profile', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def update_flavor_profile(selected_country):
    """Return a donut chart of the mean of each flavor column over all wines.

    ``selected_country`` triggers the callback but does not affect the result;
    the averages are always computed over the whole of ``wine_df``.
    """
    flavor_means = (
        wine_df[['Bold', 'Tannin', 'Sweet', 'Acidic']]
        .mean()
        .rename_axis('Flavor')
        .reset_index(name='Average')
    )

    donut = go.Pie(
        labels=flavor_means['Flavor'],
        values=flavor_means['Average'],
        hole=0.3,
    )
    fig = go.Figure(data=donut)
    fig.update_layout(title='Average Flavor Profiles', height=600)
    return fig

In [87]:
@app.callback(
    dash.dependencies.Output('price-distribution', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def update_price_distribution(selected_country):
    """Return a box plot of ``Price`` for the wines of the selected country."""
    is_selected = wine_df['Country'] == selected_country
    country_wines = wine_df.loc[is_selected]
    return px.box(country_wines, y='Price', title='Price Distribution')

In [88]:
@app.callback(
    dash.dependencies.Output('food-pairings', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def update_food_pairings(selected_country):
    """Return a bar chart of food-pairing totals for the selected country.

    Parameters
    ----------
    selected_country : str or None
        Current value of the country dropdown; only wines whose ``Country``
        equals it are counted.

    Returns
    -------
    plotly.graph_objects.Figure
        Bar chart with one bar per food-pairing column, showing the column sum.
    """
    filtered_df = wine_df[wine_df['Country'] == selected_country]
    food_columns = wine_df.columns[12:]  # Food pairing columns start from the 13th column
    # Sum over the country-filtered rows. Previously the whole of wine_df was
    # summed, so the chart never reacted to the dropdown.
    food_counts = filtered_df[food_columns].sum().reset_index()
    food_counts.columns = ['Food', 'Count']
    fig = px.bar(food_counts, x='Food', y='Count', title='Food Pairings Count')
    return fig

In [89]:
@app.callback(
    dash.dependencies.Output('price-vs-rating', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def update_price_vs_rating(selected_country):
    """Return a Price-vs-Rating scatter plot with an OLS trendline for one country.

    Rows missing either ``Price`` or ``Rating`` are left out of the plot.
    """
    is_selected = wine_df['Country'] == selected_country
    plottable = wine_df.loc[is_selected].dropna(subset=['Price', 'Rating'])
    return px.scatter(
        plottable,
        x='Price',
        y='Rating',
        title='Price vs. Rating',
        trendline='ols',
    )

In [90]:
@app.callback(
    dash.dependencies.Output('alcohol-vs-rating', 'figure'),
    [dash.dependencies.Input('country-dropdown', 'value')]
)
def update_alcohol_vs_rating(selected_country):
    """Return a line chart of mean alcohol content per rating for one country.

    Rows missing either ``Alcohol content`` or ``Rating`` are ignored.
    """
    is_selected = wine_df['Country'] == selected_country
    mean_alcohol_per_rating = (
        wine_df.loc[is_selected]
        .dropna(subset=['Alcohol content', 'Rating'])
        .groupby('Rating')['Alcohol content']
        .mean()
        .reset_index()
    )

    return px.line(
        mean_alcohol_per_rating,
        x='Rating',
        y='Alcohol content',
        title='Average Alcohol Content by Rating',
    )

In [78]:
if __name__ == '__main__':
    # Newer Dash releases start the dev server with ``app.run`` and no longer
    # provide ``run_server``; older releases only have ``run_server``. Prefer
    # ``run`` and fall back, so this cell works on either.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)

---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
Cell In[71], line 10, in updated_wine_reviews(selected_country='Australia')
      1 @app.callback(
      2     dash.dependencies.Output('wine-reviews', 'figure'),
      3     [dash.dependencies.Input('country-dropdown', 'value')] 
      4 )
      5 def updated_wine_reviews(selected_country):
      7     if selected_country:
      8     #     filtered_data = review_df[wine_df['Country'] == selected_country]
      9     # Combine the relevant 'Country' data into 'review_df'
---> 10         review_df = review_df.join(wine_df['Country'])
        review_df =                                                 review  \
0    Easily the best Sauvignon Blanc I've had to da...   
1                        Excellent, Mockingbird Inn?\n   
2    Absolutely incredible. One of the best sauvign...   
3                     Reported in nytimes. Did not try  