In [9]:
import pandas as pd


plots_task = pd.read_csv('data/winemag-data-130k-v2.csv.zip')

In [10]:
plots_task.columns

Index(['Unnamed: 0', 'country', 'description', 'designation', 'points',
       'price', 'province', 'region_1', 'region_2', 'taster_name',
       'taster_twitter_handle', 'title', 'variety', 'winery'],
      dtype='object')

In [21]:
plots_task.isnull().sum()

Unnamed: 0               0
country                  0
description              0
designation              0
points                   0
price                    0
province                 0
region_1                 0
region_2                 0
taster_name              0
taster_twitter_handle    0
title                    0
variety                  0
winery                   0
dtype: int64

In [20]:
 plots_task.dropna(inplace=True)


In [15]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo


pyo.init_notebook_mode(connected=True)

In [16]:
plots_task.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [32]:
# Scatter plot of price vs. variety
scatter_plot = px.scatter(
    surface_data,
    x='taster_name',
    y='price',
    color='province',
    hover_name='title',
    title='Price vs. Variety by Province',
    labels={'variety': 'Wine Variety', 'price': 'Price (USD)'}
)

pyo.iplot(scatter_plot)


In [24]:
country_avg_points = plots_task.groupby('country')['points'].mean().reset_index()

choropleth_plot = px.choropleth(
    country_avg_points,
    locations='country',
    locationmode='country names',
    color='points',
    hover_name='country',
    color_continuous_scale='Viridis',
    title='Average Wine Rating by Country'
)

pyo.iplot(choropleth_plot)


In [28]:
province_avg_price = plots_task.groupby('province')['price'].mean().reset_index()

heatmap_plot = px.density_heatmap(
    province_avg_price,
    x='province',
    y='price',
    title='Heatmap of Average Price by Province',
    labels={'price': 'Average Price (USD)', 'province': 'Province'}
)

pyo.iplot(heatmap_plot)


In [30]:

province_num = {p: i for i, p in enumerate(province_avg_price['province'])}

z = []
for p in province_avg_price['province']:
    z_row = []
    for pr in [min(plots_task['price']), max(plots_task['price'])]:
        avg_points = plots_task[(plots_task['province'] == p) & (plots_task['price'] == pr)]['points'].mean()
        z_row.append(avg_points if pd.notna(avg_points) else 0)
    z.append(z_row)

surface_plot = go.Surface(
    z=z,
    x=[province_num[p] for p in province_avg_price['province']],
    y=[min(plots_task['price']), max(plots_task['price'])],
    colorscale='Viridis'
)

layout = go.Layout(
    title='Surface Plot of Points by Province and Price',
    scene=dict(
        xaxis=dict(title='Province', tickvals=list(province_num.values()), ticktext=list(province_num.keys())),
        yaxis=dict(title='Price'),
        zaxis=dict(title='Points')
    )
)

fig = go.Figure(data=[surface_plot], layout=layout)
pyo.iplot(fig)
