In [11]:
# Notebook setup: pandas holds the review data, plotly.offline renders figures.
import pandas as pd
from plotly.offline import init_notebook_mode, iplot

# Enable plotly output inside the notebook. Per the plotly docs,
# connected=True loads plotly.js from a CDN instead of embedding it.
init_notebook_mode(connected=True)

In [12]:
# Zipped CSV of the wine reviews; pandas' default compression="infer"
# handles the .zip extension, so no explicit unzip step is needed.
WINE_REVIEWS_CSV = "data/winemag-data-130k-v2.csv.zip"

df = pd.read_csv(WINE_REVIEWS_CSV)

# Summarise columns, dtypes and non-null counts (price, region_* and the
# taster columns are only partially populated in the output below).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129971 entries, 0 to 129970
Data columns (total 14 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Unnamed: 0             129971 non-null  int64  
 1   country                129908 non-null  object 
 2   description            129971 non-null  object 
 3   designation            92506 non-null   object 
 4   points                 129971 non-null  int64  
 5   price                  120975 non-null  float64
 6   province               129908 non-null  object 
 7   region_1               108724 non-null  object 
 8   region_2               50511 non-null   object 
 9   taster_name            103727 non-null  object 
 10  taster_twitter_handle  98758 non-null   object 
 11  title                  129971 non-null  object 
 12  variety                129970 non-null  object 
 13  winery                 129971 non-null  object 
dtypes: float64(1), int64(2), object(11)


In [14]:
# `go` was never imported anywhere in the notebook (the cell that did so
# appears to have been deleted), so this cell raised NameError on a fresh
# kernel. Import it here, next to the other plotly import this cell needs.
import plotly.graph_objects as go
import plotly.offline as offline

# Scatter plot: review score (points) on x against bottle price on y,
# one marker per review. Note that `price` is missing for some reviews
# (120975 of 129971 rows are non-null).
scatter_plot = go.Scatter(
    x=df['points'],
    y=df['price'],
    mode='markers',
    marker=dict(color='blue', size=5),
)

layout = go.Layout(
    title='Wine Reviews - Scatter Plot',
    xaxis=dict(title='Points'),
    yaxis=dict(title='Price'),
)

fig = go.Figure(data=[scatter_plot], layout=layout)

# Write the figure to a standalone HTML file without opening a browser tab;
# the call returns the filename, which the notebook echoes as cell output.
offline.plot(fig, filename='scatter_plot.html', auto_open=False)


'scatter_plot.html'

In [15]:
import plotly.express as px

# Number of reviews per country as a two-column frame: 'country', 'count'.
# Rows with a missing country are left out by value_counts().
country_counts = (
    df['country']
    .value_counts()
    .rename_axis('country')
    .reset_index(name='count')
)

# World map shaded by review count; countries are matched by name.
fig = px.choropleth(
    country_counts,
    locations='country',
    locationmode='country names',
    color='count',
    hover_name='country',
    color_continuous_scale='Viridis',
    title='Wine Reviews by Country',
)
fig.update_layout(geo_showcoastlines=True)

# Save as standalone HTML; the returned filename becomes the cell output.
offline.plot(fig, filename='choropleth_plot.html', auto_open=False)


'choropleth_plot.html'

In [16]:
import numpy as np

# Mean points and mean price for every (country, variety) pair present in
# the data. Only the points column is plotted in this cell.
heatmap_data = (
    df.groupby(['country', 'variety'])[['points', 'price']]
    .mean()
    .reset_index()
)

# Reshape to a variety x country grid of mean points; pairs that never
# occur in the data end up as NaN cells.
heatmap_data_pivot = heatmap_data.pivot(
    index='variety',
    columns='country',
    values='points',
)

# Heatmap trace: countries along x, varieties along y, mean points as colour.
heatmap_trace = go.Heatmap(
    x=heatmap_data_pivot.columns,
    y=heatmap_data_pivot.index,
    z=heatmap_data_pivot.values,
    colorscale='Viridis',
)
heatmap_fig = go.Figure(data=heatmap_trace)

heatmap_fig.update_layout(
    title='Average Points by Country and Variety',
    xaxis_title='Country',
    yaxis_title='Variety',
)

# Save as standalone HTML; the returned filename becomes the cell output.
offline.plot(heatmap_fig, filename='heatmap_plot.html', auto_open=False)


'heatmap_plot.html'

In [17]:
# Plot surface plot
surface_fig = go.Figure(data=[go.Surface(z=heatmap_data_pivot.values,
                                         x=heatmap_data_pivot.columns,
                                         y=heatmap_data_pivot.index,
                                         colorscale='Viridis')])
surface_fig.update_layout(title='Surface Plot of Points by Country and Variety',
                          scene=dict(xaxis_title='Country',
                                     yaxis_title='Variety',
                                     zaxis_title='Points'))
offline.plot(surface_fig, filename='surface_plot.html', auto_open=False)


'surface_plot.html'