In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Load the wine reviews straight from the zipped CSV; the first CSV column is
# used as the row index rather than kept as a data column.
reviews = pd.read_csv("data/winemag-data-130k-v2.csv.zip", index_col = 0)

In [2]:
from plotly.offline import init_notebook_mode, iplot
# Set up plotly's offline notebook mode so figures can be rendered inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected = True)

In [3]:
# Print column dtypes and non-null counts to see which fields have missing values.
reviews.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 129971 entries, 0 to 129970
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   country                129908 non-null  object 
 1   description            129971 non-null  object 
 2   designation            92506 non-null   object 
 3   points                 129971 non-null  int64  
 4   price                  120975 non-null  float64
 5   province               129908 non-null  object 
 6   region_1               108724 non-null  object 
 7   region_2               50511 non-null   object 
 8   taster_name            103727 non-null  object 
 9   taster_twitter_handle  98758 non-null   object 
 10  title                  129971 non-null  object 
 11  variety                129970 non-null  object 
 12  winery                 129971 non-null  object 
dtypes: float64(1), int64(1), object(11)
memory usage: 13.9+ MB


In [4]:
# Draw the relation between points and prices.
# Keep only reviews that have both a score and a price, then take a small
# sample so the scatter plot stays readable.
price_points = reviews[['points', 'price']].dropna()
# A fixed random_state makes the sample (and the plot built from it)
# identical on every Restart & Run All.
df = price_points.sample(100, random_state=42)

In [17]:
# Scatter of price (log-scaled y axis) against points for the sampled reviews.
# The title is set once here; the previous version passed a misspelled title
# that was immediately overwritten by a follow-up update_layout call.
prc_pt_fig = px.scatter(df, x="points", y="price", log_y=True,
                        title="Price and points relation")
# Bare last expression so the notebook renders the figure as the cell output.
prc_pt_fig

In [6]:
# Sum of listed prices per country, scaled down by 1000, as a flat two-column
# frame (`country`, `price`) ready to feed to the choropleth.
geo_map = (
    reviews
    .groupby('country')['price']
    .sum()
    .div(1000)
    .reset_index()
)

In [7]:
# DataFrame.max() reduces each column independently: the result pairs the
# greatest country name (string ordering) with the largest price total, so the
# two values shown need not come from the same row.
geo_map.max()

country    Uruguay
price      1984.66
dtype: object

In [8]:
# World map with each country shaded (red scale) by its summed price from geo_map.
# Countries are matched by name, hence locationmode='country names'.
price_choropleth = go.Choropleth(
    locationmode='country names',
    locations=geo_map['country'],
    z=geo_map['price'],
    colorscale='Reds',
    colorbar={'title': "price"},
)
geo_map_fig = go.Figure(data=price_choropleth)

In [9]:
# Render the choropleth built in the previous cell.
geo_map_fig.show()

In [10]:
# Grape varieties and countries the heat map is restricted to.
variety_series = [
    'Pinot Noir',
    'Chardonnay',
    'Cabernet Sauvignon',
    'Red Blend',
    'Bordeaux-style Red Blend',
]
country_series = [
    'US',
    'France',
    'Italy',
    'Spain',
    'Portugal',
]
# Keep only the reviews whose variety AND country are both in the lists above.
heat_map = reviews.loc[
    reviews['country'].isin(country_series)
    & reviews['variety'].isin(variety_series)
]

In [11]:
# Show the filtered reviews; pandas elides the middle rows in the rendered table.
heat_map

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
10,US,"Soft, supple plum envelopes an oaky structure ...",Mountain Cuvée,87,19.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Kirkland Signature 2011 Mountain Cuvée Caberne...,Cabernet Sauvignon,Kirkland Signature
12,US,"Slightly reduced, this wine offers a chalky, t...",,87,34.0,California,Alexander Valley,Sonoma,Virginie Boone,@vboone,Louis M. Martini 2012 Cabernet Sauvignon (Alex...,Cabernet Sauvignon,Louis M. Martini
14,US,Building on 150 years and six generations of w...,,87,12.0,California,Central Coast,Central Coast,Matt Kettmann,@mattkettmann,Mirassou 2012 Chardonnay (Central Coast),Chardonnay,Mirassou
20,US,Ripe aromas of dark berries mingle with ample ...,Vin de Maison,87,23.0,Virginia,Virginia,,Alexander Peartree,,Quiévremont 2012 Vin de Maison Red (Virginia),Red Blend,Quiévremont
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129943,Italy,"A blend of Nero d'Avola and Syrah, this convey...",Adènzia,90,29.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Baglio del Cristo di Campobello 2012 Adènzia R...,Red Blend,Baglio del Cristo di Campobello
129947,Italy,"A blend of 65% Cabernet Sauvignon, 30% Merlot ...",Symposio,90,20.0,Sicily & Sardinia,Terre Siciliane,,Kerin O’Keefe,@kerinokeefe,Feudo Principi di Butera 2012 Symposio Red (Te...,Red Blend,Feudo Principi di Butera
129949,US,There's no bones about the use of oak in this ...,Barrel Fermented,90,35.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Flora Springs 2013 Barrel Fermented Chardonnay...,Chardonnay,Flora Springs
129960,Portugal,"Fresh and fruity, this is full of red cherry f...",Vértice,90,48.0,Douro,,,Roger Voss,@vossroger,Caves Transmontanas 2006 Vértice Pinot Noir (D...,Pinot Noir,Caves Transmontanas


In [12]:
# Average score for every (variety, country) pair.
# The raw frame holds many reviews per pair; handing those row-level columns
# straight to go.Heatmap leaves each cell showing a single review's score
# rather than a summary, and ships every row to the browser. Aggregating first
# gives one well-defined value per cell; pairs with no reviews stay NaN and are
# drawn as gaps (connectgaps=False).
heat_map_mean = heat_map.pivot_table(
    index='variety',
    columns='country',
    values='points',
    aggfunc='mean',
)

heat_map_fig = go.Figure(data=go.Heatmap(
                   z=heat_map_mean.values,
                   x=list(heat_map_mean.columns),
                   y=list(heat_map_mean.index),
                   connectgaps = False,
                   colorscale = 'Earth',
                   colorbar_title = 'mean points'))

heat_map_fig.update_layout(
    title='Top 5 Wine varieties over countries',
    xaxis_title='country',
    yaxis_title='variety')

heat_map_fig.show()

In [15]:
# Drop missing prices BEFORE sampling so the sample always has exactly 100
# values (sampling first and dropping afterwards yields a varying, smaller
# count), and seed the draw so the surface is reproducible.
price = reviews['price'].dropna().sample(100, random_state=42)
# Outer product: z_z[i, j] = price[i] * price[j], a 100 x 100 grid for the surface plot.
z_z = np.outer(price, price)

In [16]:
# 3D surface over the price outer-product grid z_z, drawn on a fixed
# 500 x 500 canvas with explicit margins.
price_surface = go.Surface(z=z_z)
sur_plot_fig = go.Figure(data=[price_surface])

sur_plot_fig.update_layout(
    title='Price diag',
    autosize=False,
    width=500,
    height=500,
    margin={'l': 65, 'r': 50, 'b': 65, 't': 90},
)

sur_plot_fig.show()