In [17]:
import pandas as pd

In [18]:
# Keep rendered frames/series short: show at most 5 rows before truncating.
pd.set_option('display.max_rows', 5)

# Load the wine reviews; the first CSV column is used as the row index.
reviews = pd.read_csv(
    "../data/wine-reviews/winemag-data-130k-v2.csv",
    index_col=0,
)
reviews.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [19]:
# Median of the review scores.
reviews["points"].median()

88.0

In [20]:
# Distinct country values in order of first appearance (NaN is included).
reviews["country"].unique()

array(['Italy', 'Portugal', 'US', 'Spain', 'France', 'Germany',
       'Argentina', 'Chile', 'Australia', 'Austria', 'South Africa',
       'New Zealand', 'Israel', 'Hungary', 'Greece', 'Romania', 'Mexico',
       'Canada', nan, 'Turkey', 'Czech Republic', 'Slovenia',
       'Luxembourg', 'Croatia', 'Georgia', 'Uruguay', 'England',
       'Lebanon', 'Serbia', 'Brazil', 'Moldova', 'Morocco', 'Peru',
       'India', 'Bulgaria', 'Cyprus', 'Armenia', 'Switzerland',
       'Bosnia and Herzegovina', 'Ukraine', 'Slovakia', 'Macedonia',
       'China', 'Egypt'], dtype=object)

In [21]:
# Review count per country, ordered alphabetically by country name.
# Rows whose country is missing do not form a group.
by_country = reviews.groupby('country')
reviews_per_country = by_country.size()
reviews_per_country


country
Argentina    3800
Armenia         2
             ... 
Ukraine        14
Uruguay       109
Length: 43, dtype: int64

In [22]:
# Same per-country counts, this time ordered from most to fewest reviews.
# NOTE: this rebinds reviews_per_country, replacing the alphabetically
# ordered series from the groupby cell.
reviews_per_country = reviews["country"].value_counts()
reviews_per_country

US        54504
France    22093
          ...  
China         1
Egypt         1
Name: country, Length: 43, dtype: int64

In [24]:
# Centre each price on the column mean. The mean is evaluated once and
# subtracted from the whole column in a single vectorised operation, instead
# of being recomputed inside a per-element lambda. Missing prices stay NaN.
centered_price = reviews.price - reviews.price.mean()
centered_price

0               NaN
1        -20.363389
            ...    
129969    -3.363389
129970   -14.363389
Name: price, Length: 129971, dtype: float64

In [25]:
# The "best bargain" is the wine with the highest points-to-price ratio.
points_per_price = reviews.points / reviews.price
bargain_wine_idx = points_per_price.idxmax()

# Look up the title of that review by its index label.
bargain_wine = reviews.loc[bargain_wine_idx, "title"]
bargain_wine

'Bandit NV Merlot (California)'

In [26]:
# How many descriptions contain the word "tropical".
reviews["description"].str.contains("tropical").sum()


3607

In [27]:
# How many descriptions contain the word "fruity".
reviews["description"].str.contains("fruity").sum()

9090

In [28]:
# Boolean mask: True where the description contains "tropical".
tropical = reviews["description"].str.contains("tropical")
tropical

0          True
1         False
          ...  
129969    False
129970    False
Name: description, Length: 129971, dtype: bool

In [29]:
# Same mask built with a plain Python substring test applied per description.
# Unlike .str.contains, `in` would raise a TypeError on a missing (NaN)
# description instead of propagating it.
tropical = reviews["description"].map(lambda text: "tropical" in text)
tropical

0          True
1         False
          ...  
129969    False
129970    False
Name: description, Length: 129971, dtype: bool

In [30]:
# Boolean mask: True where the description contains the substring "fruity".
fruity = reviews["description"].map(lambda text: "fruity" in text)
fruity

0         False
1          True
          ...  
129969    False
129970    False
Name: description, Length: 129971, dtype: bool

In [32]:
# Summarise how often each descriptor occurs; summing a boolean mask counts
# its True entries.
descriptor_counts = pd.Series(
    {"tropical": tropical.sum(), "fruity": fruity.sum()},
    name="descriptor_counts",
)
descriptor_counts

tropical    3607
fruity      9090
Name: descriptor_counts, dtype: int64

In [47]:
def star_scheme(x):
    """Return a copy of a review row with a ``stars`` rating added.

    Rating rules:
      * 3 stars - ``points`` >= 95, or ``country`` is "Canada" (any score);
      * 2 stars - ``points`` in the range [85, 95);
      * 1 star  - everything else.

    Parameters
    ----------
    x : pandas.Series
        One review row providing ``points`` and ``country`` entries, as
        passed by ``DataFrame.apply(star_scheme, axis=1)``.

    Returns
    -------
    pandas.Series
        A copy of ``x`` whose ``stars`` entry is 1, 2 or 3. The input row is
        left unmodified.
    """
    # These are scalar tests, so use short-circuit `or` and a chained
    # comparison rather than the element-wise `|` operator.
    if x["points"] >= 95 or x["country"] == "Canada":
        stars = 3
    elif 85 <= x["points"] < 95:
        stars = 2
    else:
        stars = 1

    # Assign by label on a copy. Attribute assignment (`x.stars = ...`) only
    # updates an entry that already exists; otherwise it just sets a Python
    # attribute and the rating never reaches the returned row. Copying also
    # avoids mutating the row handed in by `apply`.
    rated = x.copy()
    rated["stars"] = stars
    return rated

In [48]:
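# Disabled: star_scheme reads both .points and .country from a full row, so it
# cannot be mapped over the points column alone (each element is a bare score).
# reviews.points.map(star_scheme)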

In [50]:
# axis=1 calls star_scheme once per row; each row arrives as a Series indexed
# by column name.
reviews_star = reviews.apply(star_scheme, axis=1)

# Show the rated frame produced above rather than the input frame.
reviews_star.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,stars
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,2
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,2
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,2
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,2
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,2


In [57]:
# Spot check of the country rule: Canadian wines scoring below 95.
reviews_star.loc[
    (reviews.points < 95)
    & (reviews.country == "Canada")
]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,stars
454,Canada,"An aromatic knockout with notes of peach, papa...",Reserve Icewine,92,30.0,Ontario,Niagara-On-The-Lake,,Sean P. Sullivan,@wawinereport,Pillitteri 2012 Reserve Icewine Vidal (Niagara...,Vidal,Pillitteri,3
2616,Canada,"A slightly earthy, spicy nose leads, followed ...",Fusion,83,12.0,Ontario,Niagara Peninsula,,Susan Kostrzewa,@suskostrzewa,Pillitteri 2004 Fusion Gewürztraminer-Riesling...,Gewürztraminer-Riesling,Pillitteri,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129528,Canada,A delicious though somewhat reserved wine with...,Icewine,89,45.0,Ontario,Niagara Peninsula,,Sean P. Sullivan,@wawinereport,Henry of Pelham 2011 Icewine Vidal (Niagara Pe...,Vidal,Henry of Pelham,3
129581,Canada,"Smooth and engaging, this offers classic varie...",,90,38.0,British Columbia,Okanagan Valley,,Paul Gregutt,@paulgwine,Burrowing Owl 2013 Syrah (Okanagan Valley),Syrah,Burrowing Owl,3
