# Wine Magazine Reviews

In [61]:
import pandas as pd

# Load the review table; the CSV's first column is used as the row index.
reviews_csv = "Dataset/winemag-data-130k-v2.csv"
wine_reviews = pd.read_csv(reviews_csv, index_col=0)
wine_reviews

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


### shape
`shape` is an attribute (no parentheses) that returns `(rows, cols)`.

In [None]:
# (n_rows, n_columns) tuple; `shape` is an attribute, so it takes no parentheses.
wine_reviews.shape

### T
Transpose

In [None]:
# Swap rows and columns (`.T` is shorthand for this call).
wine_reviews.transpose()

### head()

Returns the first five rows by default.
Any other integer can be passed as an argument to change the number of rows.

In [None]:
# First five rows (five is the default value of n).
wine_reviews.head(n=5)

In [None]:
# First ten rows.
wine_reviews.head(n=10)

### tail()
Returns the last five rows by default.

In [None]:
# Last five rows (five is the default value of n).
wine_reviews.tail(n=5)

### Native Accessors

In [None]:
# Select one column as a Series; bracket access is equivalent to attribute access.
wine_reviews["country"]

In [None]:
# Scalar lookup in one step: row label 0, column 'country'.
# A single .loc call avoids chained indexing and makes it explicit that 0 is
# an index label rather than a position.
wine_reviews.loc[0, 'country']

In [None]:
# Label-based selection: explicit row labels crossed with the location columns.
row_labels = [0, 1, 10, 100]
location_columns = ["country", "province", "region_1", "region_2"]
df = wine_reviews.loc[row_labels, location_columns]
df

In [None]:
# First row by integer position, with every column, returned as a Series.
wine_reviews.iloc[0, :]

In [None]:
# Positions 0-2 of the first column; positional slices exclude the end position.
wine_reviews.iloc[:3, 0]

In [None]:
# Positions 1 and 2 of the first column; position 3 is excluded.
wine_reviews.iloc[1:3, 0]

In [None]:
# Label slices are end-inclusive, so the row labelled 10 is part of the result.
taster_columns = ['taster_name', 'taster_twitter_handle', 'points']
wine_reviews.loc[:10, taster_columns]

In [None]:
# The first ten descriptions, taken by position.
first_descriptions = wine_reviews["description"].head(10)
first_descriptions

### Conditional Selection

In [None]:
# Boolean Series: True wherever the country is Italy.
wine_reviews["country"].eq('Italy')

In [None]:
# Combine two boolean masks with & to keep Italian wines scoring 90 or below.
from_italy = wine_reviews["country"] == 'Italy'
at_most_90_points = wine_reviews["points"] <= 90
wine_reviews.loc[from_italy & at_most_90_points]

### isin()
lets you select rows whose value matches any item in a list of values

In [None]:
# Keep the rows whose country is any of the listed values.
from_italy_or_france = wine_reviews["country"].isin(['Italy', 'France'])
wine_reviews.loc[from_italy_or_france]

In [None]:
# Pick rows by index label via a membership mask on the index.
wanted_labels = [1, 2, 3, 5, 8]
sample_reviews = wine_reviews.loc[wine_reviews.index.isin(wanted_labels)]
sample_reviews

In [None]:
# Wines scoring at least 95 points that come from Australia or New Zealand.
top_oceania_wines = wine_reviews.loc[
    (wine_reviews.points >= 95)
    & (wine_reviews.country.isin(['Australia', 'New Zealand']))
]
# End on the bare name so the notebook renders the selection.
top_oceania_wines


### notnull()

In [None]:
# Rows that have a price recorded.
has_price = wine_reviews["price"].notnull()
wine_reviews.loc[has_price]

### isnull()

In [None]:
# Rows whose price is missing.
lacks_price = wine_reviews["price"].isnull()
wine_reviews.loc[lacks_price]

### Summary Functions

#### describe()
generates a high-level summary of the attributes of the given column.

In [None]:
# Summary statistics for the points column.
wine_reviews["points"].describe()

In [None]:
# Summary of the country column.
wine_reviews["country"].describe()

In [None]:
# Average score across all reviews.
wine_reviews["points"].mean()

In [None]:
# Distinct values found in the country column.
wine_reviews["country"].unique()

In [None]:
# Number of reviews of wines from each country.
wine_reviews["country"].value_counts()

In [None]:
# Number of reviews written by each named taster.
wine_reviews["taster_name"].value_counts()

In [None]:
# Median score across all reviews.
median_points = wine_reviews["points"].median()
median_points

In [None]:
# Each price expressed as its distance from the mean price.
mean_price = wine_reviews["price"].mean()
centered_price = wine_reviews["price"] - mean_price
centered_price

In [None]:
# Title of the wine with the highest points-to-price ratio.
points_per_price = wine_reviews.points / wine_reviews.price
best_ratio_label = points_per_price.idxmax()
bargain_wine = wine_reviews.loc[best_ratio_label, 'title']
bargain_wine

In [None]:
# Count the descriptions that mention each word.
# .str.contains is vectorised; regex=False requests a plain substring match
# (what `in` did), and na=False treats a missing description as "no match"
# instead of raising TypeError the way `"tropical" in nan` would.
n_trop = wine_reviews.description.str.contains("tropical", regex=False, na=False).sum()
n_fruity = wine_reviews.description.str.contains("fruity", regex=False, na=False).sum()
descriptor_counts = pd.Series([n_trop, n_fruity], index=['tropical', 'fruity'])
descriptor_counts

A score of 95 or higher counts as 3 stars, a score of at least 85 but less than 95 is 2 stars. Any other score is 1 star.

Also, the Canadian Vintners Association bought a lot of ads on the site, so any wines from Canada should automatically get 3 stars, regardless of points.

In [None]:
def stars(row):
    """Convert one review row into a 1-3 star rating.

    Canadian wines always get 3 stars regardless of score. Otherwise a score
    of 95 or more earns 3 stars, a score of 85 to 94 earns 2 stars, and
    anything lower earns 1 star.

    `row` must expose `country` and `points` attributes.
    """
    # The country override is checked first, so it wins over the score.
    if row.country == 'Canada' or row.points >= 95:
        return 3
    return 2 if row.points >= 85 else 1

# axis=1 (same as 'columns') passes each row to stars() in turn.
star_ratings = wine_reviews.apply(stars, axis=1)
star_ratings

In [None]:
# How many reviews received each score.
wine_reviews["points"].value_counts()

Another way to get the same counts:

### groupby()

In [None]:
# Reviews per score, this time ordered by score rather than by frequency.
wine_reviews.groupby('points')["points"].count()

In [None]:
# Per-country row count (len also counts rows with a missing price) plus the
# lowest and highest price.
# 'min'/'max' are passed by name: pandas 2.1+ emits a FutureWarning when the
# Python builtins are handed to agg(), and a future version will stop mapping
# them to the NaN-skipping groupby reductions. The column labels are unchanged.
wine_reviews.groupby(['country']).price.agg([len, 'min', 'max'])

In [None]:
# For every (country, province) pair, keep the row of its highest-scoring wine.
(
    wine_reviews
    .groupby(['country', 'province'])
    .apply(lambda group: group.loc[group.points.idxmax()])
)


In [None]:
# Same as above, restricted to Canadian wines before grouping.
(
    wine_reviews.loc[wine_reviews['country'] == 'Canada']
    .groupby(['country', 'province'])
    .apply(lambda group: group.loc[group.points.idxmax()])
)

In [None]:
# Number of descriptions per (country, province); the result column is named 'len'.
countries_reviewed = (
    wine_reviews
    .groupby(['country', 'province'])["description"]
    .agg([len])
)
countries_reviewed

### Sorting

> Ascending

In [None]:
# Fewest reviews first (ascending is the default, spelled out here).
countries_reviewed.sort_values(by='len', ascending=True)

> Descending

In [None]:
# Most reviews first.
countries_reviewed.sort_values('len', ascending=False)

In [None]:
# 'country' is an index level of this grouped frame and 'len' is a column;
# sort by country first, then by review count within each country.
countries_reviewed.sort_values(['country', 'len'])

In [None]:
# Number of reviews attributed to each Twitter handle.
wine_reviews.groupby('taster_twitter_handle')["taster_twitter_handle"].count()
# Equivalent:
# wine_reviews.groupby('taster_twitter_handle').size()

In [None]:
# Highest score available at each price, ordered from cheapest to most expensive.
wine_reviews.groupby('price')["points"].max().sort_index()

In [None]:
# Cheapest and most expensive price recorded for every grape variety.
price_extremes = wine_reviews.groupby('variety')["price"].agg(['min', 'max'])
price_extremes

In [None]:
# Most expensive varieties first: by minimum price, ties broken by maximum price.
price_extremes.sort_values(by=['min', 'max'], ascending=False)

In [None]:
# Average score given by each taster.
reviewer_mean_ratings = wine_reviews.groupby('taster_name')["points"].mean()
reviewer_mean_ratings

In [None]:
# Review count for each (country, variety) pair, most common first.
wine_reviews.groupby(['country', 'variety']).size().sort_values(ascending=False)

### Data Types

In [None]:
# Data type of every column; `dtypes` is an attribute, so it takes no parentheses.
wine_reviews.dtypes

### Missing Values

In [None]:
# Rows where the country is missing.
wine_reviews[wine_reviews["country"].isnull()]

In [None]:
# The region_1 column before any missing values are filled.
wine_reviews["region_1"]

In [None]:
# Replace missing regions with "Unknown". The result is a new Series;
# it is not assigned, so wine_reviews itself is left unchanged.
wine_reviews["region_1"].fillna("Unknown")

In [None]:
# Swap one exact value for another; the result is a new Series.
wine_reviews["taster_twitter_handle"].replace("@kerinokeefe", "@kerino")

In [None]:
# The points column converted to strings.
point_strings = wine_reviews["points"].astype(str)
point_strings

In [None]:
# Summing the boolean mask counts the reviews with no price.
price_is_missing = wine_reviews["price"].isnull()
n_missing_prices = price_is_missing.sum()
n_missing_prices

Equivalent alternatives:

`n_missing_prices = pd.isnull(wine_reviews.price).sum()`

or

`missing_price_reviews = wine_reviews[wine_reviews.price.isnull()]`

`n_missing_prices = len(missing_price_reviews)`

In [None]:
# Reviews per region, with missing regions counted under 'Unknown', largest first.
reviews_per_region = (
    wine_reviews["region_1"]
    .fillna('Unknown')
    .value_counts()
    .sort_values(ascending=False)
)
reviews_per_region

### Renaming

In [None]:
# Returns a copy with the points column labelled 'score'.
wine_reviews.rename({'points': 'score'}, axis='columns')

In [None]:
# Several columns can be renamed in one call by listing them in the same mapping.
wine_reviews.rename(columns={'region_1': 'region', 'region_2': 'locale'})

In [None]:
# Relabel individual rows: index labels 0 and 1 get descriptive names.
wine_reviews.rename({0: 'firstEntry', 1: 'secondEntry'}, axis='index')

In [None]:
# Name the row axis "wines" and the column axis "fields" in a single call.
wine_reviews.rename_axis(index="wines", columns="fields")

### Combining

`combined_data = pd.concat([dataset1, dataset2])`

`combined_data = dataset1.set_index("MeetID").join(dataset2.set_index("MeetID"))`

### Drop Duplicates

In [67]:
# Rows count as duplicates when all of these columns match;
# taster_twitter_handle is the one column left out of the comparison.
duplicate_key_columns = [
    "country",
    "description",
    "designation",
    "points",
    "price",
    "province",
    "region_1",
    "region_2",
    "taster_name",
    "title",
    "variety",
    "winery",
]
unique_reviews = wine_reviews.drop_duplicates(subset=duplicate_key_columns)
unique_reviews

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


In [68]:
# Reviews per country after de-duplication, most frequent first.
unique_reviews.country.value_counts(sort=True)

country
US                        50457
France                    20353
Italy                     17940
Spain                      6116
Portugal                   5256
Chile                      4184
Argentina                  3544
Austria                    3034
Australia                  2197
Germany                    1992
South Africa               1301
New Zealand                1278
Israel                      466
Greece                      432
Canada                      226
Bulgaria                    132
Hungary                     129
Romania                     102
Uruguay                      98
Turkey                       81
Slovenia                     77
Georgia                      76
Croatia                      70
Mexico                       68
England                      63
Moldova                      56
Brazil                       49
Lebanon                      32
Morocco                      24
Peru                         16
Ukraine                      14


In [69]:
# Same counts expressed as a fraction of all counted reviews.
unique_reviews.country.value_counts(sort=True, normalize=True)

country
US                        0.420724
France                    0.169709
Italy                     0.149589
Spain                     0.050997
Portugal                  0.043826
Chile                     0.034887
Argentina                 0.029551
Austria                   0.025298
Australia                 0.018319
Germany                   0.016610
South Africa              0.010848
New Zealand               0.010656
Israel                    0.003886
Greece                    0.003602
Canada                    0.001884
Bulgaria                  0.001101
Hungary                   0.001076
Romania                   0.000851
Uruguay                   0.000817
Turkey                    0.000675
Slovenia                  0.000642
Georgia                   0.000634
Croatia                   0.000584
Mexico                    0.000567
England                   0.000525
Moldova                   0.000467
Brazil                    0.000409
Lebanon                   0.000267
Morocco     