# SQL vs Python Pandas

目的：对于同一种数据查询/操作需求，比较SQL和Python Pandas的操作。

说明：
- 我的学习路径是先SQL后Python Pandas，所以本文也按照这个顺序。
- SQL的方言是TSQL(SqlServer)


In [1]:
import pandas as pd

In [2]:
wine_reviews = pd.read_csv("./data/winemag-data/winemag-data-130k-v2.csv", index_col=0)

In [3]:
wine_reviews.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


## DDL
- ADD COLUMN
- DROP COLUMN
- ADD INDEX
- DROP INDEX

In [4]:
# --  ADD NEW COLUMN 
# ALTER TABLE WINE_REVIEWS_NEW ADD price_point_ratio DECIMAL(17,4) NULL
# UPDATE WINE_REVIEWS_NEW SET price_point_ratio = price/(case when points=0 then 1 else points end) 

wine_reviews['price_point_ratio'] = wine_reviews['price']/wine_reviews['points']
wine_reviews.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,price_point_ratio
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,0.172414
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,0.16092
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,0.149425
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,0.747126


In [5]:
# --  DROP COLUMN 
# ALTER TABLE WINE_REVIEWS_NEW DROP COLUMN price_point_ratio

wine_reviews = wine_reviews.drop(['price_point_ratio'], axis = 1) # drop column based on column name
# wine_reviews = wine_reviews.drop(wine_reviews.columns[13], axis = 1) # drop column based on column name

In [6]:
# -- ADD INDEX
# CREATE NONCLUSTERED INDEX IDX_WINE_REVIEWS_NEW_PROVINCE ON WINE_REVIEWS_NEW
# (
# 	province 
# )
wine_test = wine_reviews.set_index(['province']) # set province column as index
wine_test.head()
## note: the original index column will be overwritten, so copy the original index to another column before-hand if neccessary. see above
# wine_test['province'] =wine_test.index 

Unnamed: 0_level_0,country,description,designation,points,price,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Sicily & Sardinia,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
Douro,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
Oregon,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
Michigan,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
Oregon,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [7]:
# -- DROP INDEX
# DROP INDEX IDX_WINE_REVIEWS_NEW_PROVINCE ON WINE_REVIEWS_NEW

# there no method for dropping index, you simply set index to another column 
wine_test = wine_test.set_index(['country']) 

## DML
- SELECT VALUES
- SELECT ... WHERE AND , OR , IS NULL, IS NOT NULL, IN, 
- ORDER BY 
- COLUMN OPERATION
- DISTINCT VALUES
- GROUP BY 
- JOIN 
  - INNER JOIN 
  - LEFT JOIN 
  - FULL JOIN 

In [8]:
# SELECT TOP 10 * FROM WINE_REVIEWS_NEW
wine_reviews.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [9]:
# SELECT...WHERE 
wine_reviews.loc[wine_reviews['country'] == 'US']  # or wine_reviews[wine_reviews['country']=='US']
# WHERE AND
wine_reviews.loc[(wine_reviews['country'] == 'US') & (wine_reviews['points']>=90)] 
# WHERE OR
wine_reviews.loc[(wine_reviews['country'] == 'US') | (wine_reviews['points']>=90)] 
# WHERE IN
wine_reviews.loc[wine_reviews['country'].isin(['US','Italy'])] 
# WHERE NOT IN 
wine_reviews.loc[~(wine_reviews['country'].isin(['US','Italy']))] 
# WHERE IS NULL
wine_reviews.loc[(wine_reviews['country'].isnull())]  # or isna()
# WHERE IS NOT NULL
wine_reviews.loc[(wine_reviews['country'].notnull())]  # or notna()


Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


Note: **the use of DataFrame.loc[] method**

Allowed inputs are:
- A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
    interpreted as a *label* of the index, and **never** as an
    integer position along the index).
- A list or array of labels, e.g. ``['a', 'b', 'c']``.
- A slice object with labels, e.g. ``'a':'f'``.
- A boolean array of the same length as the axis being sliced,
    e.g. ``[True, False, True]``.
- A ``callable`` function with one argument (the calling Series or
    DataFrame) and that returns valid output for indexing (one of the above)


In [10]:
# SELECT * FROM WINE_REVIEWS_NEW ORDER BY COUNTRY
wine_reviews.sort_values(by='country')

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
90901,Argentina,Odd aromas of bath soap and tropical fruits ar...,Valle Las Acequias Clase A,81,9.0,Mendoza Province,Mendoza,,Michael Schachner,@wineschach,Luis Segundo Correas 2015 Valle Las Acequias C...,Chardonnay,Luis Segundo Correas
122372,Argentina,"Malty, earthy aromas of cassis and prune are t...",,86,15.0,Mendoza Province,Mendoza,,Michael Schachner,@wineschach,Bodega Familia Barberis 2010 Cabernet Sauvigno...,Cabernet Sauvignon,Bodega Familia Barberis
122374,Argentina,"This SB delivers citrus peel, grass and other ...",Ciclos Fume,86,28.0,Other,Cafayate,,Michael Schachner,@wineschach,Michel Torino 2013 Ciclos Fume Sauvignon Blanc...,Sauvignon Blanc,Michel Torino
4814,Argentina,"Opens with aromas of tomato leaf, fresh salsa ...",Reserva,85,15.0,Other,Neuquén,,Michael Schachner,@wineschach,Chaltén 2007 Reserva Pinot Noir (Neuquén),Pinot Noir,Chaltén
4806,Argentina,"A heavy, dark and loud Malbec with bullish flo...",,85,20.0,Mendoza Province,Uco Valley,,Michael Schachner,@wineschach,DiamAndes 2010 Malbec (Uco Valley),Malbec,DiamAndes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
124176,,This Swiss red blend is composed of four varie...,Les Romaines,90,30.0,,,,Jeff Jenssen,@worldwineguys,Les Frères Dutruy 2014 Les Romaines Red,Red Blend,Les Frères Dutruy
129407,,Dry spicy aromas of dusty plum and tomato add ...,Reserve,89,22.0,,,,Michael Schachner,@wineschach,El Capricho 2015 Reserve Cabernet Sauvignon,Cabernet Sauvignon,El Capricho
129408,,El Capricho is one of Uruguay's more consisten...,Reserve,89,22.0,,,,Michael Schachner,@wineschach,El Capricho 2015 Reserve Tempranillo,Tempranillo,El Capricho
129590,,"A blend of 60% Syrah, 30% Cabernet Sauvignon a...",Shah,90,30.0,,,,Mike DeSimone,@worldwineguys,Büyülübağ 2012 Shah Red,Red Blend,Büyülübağ


In [11]:
# SELECT * FROM WINE_REVIEWS_NEW 
# ORDER BY points DESC, price DESC
# OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY
wine_reviews.sort_values(by=['points','price'], ascending = False).head(5)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
111753,France,"Almost black in color, this stunning wine is g...",,100,1500.0,Bordeaux,Pauillac,,Roger Voss,@vossroger,Château Lafite Rothschild 2010 Pauillac,Bordeaux-style Red Blend,Château Lafite Rothschild
111755,France,This is the finest Cheval Blanc for many years...,,100,1500.0,Bordeaux,Saint-Émilion,,Roger Voss,@vossroger,Château Cheval Blanc 2010 Saint-Émilion,Bordeaux-style Red Blend,Château Cheval Blanc
122935,France,"Full of ripe fruit, opulent and concentrated, ...",,100,848.0,Bordeaux,Pessac-Léognan,,Roger Voss,@vossroger,Château Haut-Brion 2014 Pessac-Léognan,Bordeaux-style White Blend,Château Haut-Brion
114972,Portugal,"A powerful and ripe wine, strongly influenced ...",Nacional Vintage,100,650.0,Port,,,Roger Voss,@vossroger,Quinta do Noval 2011 Nacional Vintage (Port),Port,Quinta do Noval
89729,France,This new release from a great vintage for Char...,Le Mesnil Blanc de Blancs Brut,100,617.0,Champagne,Champagne,,Roger Voss,@vossroger,Salon 2006 Le Mesnil Blanc de Blancs Brut Char...,Chardonnay,Salon


In [12]:
# COLUMN OPERATION: SELECT points/price as pp_ratio FROM WINE_REVIEWS_NEW
wine_reviews['points']/wine_reviews['price']

0              NaN
1         5.800000
2         6.214286
3         6.692308
4         1.338462
            ...   
129966    3.214286
129967    1.200000
129968    3.000000
129969    2.812500
129970    4.285714
Length: 129971, dtype: float64

In [13]:
# SELECT DISTINCT COUNTRY FROM WINE_REVIEWS_NEW
wine_reviews['country'].unique() # retrun an array of distinct values

array(['Italy', 'Portugal', 'US', 'Spain', 'France', 'Germany',
       'Argentina', 'Chile', 'Australia', 'Austria', 'South Africa',
       'New Zealand', 'Israel', 'Hungary', 'Greece', 'Romania', 'Mexico',
       'Canada', nan, 'Turkey', 'Czech Republic', 'Slovenia',
       'Luxembourg', 'Croatia', 'Georgia', 'Uruguay', 'England',
       'Lebanon', 'Serbia', 'Brazil', 'Moldova', 'Morocco', 'Peru',
       'India', 'Bulgaria', 'Cyprus', 'Armenia', 'Switzerland',
       'Bosnia and Herzegovina', 'Ukraine', 'Slovakia', 'Macedonia',
       'China', 'Egypt'], dtype=object)

In [21]:
# SELECT DISTINCT COUNTRY FROM WINE_REVIEWS_NEW
wine_reviews['country'].value_counts().head() # return a Series
# wine_reviews.groupby(['country'])['description'].agg([len]) # return a df
# wine_reviews.groupby(['country']).size() # return a Series
# wine_reviews.groupby(['country']).description.count() # return a Series

US          54504
France      22093
Italy       19540
Spain        6645
Portugal     5691
Name: country, dtype: int64

Note:

- The use of `agg()`:
  - input: list of functions that takes lists return a single value, e.g. len(), max(), min().
  - output: a dataframe 
  - it could either apply to every column or one column (if specified)
- Difference between `size()` and `count()`
  - `size()`: compute group size, including null values.
  - `count()`: Compute count of group, excluding missing values.
 

In [22]:
# GROUP BY
# SELECT AVG(PRICE) FROM WINE_REVIEWS_NEW GROUP BY COUNTRY
wine_reivews_country= wine_reviews.groupby(['country'])
wine_reivews_country.price.mean().head() # return a Series

country
Argentina                 24.510117
Armenia                   14.500000
Australia                 35.437663
Austria                   30.762772
Bosnia and Herzegovina    12.500000
Name: price, dtype: float64

```sql
-- CREATE TABLE: wine_taster, and add a taster_level column 
SELECT taster_name,taster_twitter_handle INTO WINE_TASTER 
FROM WINE_REVIEWS_NEW
WHERE LEN(taster_name)>0 
GROUP BY taster_name,taster_twitter_handle

ALTER TABLE WINE_TASTER ADD taster_level int 

UPDATE WINE_TASTER SET taster_level = CASE WHEN len(taster_twitter_handle)>0 THEN 5 WHEN (upper(taster_name) like '%S%' OR  upper(taster_name) like '%M%') THEN 4 ELSE 3 END
SELECT  * FROM WINE_TASTER
```

In [23]:
# JOIN
# CREATE TABLE: wine_taster, and add a taster_level column 
wine_taster = wine_reviews.loc[wine_reviews['taster_name'].notnull()]
wine_taster = wine_taster.loc[:,['taster_name','taster_twitter_handle']].fillna('NA').groupby(['taster_name','taster_twitter_handle']).size()
wine_taster = wine_taster.reset_index().drop(columns=0)
def get_taster_level(row):
    level = None
    if row['taster_twitter_handle'] != 'NA':
        level = 5
    elif ('S' in  row['taster_name'].upper()) or ('M' in  row['taster_name'].upper()):
        level = 4
    else:
        level = 3
    
    return level  

wine_taster['taster_level'] = wine_taster.apply(get_taster_level, axis='columns')
wine_taster.head()


Unnamed: 0,taster_name,taster_twitter_handle,taster_level
0,Alexander Peartree,,3
1,Anna Lee C. Iijima,,4
2,Anne Krebiehl MW,@AnneInVino,5
3,Carrie Dykes,,4
4,Christina Pickard,@winewchristina,5


In [17]:
# JOIN
# SELECT A.*, B.taster_level FROM WINE_REVIEWS_NEW A
# LEFT JOIN WINE_TASTER B ON A.taster_name= B.taster_name
left_df = wine_reviews.set_index('taster_name')
right_df = wine_taster.set_index('taster_name')
left_df.join(right_df, lsuffix='_REVIEW', rsuffix='_LEVEL') # left join 
# left_df.join(right_df, how='inner', lsuffix='_REVIEW', rsuffix='_LEVEL') # inner join 
# left_df.join(right_df, how='outer', lsuffix='_REVIEW', rsuffix='_LEVEL') # full outer join 

Unnamed: 0_level_0,country,description,designation,points,price,province,region_1,region_2,taster_twitter_handle_REVIEW,title,variety,winery,taster_twitter_handle_LEVEL,taster_level
taster_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Alexander Peartree,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,,3.0
Alexander Peartree,US,"Red fruit aromas pervade on the nose, with cig...",,87,32.0,Virginia,Virginia,,,Quiévremont 2012 Meritage (Virginia),Meritage,Quiévremont,,3.0
Alexander Peartree,US,Ripe aromas of dark berries mingle with ample ...,Vin de Maison,87,23.0,Virginia,Virginia,,,Quiévremont 2012 Vin de Maison Red (Virginia),Red Blend,Quiévremont,,3.0
Alexander Peartree,US,"Peach, banana peel and a slight petrol note sh...",Mont Sec Vineyards,85,14.0,Texas,Texas,,,Mont Sec 2015 Mont Sec Vineyards Viognier (Texas),Viognier,Mont Sec,,3.0
Alexander Peartree,US,There seems to be a dusty veil over the fruit ...,,85,,Texas,Texas,,,McPherson 2014 Sangiovese (Texas),Sangiovese,McPherson,,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,Italy,Here's an easy blend of Cabernet Sauvignon and...,,86,10.0,Sicily & Sardinia,Sicilia,,,MandraRossa 2006 Cabernet Sauvignon-Shiraz (Si...,Cabernet Sauvignon-Shiraz,MandraRossa,,
,US,Fritz has made tremendous progress with Cab ov...,,91,35.0,California,Dry Creek Valley,Sonoma,,Fritz 2005 Cabernet Sauvignon (Dry Creek Valley),Cabernet Sauvignon,Fritz,,
,US,"Shows the clean, citrus acid juiciness and sil...",Fiddlestix Vineyard,91,48.0,California,Sta. Rita Hills,Central Coast,,Pali 2006 Fiddlestix Vineyard Pinot Noir (Sta....,Pinot Noir,Pali,,
,Italy,"This luminous sparkler has a sweet, fruit-forw...",,91,38.0,Veneto,Prosecco Superiore di Cartizze,,,Col Vetoraz Spumanti NV Prosecco Superiore di...,Prosecco,Col Vetoraz Spumanti,,


In [18]:
# JOIN -- using pd.merge: more concise
pd.merge(wine_reviews,wine_taster,on='taster_name',how='left')

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle_x,title,variety,winery,taster_twitter_handle_y,taster_level
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,@kerinokeefe,5.0
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,@vossroger,5.0
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,@paulgwine,5.0
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,,3.0
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,@paulgwine,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef),,4.0
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation,@paulgwine,5.0
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser,@vossroger,5.0
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,@vossroger,5.0
