In [3]:
import pandas as pd
# Load the wine reviews, using the first CSV column as the row index.
reviews = pd.read_csv("winemag-data-130k-v2.csv", index_col=0)
# Cap printed frames and series at 5 rows so cell outputs stay short.
# The option key is spelled out in full: the bare 'max_rows' pattern matches
# more than one option in recent pandas releases and raises OptionError.
pd.set_option('display.max_rows', 5)

# Native accessors

In [4]:
# Show the DataFrame as the cell's output.
reviews

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36167,Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Morgante 2012 Nero d'Avola (Sicilia),Nero d'Avola,Morgante
36168,Greece,This rounded white starts with aromas of lemon...,Dry White,88,15.0,Peloponnese,,,Susan Kostrzewa,@suskostrzewa,My Big Fat Greek Wine 2012 Dry White Moschofi,,


In [5]:
# A column can be read as an attribute of the DataFrame.
reviews.country

0           Italy
1        Portugal
           ...   
36167       Italy
36168      Greece
Name: country, Length: 36169, dtype: object

In [6]:
# The same column, read with the indexing operator [] instead.
reviews['country']

0           Italy
1        Portugal
           ...   
36167       Italy
36168      Greece
Name: country, Length: 36169, dtype: object

In [7]:
# The indexing operator [] has the advantage that it can handle column names with reserved characters in them
# (e.g. if we had a 'country province' column, reviews.country province wouldn't work).

In [8]:
# To drill down to a single specific value, apply the indexing operator []
# once more to the selected column.
reviews['country'][0]

'Italy'

# Indexing in pandas
Index-based selection

In [9]:
# Select the first row by position; the result is a Series labelled by column name.
reviews.iloc[0]

country                                                    Italy
description    Aromas include tropical fruit, broom, brimston...
                                     ...                        
variety                                              White Blend
winery                                                   Nicosia
Name: 0, Length: 13, dtype: object

In [10]:
# Get a column with iloc: the row selector comes first, the column selector second.
reviews.iloc[:, 0]

0           Italy
1        Portugal
           ...   
36167       Italy
36168      Greece
Name: country, Length: 36169, dtype: object

In [11]:
# On its own, the : operator means "everything".
# When combined with bounds, however, it indicates a range of values:
# here the first three rows of the first column.
reviews.iloc[:3, 0]

0       Italy
1    Portugal
2          US
Name: country, dtype: object

In [12]:
# Rows at positions 1 and 2 of the first column; the end bound 3 is excluded.
reviews.iloc[1:3, 0]

1    Portugal
2          US
Name: country, dtype: object

In [13]:
# A list of positions also works as the row selector.
reviews.iloc[[0, 1, 2], 0]

0       Italy
1    Portugal
2          US
Name: country, dtype: object

In [14]:
# Negative positions count from the end: the last five rows of the dataset.
reviews.iloc[-5:]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
36164,Austria,"Distant woodsmoke, ripe blueberry and notions ...",,92,20.0,Eisenberg,,,Anne Krebiehl MW,@AnneInVino,Jalits 2013 Blaufränkisch (Eisenberg),Blaufränkisch,Jalits
36165,US,"Here is a generous, full-throttle, high-octane...",Epicenter Old Vines,88,20.0,California,Lodi,Central Valley,Jim Gordon,@gordone_cellars,Mettler Family Vineyards 2011 Epicenter Old Vi...,Zinfandel,Mettler Family Vineyards
36166,US,"Flowery and balanced in crisp, dry flavors of ...",La Brume,88,32.0,California,Dry Creek Valley,Sonoma,Virginie Boone,@vboone,Michel-Schlumberger 2012 La Brume Chardonnay (...,Chardonnay,Michel-Schlumberger
36167,Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Morgante 2012 Nero d'Avola (Sicilia),Nero d'Avola,Morgante
36168,Greece,This rounded white starts with aromas of lemon...,Dry White,88,15.0,Peloponnese,,,Susan Kostrzewa,@suskostrzewa,My Big Fat Greek Wine 2012 Dry White Moschofi,,


Label-based selection

In [15]:
# loc selects by label: it's the data index value, not its position, that matters.
reviews.loc[0, 'country']

'Italy'

In [None]:
""" iloc is conceptually simpler than loc because it ignores the dataset's indices.
When we use iloc we treat the dataset like a big matrix (a list of lists), one that we have to index into by position.
loc, by contrast, uses the information in the indices to do its work.
Since your dataset usually has meaningful indices, it's usually easier to do things using loc instead."""

In [16]:
# All rows, restricted to three columns chosen by name.
reviews.loc[:, ['taster_name', 'taster_twitter_handle', 'points']]

Unnamed: 0,taster_name,taster_twitter_handle,points
0,Kerin O’Keefe,@kerinokeefe,87
1,Roger Voss,@vossroger,87
...,...,...,...
36167,Kerin O’Keefe,@kerinokeefe,88
36168,Susan Kostrzewa,@suskostrzewa,88


Choosing between loc and iloc

In [17]:
""" iloc uses the Python stdlib indexing scheme, where the first element of the range is included and the last one excluded.
loc, meanwhile, indexes inclusively. So 0:10 will select entries 0,..., 10.

It's a lot more convenient to index df.loc['Apples':'Potatoes'] than it is to index something like df.loc['Apples':'Potatoet'] (t coming after s in the alphabet), which is what an end-exclusive slice would require."""

' iloc uses the Python stdlib indexing scheme, where the first element of the range is included and the last one excluded.\nloc, meanwhile, indexes inclusively. So 0:10 will select entries 0,..., 10.'

# Manipulating the index

In [18]:
# Use the title column as the index. The result is displayed, not stored:
# reviews is not reassigned here.
reviews.set_index("title")

Unnamed: 0_level_0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,variety,winery
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Nicosia 2013 Vulkà Bianco (Etna),Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,White Blend,Nicosia
Quinta dos Avidagos 2011 Avidagos Red (Douro),Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Portuguese Red,Quinta dos Avidagos
...,...,...,...,...,...,...,...,...,...,...,...,...
Morgante 2012 Nero d'Avola (Sicilia),Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Nero d'Avola,Morgante
My Big Fat Greek Wine 2012 Dry White Moschofi,Greece,This rounded white starts with aromas of lemon...,Dry White,88,15.0,Peloponnese,,,Susan Kostrzewa,@suskostrzewa,,


# Conditional selection

In [19]:
# For example, suppose that we're interested specifically in better-than-average wines produced in Italy.
# We can start by checking if each wine is Italian or not.
# The comparison yields a boolean Series with one entry per row.
reviews.country == 'Italy'

0         True
1        False
         ...  
36167     True
36168    False
Name: country, Length: 36169, dtype: bool

In [20]:
# Pass the boolean Series to loc to keep only the rows where it is True.
reviews.loc[reviews.country == 'Italy']

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin O’Keefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36163,Italy,"A blend of Petit Verdot and Cabernet Franc, th...",,92,170.0,Tuscany,Toscana,,Kerin O’Keefe,@kerinokeefe,Campo alla Sughera 2008 Red (Toscana),Red Blend,Campo alla Sughera
36167,Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Morgante 2012 Nero d'Avola (Sicilia),Nero d'Avola,Morgante


In [21]:
# Wines that are made in Italy AND scored at least 90 points
# (the bar used here for "better than average").
from_italy = reviews.country == 'Italy'
scored_90_plus = reviews.points >= 90
reviews.loc[from_italy & scored_90_plus]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
120,Italy,"Slightly backward, particularly given the vint...",Bricco Rocche Prapó,92,70.0,Piedmont,Barolo,,,,Ceretto 2003 Bricco Rocche Prapó (Barolo),Nebbiolo,Ceretto
130,Italy,"At the first it was quite muted and subdued, b...",Bricco Rocche Brunate,91,70.0,Piedmont,Barolo,,,,Ceretto 2003 Bricco Rocche Brunate (Barolo),Nebbiolo,Ceretto
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36151,Italy,"Violet, iris, leather, tobacco, vanilla and sa...",Gran Selezione,92,100.0,Tuscany,Chianti Classico,,Kerin O’Keefe,@kerinokeefe,Castello di Radda 2010 Gran Selezione (Chiant...,Sangiovese,Castello di Radda
36163,Italy,"A blend of Petit Verdot and Cabernet Franc, th...",,92,170.0,Tuscany,Toscana,,Kerin O’Keefe,@kerinokeefe,Campo alla Sughera 2008 Red (Toscana),Red Blend,Campo alla Sughera


In [22]:
# Wines that are made in Italy OR scored at least 90 points
# (the bar used here for "better than average").
from_italy = reviews.country == 'Italy'
scored_90_plus = reviews.points >= 90
reviews.loc[from_italy | scored_90_plus]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin O’Keefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36164,Austria,"Distant woodsmoke, ripe blueberry and notions ...",,92,20.0,Eisenberg,,,Anne Krebiehl MW,@AnneInVino,Jalits 2013 Blaufränkisch (Eisenberg),Blaufränkisch,Jalits
36167,Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Morgante 2012 Nero d'Avola (Sicilia),Nero d'Avola,Morgante


In [23]:
# Built-in conditional selector isin: keep the rows whose country is in a list.
wanted_countries = ['Italy', 'France']
reviews.loc[reviews.country.isin(wanted_countries)]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin O’Keefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36163,Italy,"A blend of Petit Verdot and Cabernet Franc, th...",,92,170.0,Tuscany,Toscana,,Kerin O’Keefe,@kerinokeefe,Campo alla Sughera 2008 Red (Toscana),Red Blend,Campo alla Sughera
36167,Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Morgante 2012 Nero d'Avola (Sicilia),Nero d'Avola,Morgante


In [24]:
# Built-in conditional selectors: isnull and its companion notnull.
# Keep only the wines that have a price recorded in the dataset.
has_price = reviews.price.notnull()
reviews.loc[has_price]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
...,...,...,...,...,...,...,...,...,...,...,...,...,...
36167,Italy,Here's a straightforward and fruity Nero d'Avo...,,88,18.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Morgante 2012 Nero d'Avola (Sicilia),Nero d'Avola,Morgante
36168,Greece,This rounded white starts with aromas of lemon...,Dry White,88,15.0,Peloponnese,,,Susan Kostrzewa,@suskostrzewa,My Big Fat Greek Wine 2012 Dry White Moschofi,,


# Assigning data

In [25]:
# Assign a constant value: every row of the new 'critic' column receives it.
reviews['critic'] = 'everyone'
reviews['critic']

0        everyone
1        everyone
           ...   
36167    everyone
36168    everyone
Name: critic, Length: 36169, dtype: object

In [27]:
# Or assign an iterable holding one value per row: here a countdown from the
# number of rows down to 1.
row_count = len(reviews)
reviews['index_backwards'] = range(row_count, 0, -1)
reviews['index_backwards']

0        36169
1        36168
         ...  
36167        2
36168        1
Name: index_backwards, Length: 36169, dtype: int64