In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Cap DataFrame/Series reprs at 10 rows so displayed output stays short.
pd.set_option('display.max_rows', 10)

In [2]:
# Read the gzip-compressed review subset, parsing `time` as datetimes.
df = pd.read_csv(
    'data/beer_subset.csv.gz',
    compression='gzip',
    parse_dates=['time'],
)
# The five per-review score columns, reused by later cells.
review_cols = [
    'review_appearance',
    'review_aroma',
    'review_overall',
    'review_palate',
    'review_taste',
]
df.head()

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
0,7.0,2511,287,Bell's Cherry Stout,American Stout,4.5,4,4.5,4.0,blaheath,4.5,Batch 8144\tPitch black in color with a 1/2 f...,2009-10-05 21:31:48
1,5.7,19736,9790,Duck-Rabbit Porter,American Porter,4.5,4,4.5,4.0,GJ40,4.0,Sampled from a 12oz bottle in a standard pint...,2009-10-05 21:32:09
2,4.8,11098,3182,Fürstenberg Premium Pilsener,German Pilsener,4.0,3,3.0,3.0,biegaman,3.5,Haystack yellow with an energetic group of bu...,2009-10-05 21:32:13
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4,4.0,4.0,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
4,5.8,398,119,Wolaver's Pale Ale,American Pale Ale (APA),4.0,3,4.0,3.5,champ103,3.0,A: Pours a slightly hazy golden/orange color....,2009-10-05 21:33:14


## Boolean indexing

Like a where clause in SQL.

In [3]:
# Element-wise comparison: yields a boolean Series with one entry per row.
df['abv'] < 5

0        False
1        False
2         True
3        False
4        False
         ...  
99995    False
99996    False
99997    False
99998    False
99999    False
Name: abv, dtype: bool

In [4]:
# Passing the boolean Series to [] keeps only the rows where it is True.
df[df['abv'] < 5]

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
2,4.80,11098,3182,Fürstenberg Premium Pilsener,German Pilsener,4.0,3.0,3.0,3.0,biegaman,3.5,Haystack yellow with an energetic group of bu...,2009-10-05 21:32:13
7,4.80,1669,256,Great White,Witbier,4.5,4.5,4.5,4.5,n0rc41,4.5,"Ok, for starters great white I believe will b...",2009-10-05 21:34:29
21,4.60,401,118,Dark Island,Scottish Ale,4.0,4.0,3.5,4.0,abuliarose,4.0,"Poured into a snifter, revealing black opaque...",2009-10-05 21:47:36
22,4.90,5044,18968,Kipona Fest,Märzen / Oktoberfest,4.0,3.5,4.0,4.0,drcarver,4.0,A - a medium brown body with an off white hea...,2009-10-05 21:47:56
28,4.60,401,118,Dark Island,Scottish Ale,4.0,4.0,4.5,4.0,sisuspeed,4.0,The color of this beer fits the name well. Op...,2009-10-05 21:53:38
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99963,4.40,429,1,Pilsner Urquell,Czech Pilsener,3.0,2.0,1.0,1.0,raffy4,1.0,I had a 500ml can of this the other night. Th...,2010-03-07 00:59:36
99966,4.90,54904,35,Samuel Adams Noble Pils,Czech Pilsener,4.0,4.0,5.0,4.5,KTCamm,5.0,Served on tap twice i the past week - Leroy S...,2010-03-07 01:06:25
99968,3.80,17284,113,Samuel Smith's Tadcaster Bitter,English Bitter,3.5,4.0,5.0,4.0,meeekyh,4.0,I poured this one from a 550ml bottle into a ...,2010-03-07 01:07:40
99978,4.40,56618,10153,Pisgah Helles Lager,Munich Helles Lager,4.0,3.5,5.0,4.5,mikesgroove,4.5,A new one I got to try today and could not ha...,2010-03-07 01:14:09


Notice that we just used `[]` there. We can pass the boolean indexer in to `.loc` as well.

In [5]:
# .loc takes the row mask first and the column labels second.
low_abv = df['abv'] < 5
df.loc[low_abv, ['beer_style', 'review_overall']]

Unnamed: 0,beer_style,review_overall
2,German Pilsener,3.0
7,Witbier,4.5
21,Scottish Ale,3.5
22,Märzen / Oktoberfest,4.0
28,Scottish Ale,4.5
...,...,...
99963,Czech Pilsener,1.0
99966,Czech Pilsener,5.0
99968,English Bitter,5.0
99978,Munich Helles Lager,5.0


In [6]:
# Build each condition separately, then combine: (weak AND recent) OR well rated.
is_weak = df['abv'] < 5
is_recent = df['time'] > pd.Timestamp('2009-06')
is_well_rated = df['review_overall'] >= 4
df[(is_weak & is_recent) | is_well_rated]

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
0,7.0,2511,287,Bell's Cherry Stout,American Stout,4.5,4.0,4.5,4.0,blaheath,4.5,Batch 8144\tPitch black in color with a 1/2 f...,2009-10-05 21:31:48
1,5.7,19736,9790,Duck-Rabbit Porter,American Porter,4.5,4.0,4.5,4.0,GJ40,4.0,Sampled from a 12oz bottle in a standard pint...,2009-10-05 21:32:09
2,4.8,11098,3182,Fürstenberg Premium Pilsener,German Pilsener,4.0,3.0,3.0,3.0,biegaman,3.5,Haystack yellow with an energetic group of bu...,2009-10-05 21:32:13
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4.0,4.0,4.0,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
4,5.8,398,119,Wolaver's Pale Ale,American Pale Ale (APA),4.0,3.0,4.0,3.5,champ103,3.0,A: Pours a slightly hazy golden/orange color....,2009-10-05 21:33:14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99994,8.6,53388,16315,Bashah,American Black Ale,4.0,4.0,4.0,3.5,northyorksammy,3.5,"Pours a dark grey colour, with a bit of head....",2010-03-07 01:28:05
99995,6.5,11565,5337,IPA,American IPA,4.0,4.5,4.5,4.5,SpdKilz,4.5,"Appearance - Pours a remarkably clear, amber ...",2010-03-07 01:29:31
99996,7.5,15881,694,Tröegs Nugget Nectar,American Amber / Red Ale,4.0,4.0,4.0,3.5,MisterDeeds,4.0,Pouring from a 12 oz bottle into a perfect pi...,2010-03-07 01:30:35
99998,7.0,829,266,Jamaica Sunset India Pale Ale,American IPA,3.0,4.0,4.0,3.5,northyorksammy,4.0,I think this IPA somewhat undervalued. It had...,2010-03-07 01:33:29


In [7]:
# Same filter as the previous cell, written as a query string.
matches = df.query("(abv < 5 and time > '2009-06') | review_overall >= 4")
matches.head()

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
0,7.0,2511,287,Bell's Cherry Stout,American Stout,4.5,4,4.5,4.0,blaheath,4.5,Batch 8144\tPitch black in color with a 1/2 f...,2009-10-05 21:31:48
1,5.7,19736,9790,Duck-Rabbit Porter,American Porter,4.5,4,4.5,4.0,GJ40,4.0,Sampled from a 12oz bottle in a standard pint...,2009-10-05 21:32:09
2,4.8,11098,3182,Fürstenberg Premium Pilsener,German Pilsener,4.0,3,3.0,3.0,biegaman,3.5,Haystack yellow with an energetic group of bu...,2009-10-05 21:32:13
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4,4.0,4.0,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
4,5.8,398,119,Wolaver's Pale Ale,American Pale Ale (APA),4.0,3,4.0,3.5,champ103,3.0,A: Pours a slightly hazy golden/orange color....,2009-10-05 21:33:14


In [8]:
# Keep rows whose beer_style exactly equals one of the listed values.
wanted_styles = ['American IPA', 'Pilsner']
df[df['beer_style'].isin(wanted_styles)].head()

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
8,6.7,6549,140,Northern Hemisphere Harvest Wet Hop Ale,American IPA,4.0,4.0,4.0,4.0,david18,4.0,I like all of Sierra Nevada's beers but felt ...,2009-10-05 21:34:31
23,6.5,44727,596,Portsmouth 5 C's IPA,American IPA,4.5,5.0,5.0,4.5,ALeF,5.0,As a devoted drinker of American and English ...,2009-10-05 21:48:46
26,5.9,37477,140,Sierra Nevada Anniversary Ale (2007-2009),American IPA,4.5,4.5,4.5,4.5,n0rc41,4.5,Poured a great dark color with great smell! t...,2009-10-05 21:51:33
32,7.5,6076,651,Flower Power India Pale Ale,American IPA,3.5,4.5,4.0,3.5,OnThenIn,4.0,Appearance: The beer pours a rather cloudy da...,2009-10-05 22:02:11
48,6.7,44749,140,Sierra Nevada Chico Estate Harvest Wet Hop Ale...,American IPA,4.5,3.5,4.0,4.5,mikey711,4.0,I love this concept. Way to go Sierra Nevada!...,2009-10-05 22:19:33


In [9]:
# Exercise solution: rows whose beer_style contains the substring "IPA".
has_ipa = df['beer_style'].str.contains("IPA")
df[has_ipa].head()

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4.0,4.0,4.0,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
8,6.7,6549,140,Northern Hemisphere Harvest Wet Hop Ale,American IPA,4.0,4.0,4.0,4.0,david18,4.0,I like all of Sierra Nevada's beers but felt ...,2009-10-05 21:34:31
16,8.0,36179,3818,Hoppe (Imperial Extra Pale Ale),American Double / Imperial IPA,4.0,3.0,4.0,3.5,nick76,3.0,"The aroma is papery with citrus, yeast, and s...",2009-10-05 21:43:23
23,6.5,44727,596,Portsmouth 5 C's IPA,American IPA,4.5,5.0,5.0,4.5,ALeF,5.0,As a devoted drinker of American and English ...,2009-10-05 21:48:46
26,5.9,37477,140,Sierra Nevada Anniversary Ale (2007-2009),American IPA,4.5,4.5,4.5,4.5,n0rc41,4.5,Poured a great dark color with great smell! t...,2009-10-05 21:51:33


In [10]:
# Display the list of score columns defined in the data-loading cell.
review_cols

['review_appearance',
 'review_aroma',
 'review_overall',
 'review_palate',
 'review_taste']

Exercise: select the rows where the scores of the 5 `review_cols` ('review_appearance', 'review_aroma', 'review_overall', 'review_palate', 'review_taste') are *all* at least 4.0.

In [11]:
# Compare all five score columns at once, then reduce across columns (axis=1)
# so the mask has one boolean per row.
meets_threshold = df[review_cols] >= 4.0
df[meets_threshold.all(axis=1)].head()

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
0,7.0,2511,287,Bell's Cherry Stout,American Stout,4.5,4.0,4.5,4.0,blaheath,4.5,Batch 8144\tPitch black in color with a 1/2 f...,2009-10-05 21:31:48
1,5.7,19736,9790,Duck-Rabbit Porter,American Porter,4.5,4.0,4.5,4.0,GJ40,4.0,Sampled from a 12oz bottle in a standard pint...,2009-10-05 21:32:09
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4.0,4.0,4.0,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
7,4.8,1669,256,Great White,Witbier,4.5,4.5,4.5,4.5,n0rc41,4.5,"Ok, for starters great white I believe will b...",2009-10-05 21:34:29
8,6.7,6549,140,Northern Hemisphere Harvest Wet Hop Ale,American IPA,4.0,4.0,4.0,4.0,david18,4.0,I like all of Sierra Nevada's beers but felt ...,2009-10-05 21:34:31


Exercise: select rows where the average of the 5 `review_cols` ('review_appearance', 'review_aroma', 'review_overall', 'review_palate', 'review_taste') is at least 4.

In [12]:
# Average the five score columns per row (axis=1), then filter on that average.
row_means = df[review_cols].mean(axis=1)
df[row_means >= 4].head()

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
0,7.0,2511,287,Bell's Cherry Stout,American Stout,4.5,4,4.5,4,blaheath,4.5,Batch 8144\tPitch black in color with a 1/2 f...,2009-10-05 21:31:48
1,5.7,19736,9790,Duck-Rabbit Porter,American Porter,4.5,4,4.5,4,GJ40,4.0,Sampled from a 12oz bottle in a standard pint...,2009-10-05 21:32:09
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4,4.0,4,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
5,7.0,966,365,Pike Street XXXXX Stout,American Stout,4.0,4,3.5,4,sprucetip,4.5,"From notes. Pours black, thin mocha head fade...",2009-10-05 21:33:48
6,6.2,53128,1114,Smokin' Amber Kegs Gone Wild,American Amber / Red Ale,3.5,4,4.5,4,Deuane,4.5,An American amber with the addition of smoked...,2009-10-05 21:34:24


In [13]:
# Show the first five rows of the unfiltered frame again.
df.head(n=5)

Unnamed: 0,abv,beer_id,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,profile_name,review_taste,text,time
0,7.0,2511,287,Bell's Cherry Stout,American Stout,4.5,4,4.5,4.0,blaheath,4.5,Batch 8144\tPitch black in color with a 1/2 f...,2009-10-05 21:31:48
1,5.7,19736,9790,Duck-Rabbit Porter,American Porter,4.5,4,4.5,4.0,GJ40,4.0,Sampled from a 12oz bottle in a standard pint...,2009-10-05 21:32:09
2,4.8,11098,3182,Fürstenberg Premium Pilsener,German Pilsener,4.0,3,3.0,3.0,biegaman,3.5,Haystack yellow with an energetic group of bu...,2009-10-05 21:32:13
3,9.5,28577,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4,4.0,4.0,nick76,4.0,"The aroma has pine, wood, citrus, caramel, an...",2009-10-05 21:32:37
4,5.8,398,119,Wolaver's Pale Ale,American Pale Ale (APA),4.0,3,4.0,3.5,champ103,3.0,A: Pours a slightly hazy golden/orange color....,2009-10-05 21:33:14


In [14]:
# Left commented out (not run). `df[review_cols] > 4` compares five columns at
# once, so it is a 2-D boolean frame rather than a 1-D row mask.
#df[df[review_cols] > 4]

# Hierarchical Indexing

Feel the power

In [15]:
# Move reviewer, beer and timestamp into a three-level row MultiIndex.
index_levels = ['profile_name', 'beer_id', 'time']
reviews = df.set_index(index_levels)
reviews.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abv,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,review_taste,text
profile_name,beer_id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
blaheath,2511,2009-10-05 21:31:48,7.0,287,Bell's Cherry Stout,American Stout,4.5,4,4.5,4.0,4.5,Batch 8144\tPitch black in color with a 1/2 f...
GJ40,19736,2009-10-05 21:32:09,5.7,9790,Duck-Rabbit Porter,American Porter,4.5,4,4.5,4.0,4.0,Sampled from a 12oz bottle in a standard pint...
biegaman,11098,2009-10-05 21:32:13,4.8,3182,Fürstenberg Premium Pilsener,German Pilsener,4.0,3,3.0,3.0,3.5,Haystack yellow with an energetic group of bu...
nick76,28577,2009-10-05 21:32:37,9.5,3818,Unearthly (Imperial India Pale Ale),American Double / Imperial IPA,4.0,4,4.0,4.0,4.0,"The aroma has pine, wood, citrus, caramel, an..."
champ103,398,2009-10-05 21:33:14,5.8,119,Wolaver's Pale Ale,American Pale Ale (APA),4.0,3,4.0,3.5,3.0,A: Pours a slightly hazy golden/orange color....


In [38]:
# Sort the rows by their MultiIndex; re-running this on sorted data is harmless.
reviews = reviews.sort_index(axis=0)
reviews.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abv,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,review_taste,text
profile_name,beer_id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0110x011,52530,2010-01-01 19:32:55,8.0,12959,Flaming Fury,American Wild Ale,4.0,5,5.0,4.0,4.5,Huge thanks to BevMoIan for opening this up!\...
01121987,29077,2009-11-30 03:44:42,9.0,11256,Corne De Brume,Scotch Ale / Wee Heavy,3.0,4,5.0,4.0,3.5,"Poured into a belgian beer glass, not great h..."
05Harley,131,2010-01-09 03:19:59,6.7,39,Ayinger Celebrator Doppelbock,Doppelbock,4.0,3,4.0,4.0,4.0,"No bottle number, it's a mystery..\t\tPurchas..."
05Harley,695,2010-02-06 01:09:05,8.5,222,Duvel,Belgian Strong Pale Ale,4.5,4,4.0,4.0,4.0,Best before 07/2011 L0821 16:06\t\tPurchased ...
05Harley,705,2010-03-06 21:36:58,11.5,178,J.W. Lees Vintage Harvest Ale,English Barleywine,4.0,4,3.5,4.5,4.0,Brewed in 2005\t\tPurchased @ Julio's liqours...


In [39]:
# The distinct values stored for the first index level (profile_name).
profile_level = reviews.index.levels[0]
profile_level

Index(['0110x011', '01121987', '05Harley', '0tt0', '100floods', '108Dragons',
       '110toyourleft', '11soccer11', '12puebloyankee', '1759Girl', 
       ...
       'zeomally', 'zev', 'zimm421', 'zms101', 'zoolzoo', 'zoso1967',
       'zoso493', 'zplug123', 'zythus', 'zzajjber'],
      dtype='object', name='profile_name', length=5990)

In [40]:
# Integer codes for the first level: one entry per row, each a position into
# `reviews.index.levels[0]`. `MultiIndex.labels` was renamed to `codes` in
# pandas 0.24 and removed in pandas 1.0.
reviews.index.codes[0]

FrozenNDArray([0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 6, 6, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 12, 12, 12, 12, 12, 12, ...], dtype='int16')

In [43]:
# Count reviews per profile_name and keep the names of the five most frequent.
reviewer_names = reviews.index.get_level_values('profile_name')
review_counts = reviewer_names.value_counts()
top_reviewers = review_counts.head(5).index
top_reviewers

Index(['Mora2000', 'drabmuh', 'northyorksammy', 'nickd717', 'jrallen34'], dtype='object')

In [57]:
# Select every row of the top reviewers: one indexer per index level.
top_reviewer_rows = reviews.loc[top_reviewers, :, :]
top_reviewer_rows.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abv,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,review_taste,text
profile_name,beer_id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Mora2000,10,2009-10-15 05:17:25,7.0,4,Allagash Dubbel Ale,Dubbel,4,3.5,3.5,4,4.0,Batch 58. Poured into an Allagash goblet. App...
Mora2000,33,2009-12-28 06:01:37,8.0,22,Maudite,Belgian Strong Dark Ale,4,4.5,4.0,4,4.0,Beer pours dark brown to red with a huge whit...
Mora2000,34,2010-01-07 18:39:27,9.0,22,La Fin Du Monde,Tripel,4,4.5,4.5,4,4.5,Pours clear yellow with a white head. The aro...
Mora2000,111,2009-12-28 04:28:00,17.5,35,Samuel Adams Triple Bock,American Strong Ale,2,2.5,1.0,1,1.0,I tried this three times over a year before r...
Mora2000,129,2010-01-08 23:52:33,6.9,37,Orval Trappist Ale,Belgian Pale Ale,4,4.5,4.0,4,4.5,Pours murky orange with a large white head. T...


In [65]:
# Deliberately invalid: `:` slice syntax is only legal directly inside `[...]`,
# not inside a parenthesised tuple, so Python rejects this line with SyntaxError.
reviews.loc[(top_reviewers, 111, :), :]

SyntaxError: invalid syntax (<ipython-input-65-e9cec49c1d3a>, line 1)

In [66]:
# `slice(None)` is the spelled-out form of `:`, usable inside a tuple.
every_time = slice(None)
reviews.loc[(top_reviewers, 111, every_time), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abv,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,review_taste,text
profile_name,beer_id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Mora2000,111,2009-12-28 04:28:00,17.5,35,Samuel Adams Triple Bock,American Strong Ale,2.0,2.5,1.0,1.0,1,I tried this three times over a year before r...
drabmuh,111,2010-02-10 01:20:08,17.5,35,Samuel Adams Triple Bock,American Strong Ale,4.0,4.5,3.5,3.5,4,"1997 vintage, split with my wife. I have read..."
nickd717,111,2009-10-18 23:56:12,17.5,35,Samuel Adams Triple Bock,American Strong Ale,3.5,3.0,1.5,3.5,3,"Wow, finally got to try this legendary beer t..."


In [69]:
# Take one reviewer's score columns, then pivot the column labels into an
# extra innermost index level.
harley_scores = reviews.loc['05Harley', review_cols]
harley_scores.stack()

beer_id  time                                  
131      2010-01-09 03:19:59  review_appearance    4.0
                              review_aroma         3.0
                              review_overall       4.0
                              review_palate        4.0
                              review_taste         4.0
                                                  ... 
24071    2010-02-15 00:41:15  review_appearance    4.5
                              review_aroma         3.5
                              review_overall       4.5
                              review_palate        4.5
                              review_taste         4.0
dtype: float64

If feasible, sort your MultiIndex. Speed, sanity.

Be careful with duplicates in the indices.

In [59]:
# True only if no (profile_name, beer_id, time) key appears more than once.
reviews.index.is_unique

False

In [61]:
# duplicated() marks the second and later occurrences of each index key.
is_repeat = reviews.index.duplicated()
reviews[is_repeat]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abv,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,review_taste,text
profile_name,beer_id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
david18,935,2010-02-19 22:00:22,4.8,360,Warsteiner Premium Verum,German Pilsener,3.5,3.5,3,3,3,Had this at a german bar in Key West with my ...


In [64]:
# Show every row whose index key occurs more than once. `keep=False` flags all
# occurrences, not just the repeats. `Index.get_duplicates()` did this lookup
# on older pandas but was removed in pandas 1.0.
reviews[reviews.index.duplicated(keep=False)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abv,brewer_id,beer_name,beer_style,review_appearance,review_aroma,review_overall,review_palate,review_taste,text
profile_name,beer_id,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
david18,935,2010-02-19 22:00:22,4.8,360,Warsteiner Premium Verum,German Pilsener,3.5,3.5,4,4,3.5,This was my previous review of the bottle ver...
david18,935,2010-02-19 22:00:22,4.8,360,Warsteiner Premium Verum,German Pilsener,3.5,3.5,3,3,3.0,Had this at a german bar in Key West with my ...


In [67]:
# Keep only the first row for each index key, then confirm the index is unique.
first_occurrence = ~reviews.index.duplicated()
reviews = reviews[first_occurrence]
reviews.index.is_unique

True

In [82]:
# Mean ABV per beer. `reviews.abv.mean(level='beer_id')` was the older shortcut
# for this groupby; the `level=` argument was removed in pandas 2.0.
reviews.groupby(level='beer_id').abv.mean()

beer_id
5         4.5
6         5.6
7         4.2
8         4.2
9         4.2
         ... 
63863     4.5
64760    10.0
66036    12.0
68386     4.5
71037     9.0
Name: abv, dtype: float64

On older pandas, `reviews.abv.mean(level='beer_id')` is a shortcut for `reviews.groupby(level='beer_id').abv.mean()` (the `level=` argument was removed in pandas 2.0), but we haven't seen groupby yet.

# Recap

- Boolean masks should always be 1-dimensional and the same length as the axis they index