# Prototyping and testing

In [1]:
import pandas as pd

## Load in the Data

In [2]:
csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_description.csv')
description = pd.DataFrame(csv)

In [3]:
style_csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/comparison_style.csv')
style = pd.DataFrame(style_csv)

## Build sets from style columns for set-wise comparison operations

In [4]:
description_set = set(description['style'].values)
style_set = set(style['beer_style'].values)

### Items in the sets that are present in both, require no further action

In [29]:
description_set & style_set # Present in both sets

{'American Adjunct Lager',
 'American Amber / Red Ale',
 'American Amber / Red Lager',
 'American Barleywine',
 'American Black Ale',
 'American Blonde Ale',
 'American Brown Ale',
 'American Dark Wheat Ale',
 'American IPA',
 'American Malt Liquor',
 'American Pale Ale (APA)',
 'American Pale Wheat Ale',
 'American Porter',
 'American Stout',
 'American Strong Ale',
 'American Wild Ale',
 'Baltic Porter',
 'Belgian Dark Ale',
 'Belgian IPA',
 'Belgian Pale Ale',
 'Belgian Strong Dark Ale',
 'Belgian Strong Pale Ale',
 'Bière de Champagne / Bière Brut',
 'Braggot',
 'California Common / Steam Beer',
 'Chile Beer',
 'English Bitter',
 'English Brown Ale',
 'English Dark Mild Ale',
 'English India Pale Ale (IPA)',
 'English Pale Ale',
 'English Pale Mild Ale',
 'English Porter',
 'English Stout',
 'English Strong Ale',
 'Flanders Oud Bruin',
 'Flanders Red Ale',
 'Foreign / Export Stout',
 'Irish Dry Stout',
 'Irish Red Ale',
 'Japanese Rice Lager',
 'Low Alcohol Beer',
 'Munich Dunkel L

### Data that is in the Description Set but not in the Style set

In [5]:
description_set - style_set # Only in Description data not in Style data

{'American Brett',
 'American Brut IPA',
 'American Cream Ale',
 'American Imperial IPA',
 'American Imperial Pilsner',
 'American Imperial Porter',
 'American Imperial Red Ale',
 'American Imperial Stout',
 'American Lager',
 'American Light Lager',
 'American Wheatwine Ale',
 'Belgian Blonde Ale',
 'Belgian Dubbel',
 'Belgian Faro',
 'Belgian Fruit Lambic',
 'Belgian Gueuze',
 'Belgian Lambic',
 'Belgian Quadrupel (Quad)',
 'Belgian Saison',
 'Belgian Tripel',
 'Belgian Witbier',
 'Berliner Weisse',
 'Bohemian Pilsener',
 'British Barleywine',
 'English Extra Special / Strong Bitter (ESB)',
 'English Oatmeal Stout',
 'English Old Ale',
 'English Sweet / Milk Stout',
 'European Dark Lager',
 'European Export / Dortmunder',
 'European Pale Lager',
 'European Strong Lager',
 'Finnish Sahti',
 'French Bière de Garde',
 'Fruit and Field Beer',
 'German Altbier',
 'German Bock',
 'German Doppelbock',
 'German Dunkelweizen',
 'German Eisbock',
 'German Hefeweizen',
 'German Helles',
 'Germa

### Data that is in the Style Set but not in the Description set

In [6]:
style_set - description_set # Only in Style data not in description

{'Altbier',
 'American Double / Imperial IPA',
 'American Double / Imperial Pilsner',
 'American Double / Imperial Stout',
 'American Pale Lager',
 'Berliner Weissbier',
 'Bière de Garde',
 'Black & Tan',
 'Bock',
 'Cream Ale',
 'Czech Pilsener',
 'Doppelbock',
 'Dortmunder / Export Lager',
 'Dubbel',
 'Dunkelweizen',
 'Eisbock',
 'English Barleywine',
 'Euro Dark Lager',
 'Euro Pale Lager',
 'Euro Strong Lager',
 'Extra Special / Strong Bitter (ESB)',
 'Faro',
 'Fruit / Vegetable Beer',
 'German Pilsener',
 'Gose',
 'Gueuze',
 'Happoshu',
 'Hefeweizen',
 'Herbed / Spiced Beer',
 'Keller Bier / Zwickel Bier',
 'Kristalweizen',
 'Kvass',
 'Kölsch',
 'Lambic - Fruit',
 'Lambic - Unblended',
 'Light Lager',
 'Maibock / Helles Bock',
 'Milk / Sweet Stout',
 'Munich Helles Lager',
 'Märzen / Oktoberfest',
 'Oatmeal Stout',
 'Old Ale',
 'Pumpkin Ale',
 'Quadrupel (Quad)',
 'Rauchbier',
 'Roggenbier',
 'Sahti',
 'Saison / Farmhouse Ale',
 'Schwarzbier',
 'Smoked Beer',
 'Tripel',
 'Weizenbock

In [13]:
rating_csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_reviews.csv')
ratings = pd.DataFrame(rating_csv)

In [21]:
update = ratings.copy()

### Works but there has to be a better way...

In [24]:
update['beer_style'].loc[update['beer_style'] == 'Altbier'] = 'German Altbier'
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 'American Imperial IPA'
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial Pilsner'] = 'American Imperial Pilsner'
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial Stout'] = 'American Imperial Stout'
update['beer_style'].loc[update['beer_style'] == 'American Pale Lager'] = 'American Lager'
update['beer_style'].loc[update['beer_style'] == 'Berliner Weissbier'] = 'Berliner Weisse'
update['beer_style'].loc[update['beer_style'] == 'Bière de Garde'] = 'French Bière de Garde'
update['beer_style'].loc[update['beer_style'] == 'Black & Tan'] = 'American Porter'
update['beer_style'].loc[update['beer_style'] == 'Bock'] = 'German Bock'
update['beer_style'].loc[update['beer_style'] == 'Cream Ale'] = 'American Cream Ale'
# update['beer_style'].loc[update['beer_style'] == 'Czech Pilsener'] = 'Bohemian Pilsener'
update['beer_style'].loc[update['beer_style'] == 'Doppelbock'] = 'German Doppelbock'
update['beer_style'].loc[update['beer_style'] == 'Dortmunder / Export Lager'] = 'European Export / Dortmunder'
update['beer_style'].loc[update['beer_style'] == 'Dubbel'] = 
update['beer_style'].loc[update['beer_style'] == 'Dunkelweizen'] = 
update['beer_style'].loc[update['beer_style'] == 'Eisbock'] = 
update['beer_style'].loc[update['beer_style'] == 'English Barleywine'] = 
update['beer_style'].loc[update['beer_style'] == 'Euro Dark Lager'] = 
update['beer_style'].loc[update['beer_style'] == 'Euro Pale Lager'] = 
update['beer_style'].loc[update['beer_style'] == 'Euro Strong Lager'] = 
update['beer_style'].loc[update['beer_style'] == 'Extra Special / Strong Bitter (ESB)'] = 
update['beer_style'].loc[update['beer_style'] == 'Faro'] = 
update['beer_style'].loc[update['beer_style'] == 'Fruit / Vegetable Beer'] = 
update['beer_style'].loc[update['beer_style'] == 'German Pilsener'] = 
update['beer_style'].loc[update['beer_style'] == 'Gose'] = 
update['beer_style'].loc[update['beer_style'] == 'Gueuze'] = 
update['beer_style'].loc[update['beer_style'] == 'Happoshu'] = 
update['beer_style'].loc[update['beer_style'] == 'Hefeweizen'] = 
update['beer_style'].loc[update['beer_style'] == 'Herbed / Spiced Beer'] = 
update['beer_style'].loc[update['beer_style'] == 'Keller Bier / Zwickel Bier'] = 
update['beer_style'].loc[update['beer_style'] == 'Kristalweizen'] = 
update['beer_style'].loc[update['beer_style'] == 'Kvass'] = 
update['beer_style'].loc[update['beer_style'] == 'Kölsch'] = 
update['beer_style'].loc[update['beer_style'] == 'Lambic - Fruit'] = 
update['beer_style'].loc[update['beer_style'] == 'Lambic - Unblended'] = 
update['beer_style'].loc[update['beer_style'] == 'Light Lager'] = 
update['beer_style'].loc[update['beer_style'] == 'Maibock / Helles Bock'] = 
update['beer_style'].loc[update['beer_style'] == 'Milk / Sweet Stout'] = 
update['beer_style'].loc[update['beer_style'] == 'Munich Helles Lager'] = 
update['beer_style'].loc[update['beer_style'] == 'Märzen / Oktoberfest'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [28]:
update.loc[update['beer_style'] == 'Black & Tan']

Unnamed: 0,brewery_id,brewery_name,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid
2500,421,Formosa Brewing Co.,1147537037,3.5,4.0,4.0,Mebuzzard,Black & Tan,3.5,4.0,Black & Tan,,1222
2501,421,Formosa Brewing Co.,1031638788,3.5,3.5,4.0,proc,Black & Tan,3.5,4.0,Black & Tan,,1222
2502,421,Formosa Brewing Co.,997954109,3.0,3.0,3.0,Brent,Black & Tan,3.0,3.0,Black & Tan,,1222
5458,14879,Hoppin' Frog Brewery,1216588253,4.5,4.0,4.0,JoeyBeerBelly,Black & Tan,4.0,4.0,Bodacious Black & Tan,7.6,33662
5761,14879,Hoppin' Frog Brewery,1242214833,4.0,3.5,3.5,bort11,Black & Tan,3.5,4.0,Bodacious Black & Tan,7.6,33662
5769,14879,Hoppin' Frog Brewery,1208537466,4.5,4.0,4.0,Kegatron,Black & Tan,4.0,4.5,Bodacious Black & Tan,7.6,33662
5770,14879,Hoppin' Frog Brewery,1208320720,4.0,5.0,4.0,hustlesworth,Black & Tan,4.5,4.5,Bodacious Black & Tan,7.6,33662
5771,14879,Hoppin' Frog Brewery,1208222665,4.0,3.5,3.5,currysage1,Black & Tan,4.5,4.0,Bodacious Black & Tan,7.6,33662
5772,14879,Hoppin' Frog Brewery,1208022694,4.5,4.0,4.0,HopDerek,Black & Tan,4.5,4.5,Bodacious Black & Tan,7.6,33662
5773,14879,Hoppin' Frog Brewery,1207095479,4.0,4.0,4.5,srobertson59,Black & Tan,4.0,4.5,Bodacious Black & Tan,7.6,33662
