# Prototyping and testing

In [1]:
import pandas as pd

## Load in the Data

In [2]:
csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_description.csv')
description = pd.DataFrame(csv)

In [3]:
style_csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/comparison_style.csv')
style = pd.DataFrame(style_csv)

## Build sets from style columns for set-wise comparison operations

In [4]:
description_set = set(description['style'].values)
style_set = set(style['beer_style'].values)

### Items in the sets that are present in both, require no further action

In [5]:
description_set & style_set # Present in both sets

{'American Adjunct Lager',
 'American Amber / Red Ale',
 'American Amber / Red Lager',
 'American Barleywine',
 'American Black Ale',
 'American Blonde Ale',
 'American Brown Ale',
 'American Dark Wheat Ale',
 'American IPA',
 'American Malt Liquor',
 'American Pale Ale (APA)',
 'American Pale Wheat Ale',
 'American Porter',
 'American Stout',
 'American Strong Ale',
 'American Wild Ale',
 'Baltic Porter',
 'Belgian Dark Ale',
 'Belgian IPA',
 'Belgian Pale Ale',
 'Belgian Strong Dark Ale',
 'Belgian Strong Pale Ale',
 'Bière de Champagne / Bière Brut',
 'Braggot',
 'California Common / Steam Beer',
 'Chile Beer',
 'English Bitter',
 'English Brown Ale',
 'English Dark Mild Ale',
 'English India Pale Ale (IPA)',
 'English Pale Ale',
 'English Pale Mild Ale',
 'English Porter',
 'English Stout',
 'English Strong Ale',
 'Flanders Oud Bruin',
 'Flanders Red Ale',
 'Foreign / Export Stout',
 'Irish Dry Stout',
 'Irish Red Ale',
 'Japanese Rice Lager',
 'Low Alcohol Beer',
 'Munich Dunkel L

### Data that is in the Description Set but not in the Style set

In [6]:
description_set - style_set # Only in Description data not in Style data

{'American Brett',
 'American Brut IPA',
 'American Cream Ale',
 'American Imperial IPA',
 'American Imperial Pilsner',
 'American Imperial Porter',
 'American Imperial Red Ale',
 'American Imperial Stout',
 'American Lager',
 'American Light Lager',
 'American Wheatwine Ale',
 'Belgian Blonde Ale',
 'Belgian Dubbel',
 'Belgian Faro',
 'Belgian Fruit Lambic',
 'Belgian Gueuze',
 'Belgian Lambic',
 'Belgian Quadrupel (Quad)',
 'Belgian Saison',
 'Belgian Tripel',
 'Belgian Witbier',
 'Berliner Weisse',
 'Bohemian Pilsener',
 'British Barleywine',
 'English Extra Special / Strong Bitter (ESB)',
 'English Oatmeal Stout',
 'English Old Ale',
 'English Sweet / Milk Stout',
 'European Dark Lager',
 'European Export / Dortmunder',
 'European Pale Lager',
 'European Strong Lager',
 'Finnish Sahti',
 'French Bière de Garde',
 'Fruit and Field Beer',
 'German Altbier',
 'German Bock',
 'German Doppelbock',
 'German Dunkelweizen',
 'German Eisbock',
 'German Hefeweizen',
 'German Helles',
 'Germa

### Data that is in the Style Set but not in the Description set

In [7]:
style_set - description_set # Only in Style data not in description

{'Altbier',
 'American Double / Imperial IPA',
 'American Double / Imperial Pilsner',
 'American Double / Imperial Stout',
 'American Pale Lager',
 'Berliner Weissbier',
 'Bière de Garde',
 'Black & Tan',
 'Bock',
 'Cream Ale',
 'Czech Pilsener',
 'Doppelbock',
 'Dortmunder / Export Lager',
 'Dubbel',
 'Dunkelweizen',
 'Eisbock',
 'English Barleywine',
 'Euro Dark Lager',
 'Euro Pale Lager',
 'Euro Strong Lager',
 'Extra Special / Strong Bitter (ESB)',
 'Faro',
 'Fruit / Vegetable Beer',
 'German Pilsener',
 'Gose',
 'Gueuze',
 'Happoshu',
 'Hefeweizen',
 'Herbed / Spiced Beer',
 'Keller Bier / Zwickel Bier',
 'Kristalweizen',
 'Kvass',
 'Kölsch',
 'Lambic - Fruit',
 'Lambic - Unblended',
 'Light Lager',
 'Maibock / Helles Bock',
 'Milk / Sweet Stout',
 'Munich Helles Lager',
 'Märzen / Oktoberfest',
 'Oatmeal Stout',
 'Old Ale',
 'Pumpkin Ale',
 'Quadrupel (Quad)',
 'Rauchbier',
 'Roggenbier',
 'Sahti',
 'Saison / Farmhouse Ale',
 'Schwarzbier',
 'Smoked Beer',
 'Tripel',
 'Weizenbock

In [8]:
rating_csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_reviews.csv')
ratings = pd.DataFrame(rating_csv)

In [9]:
update = ratings.copy()

### Works but there has to be a better way...

In [10]:
update['beer_style'].loc[update['beer_style'] == 'Altbier'] = 'German Altbier'
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial IPA'] = 'American Imperial IPA'
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial Pilsner'] = 'American Imperial Pilsner'
update['beer_style'].loc[update['beer_style'] == 'American Double / Imperial Stout'] = 'American Imperial Stout'
update['beer_style'].loc[update['beer_style'] == 'American Pale Lager'] = 'American Lager'
update['beer_style'].loc[update['beer_style'] == 'Berliner Weissbier'] = 'Berliner Weisse'
update['beer_style'].loc[update['beer_style'] == 'Bière de Garde'] = 'French Bière de Garde'
update['beer_style'].loc[update['beer_style'] == 'Black & Tan'] = 'American Porter'
update['beer_style'].loc[update['beer_style'] == 'Bock'] = 'German Bock'
update['beer_style'].loc[update['beer_style'] == 'Cream Ale'] = 'American Cream Ale'
update['beer_style'].loc[update['beer_style'] == 'Czech Pilsener'] = 'Bohemian Pilsener'
update['beer_style'].loc[update['beer_style'] == 'Doppelbock'] = 'German Doppelbock'
update['beer_style'].loc[update['beer_style'] == 'Dortmunder / Export Lager'] = 'European Export / Dortmunder'
update['beer_style'].loc[update['beer_style'] == 'Dubbel'] = 'Belgian Dubbel'
update['beer_style'].loc[update['beer_style'] == 'Dunkelweizen'] = 'German Dunkelweizen'
update['beer_style'].loc[update['beer_style'] == 'Eisbock'] = 'German Eisbock'
update['beer_style'].loc[update['beer_style'] == 'English Barleywine'] = 'British Barleywine'
update['beer_style'].loc[update['beer_style'] == 'Euro Dark Lager'] = 'European Dark Lager'
update['beer_style'].loc[update['beer_style'] == 'Euro Pale Lager'] = 'European Pale Lager'
update['beer_style'].loc[update['beer_style'] == 'Euro Strong Lager'] = 'European Strong Lager'
update['beer_style'].loc[update['beer_style'] == 'Extra Special / Strong Bitter (ESB)'] = 'English Extra Special / Strong Bitter (ESB)'
update['beer_style'].loc[update['beer_style'] == 'Faro'] = 'Belgian Faro'
update['beer_style'].loc[update['beer_style'] == 'Fruit / Vegetable Beer'] = 'Fruit and Field Beer'
update['beer_style'].loc[update['beer_style'] == 'German Pilsener'] = 'German Pilsner'
update['beer_style'].loc[update['beer_style'] == 'Gose'] = 'Leipzig Gose'
update['beer_style'].loc[update['beer_style'] == 'Gueuze'] = 'Belgian Gueuze'
update['beer_style'].loc[update['beer_style'] == 'Happoshu'] = 'Japanese Happoshu'
update['beer_style'].loc[update['beer_style'] == 'Hefeweizen'] = 'German Hefeweizen'
update['beer_style'].loc[update['beer_style'] == 'Herbed / Spiced Beer'] = 'Herb and Spice Beer'
update['beer_style'].loc[update['beer_style'] == 'Keller Bier / Zwickel Bier'] = 'German Kellerbier / Zwickelbier'
update['beer_style'].loc[update['beer_style'] == 'Kristalweizen'] = 'German Kristalweizen'
update['beer_style'].loc[update['beer_style'] == 'Kvass'] = 'Russian Kvass'
update['beer_style'].loc[update['beer_style'] == 'Kölsch'] = 'German Kölsch'
update['beer_style'].loc[update['beer_style'] == 'Lambic - Fruit'] = 'Belgian Fruit Lambic'
update['beer_style'].loc[update['beer_style'] == 'Lambic - Unblended'] = 'Belgian Lambic'
update['beer_style'].loc[update['beer_style'] == 'Light Lager'] = 'American Light Lager'
update['beer_style'].loc[update['beer_style'] == 'Maibock / Helles Bock'] = 'German Maibock'
update['beer_style'].loc[update['beer_style'] == 'Milk / Sweet Stout'] = 'English Sweet / Milk Stout'
update['beer_style'].loc[update['beer_style'] == 'Munich Helles Lager'] = 'German Helles'
update['beer_style'].loc[update['beer_style'] == 'Märzen / Oktoberfest'] = 'German Märzen / Oktoberfest'
update['beer_style'].loc[update['beer_style'] == 'Oatmeal Stout'] = 'English Oatmeal Stout'
update['beer_style'].loc[update['beer_style'] == 'Old Ale'] = 'English Old Ale'
update['beer_style'].loc[update['beer_style'] == 'Pumpkin Ale'] = 'Pumpkin Beer'
update['beer_style'].loc[update['beer_style'] == 'Quadrupel (Quad)'] = 'Belgian Quadrupel (Quad)'
update['beer_style'].loc[update['beer_style'] == 'Rauchbier'] = 'German Rauchbier'
update['beer_style'].loc[update['beer_style'] == 'Roggenbier'] = 'German Roggenbier'
update['beer_style'].loc[update['beer_style'] == 'Sahti'] = 'Finnish Sahti'
update['beer_style'].loc[update['beer_style'] == 'Saison / Farmhouse Ale'] = 'Belgian Saison' # No new category for Farmhouse
update['beer_style'].loc[update['beer_style'] == 'Schwarzbier'] = 'German Schwarzbier'
update['beer_style'].loc[update['beer_style'] == 'Smoked Beer'] = 'Smoke Beer'
update['beer_style'].loc[update['beer_style'] == 'Tripel'] = 'Belgian Tripel'
update['beer_style'].loc[update['beer_style'] == 'Weizenbock'] = 'German Weizenbock'
update['beer_style'].loc[update['beer_style'] == 'Wheatwine'] = 'American Wheatwine Ale'
update['beer_style'].loc[update['beer_style'] == 'Witbier'] = 'Belgian Witbier'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [11]:
update.to_csv('/home/grimoire/Projects/BeerRatings/rating_update.csv', index=False)

In [12]:
comparison_style = update.loc[:,['beer_style', 'review_overall', 'review_taste', 'review_appearance', 
                                    'review_palate', 'review_aroma']].groupby('beer_style').mean()
beer_style_counts = update.loc[:,['beer_style', 'beer_name']].groupby('beer_style').count()
beer_style_counts = beer_style_counts.rename({'beer_name':'count'}, axis=1)
comparison_style = comparison_style.merge(beer_style_counts, left_index=True, right_index=True)

In [13]:
style_set_updated = set(comparison_style.index)

In [14]:
len(ratings.beer_style.unique())

104

In [15]:
len(style_set_updated & description_set)

103

The difference comes from losing the 'Black and Tan' beer style which was updated to be an American Porter style

In [16]:
style_set_updated - description_set

set()

## All styles in our style_set_update now have a match pair to description_set

In [17]:
description_set - style_set_updated

{'American Brett',
 'American Brut IPA',
 'American Imperial Porter',
 'American Imperial Red Ale',
 'Belgian Blonde Ale',
 'New England IPA',
 'Robust Porter',
 'Smoke Porter'}

### These are styles that were added after our data set was released
If desired they _could_ be removed from the description set